Coverage for scripts / test_owlrl_duplications.py: 80%

86 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-26 21:25 +0000

1"""Test to confirm OWL-RL reasoner behavior with pre-existing inferences.""" 

2 

3import logging 

4from rdflib import Dataset, Graph, Namespace, RDF, RDFS 

5from owlrl import DeductiveClosure 

6from owlrl.OWLRL import OWLRL_Semantics 

7 

# Emit INFO-level records so the step-by-step log output of the tests is shown.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Namespace under which all example resources and named graphs are minted.
EX = Namespace("http://example.org/")

13 

14 

def test_owlrl_with_preexisting_inferences_separate_graphs():
    """Check whether OWL-RL re-emits inferences that already sit in another named graph.

    Builds a dataset with three named graphs (base data, hand-written
    "pre-existing" inferences, and an initially empty destination), runs the
    OWL-RL closure over the whole dataset with the destination graph as the
    target, and logs the triple counts and contents along the way.

    Returns:
        True when none of the pre-existing triples show up in the destination
        graph after reasoning, False when at least one of them does.
    """
    logger.info("\n=== Test: Pre-existing inferences in separate graph ===")

    # One dataset holding every named graph used below.
    dataset = Dataset()

    # Named graph 1: the asserted base facts.
    base_graph = dataset.graph(EX.data)
    base_facts = (
        (EX.Cat, RDFS.subClassOf, EX.Animal),
        (EX.Animal, RDFS.subClassOf, EX.LivingThing),
        (EX.fluffy, RDF.type, EX.Cat),
    )
    for fact in base_facts:
        base_graph.add(fact)

    logger.info("Initial data graph has %d triples:", len(base_graph))
    for fact in base_graph:
        logger.info(" %s", fact)

    # Named graph 2: triples a reasoner would derive from the base facts,
    # added by hand up front (simulating what Step 2 does).
    known_graph = dataset.graph(EX.preexisting_inferences)
    known_inferences = (
        (EX.fluffy, RDF.type, EX.Animal),  # via Cat subClassOf Animal
        (EX.Cat, RDFS.subClassOf, EX.LivingThing),  # transitive
    )
    for fact in known_inferences:
        known_graph.add(fact)

    logger.info("Pre-existing inferences graph has %d triples:", len(known_graph))
    for fact in known_graph:
        logger.info(" %s", fact)

    # Named graph 3: where the reasoner writes its output (simulating Step 3).
    target_graph = dataset.graph(EX.new_inferences)

    logger.info("\nDataset before reasoning has %d total triples", len(dataset))

    # Reason over the ENTIRE dataset, i.e. base facts plus the pre-existing
    # inferences, directing the results into the destination graph.
    logger.info("\nRunning OWL-RL over entire dataset into new_inferences graph...")
    closure = DeductiveClosure(OWLRL_Semantics)
    closure.expand(dataset, target_graph)

    logger.info("\nAfter reasoning:")
    logger.info(" Dataset total: %d triples", len(dataset))
    logger.info(" Data graph: %d triples", len(base_graph))
    logger.info(" Pre-existing inferences: %d triples", len(known_graph))
    logger.info(" NEW inferences graph: %d triples", len(target_graph))

    logger.info("\nNEW inferences generated:")
    for fact in target_graph:
        logger.info(" %s", fact)

    # Pre-existing triples that also ended up in the destination graph.
    duplicates = [
        (s, p, o) for s, p, o in known_graph if (s, p, o) in target_graph
    ]

    logger.info("\n=== RESULT ===")
    if not duplicates:
        logger.info("NO DUPLICATES: Pre-existing inferences were NOT duplicated")
        logger.info("Conclusion: OWL-RL does NOT duplicate inferences that already exist in input")
    else:
        logger.info("DUPLICATES FOUND: %d triples were in both graphs:", len(duplicates))
        for fact in duplicates:
            logger.info(" %s", fact)
        logger.info("Conclusion: OWL-RL DOES duplicate inferences into destination graph")

    # True if no duplicates
    return not duplicates

86 

87 

def test_owlrl_with_preexisting_inferences_same_graph():
    """Log how the triple count changes when OWL-RL expands a graph in place.

    Populates a single graph with base facts plus one hand-added inference,
    runs the OWL-RL closure on that graph with no separate destination, and
    logs the counts before and after together with the resulting triples.
    Nothing is asserted or returned; the function only reports.
    """
    logger.info("\n\n=== Test: Pre-existing inferences in destination graph ===")

    # A plain graph seeded with the base facts.
    graph = Graph()
    seed_facts = (
        (EX.Cat, RDFS.subClassOf, EX.Animal),
        (EX.Animal, RDFS.subClassOf, EX.LivingThing),
        (EX.fluffy, RDF.type, EX.Cat),
    )
    for fact in seed_facts:
        graph.add(fact)

    logger.info("Initial graph has %d triples:", len(graph))
    for fact in graph:
        logger.info(" %s", fact)

    # Add by hand a triple the reasoner would normally derive itself.
    graph.add((EX.fluffy, RDF.type, EX.Animal))
    logger.info("\nManually added inference: (fluffy, type, Animal)")
    logger.info("Graph now has %d triples", len(graph))

    # Expand in place and compare sizes around the call.
    logger.info("\nRunning OWL-RL over graph (destination = same graph)...")
    count_before = len(graph)
    closure = DeductiveClosure(OWLRL_Semantics)
    closure.expand(graph)
    count_after = len(graph)

    logger.info("\n=== RESULT ===")
    logger.info("Triples before: %d", count_before)
    logger.info("Triples after: %d", count_after)
    logger.info("New triples: %d", count_after - count_before)

    logger.info("\nAll triples after reasoning:")
    for fact in graph:
        logger.info(" %s", fact)

    # The graph stores each triple once, so a repeated add by the reasoner
    # would not be observable from the counts above.
    logger.info("\nNote: RDFlib graphs auto-deduplicate, so we can't detect if OWL-RL")
    logger.info("attempted to add the same triple twice")

125 

126 

if __name__ == "__main__":
    # Script entry: run both scenarios, then summarise the separate-graph result.
    no_duplicates_separate = test_owlrl_with_preexisting_inferences_separate_graphs()
    test_owlrl_with_preexisting_inferences_same_graph()

    # Pick the summary text for the observed outcome, then print it below a banner.
    if no_duplicates_separate:
        summary_lines = (
            "✓ OWL-RL does NOT duplicate inferences from input into destination graph",
            " This means Step 6 removing 0 triples is EXPECTED behavior!",
            " The external inferences are already in a separate graph (IRI_EXTERNAL_INFERENCES)",
            " and the reasoner doesn't put them into IRI_FULL_INFERENCES.",
        )
    else:
        summary_lines = (
            "✗ OWL-RL DOES duplicate inferences, so Step 6 should be removing triples",
            " The fact that it removes 0 suggests a different problem.",
        )

    print("\n" + "=" * 80)
    print("SUMMARY")
    print("=" * 80)
    for summary_line in summary_lines:
        print(summary_line)