Coverage for src / pythinfer / rdflibplus.py: 77%

88 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-26 21:27 +0000

1"""Extensions to rdflib for pythinfer.""" 

2 

3from collections.abc import Generator 

4 

5from rdflib import Dataset, Graph, IdentifiedNode 

6from rdflib.graph import ( 

7 _ContextIdentifierType, # pyright: ignore[reportPrivateUsage] 

8 _ContextType, # pyright: ignore[reportPrivateUsage] 

9 _OptionalIdentifiedQuadType, 

10 _TripleOrOptionalQuadType, 

11 _TripleOrQuadPatternType, 

12 _TripleType, 

13) 

14 

15 

class DatasetView(Dataset):
    """A Dataset subclass that acts as a restricted view on selected named graphs.

    This behaves like a Dataset, but any operations are limited to a specified
    subset of the named graphs in the original Dataset. The data is *not copied*, the
    same underlying store is used, so changes to the graphs in the view are reflected
    in the original Dataset, and vice versa.

    Adding and removing graphs from the view abides by the original Dataset API, except
    that only graphs in the included set can be accessed. Trying to add or remove a
    graph not in the included set will raise a PermissionError.

    To include or exclude graphs from the view after creation, use the `include_graph`
    and `exclude_graph` methods.
    """

    def __init__(
        self,
        original_ds: Dataset,
        included_graph_ids: list[IdentifiedNode],
    ) -> None:
        """Initialize the Dataset view on top of the original Dataset's store.

        Args:
            original_ds: Dataset whose store and `default_union` setting are reused.
            included_graph_ids: Identifiers of the graphs visible through this view.
                The list is stored as given (not copied), so mutating it afterwards
                changes what the view exposes.

        """
        super().__init__(
            store=original_ds.store,
            default_union=original_ds.default_union,
        )
        self.included_graph_ids = included_graph_ids

    def _resolve_graph_id(
        self,
        graph: _ContextIdentifierType | _ContextType | str | None,
    ) -> _ContextIdentifierType | str:
        """Reduce a graph reference to the identifier used for visibility checks.

        A Graph is replaced by its identifier and None by the identifier of the
        default graph; any other value is returned unchanged.
        """
        if graph is None:
            return self.default_graph.identifier
        if isinstance(graph, Graph):
            return graph.identifier
        return graph

    def include_graph(self, identifier: IdentifiedNode | Graph) -> None:
        """Make a graph visible in this view; no-op if it is already visible."""
        graph_id = identifier.identifier if isinstance(identifier, Graph) else identifier
        if graph_id not in self.included_graph_ids:
            self.included_graph_ids.append(graph_id)

    def exclude_graph(self, identifier: IdentifiedNode | Graph) -> None:
        """Hide a graph from this view; no-op if it is not currently visible.

        Only the view's list of included identifiers is changed; nothing is removed
        from the store.
        """
        graph_id = identifier.identifier if isinstance(identifier, Graph) else identifier
        if graph_id in self.included_graph_ids:
            self.included_graph_ids.remove(graph_id)

    def graph(
        self,
        identifier: IdentifiedNode | Graph | str | None = None,
        base: str | None = None,
    ) -> Graph:
        """Get a named graph from the view.

        Raises:
            PermissionError: If the requested graph is not in the included set. This
                includes `identifier=None`, because None is never an included id.

        """
        _id = identifier.identifier if isinstance(identifier, Graph) else identifier
        if _id not in self.included_graph_ids:
            msg = f"Graph {_id} is not visible in this view."
            raise PermissionError(msg)
        return super().graph(identifier, base=base)

    def __len__(self) -> int:
        """Get the total number of triples in the view."""
        # Bind the parent method first: zero-argument super() is not usable inside
        # a generator expression.
        parent_graph = super().graph
        return sum(len(parent_graph(gid)) for gid in self.included_graph_ids)

    def invert(self) -> "DatasetView":
        """Return a new DatasetView with all graphs excluded from this view.

        Creates a new view that includes only the graphs that are NOT in this
        view's included set. This is useful for separating internal and external
        graphs, or for creating complementary views of a dataset.

        Returns:
            DatasetView with all graphs from the store except those in this view.

        Example:
            >>> ds = Dataset()
            >>> g1 = ds.graph(URIRef("http://example.org/g1"))
            >>> g2 = ds.graph(URIRef("http://example.org/g2"))
            >>> view = DatasetView(ds, [URIRef("http://example.org/g1")])
            >>> inverted = view.invert()
            >>> # inverted now contains only g2

        """
        visible = set(self.included_graph_ids)
        excluded_ids = [
            ctx.identifier
            for ctx in self.store.contexts()
            if ctx.identifier not in visible
        ]
        # The constructor only reads `store` and `default_union`, both of which this
        # view already carries, so no temporary Dataset wrapper is needed.
        return DatasetView(self, excluded_ids)

    def graphs(
        self,
        triple: _TripleType | None = None,
    ) -> Generator[Graph, None, None]:
        """Return graphs in this view, optionally filtered by triple pattern."""
        # Get all graphs from parent, but only yield those in our included list
        for g in super().graphs(triple):
            if g.identifier in self.included_graph_ids:
                yield g

    def quads(
        self,
        quad: _TripleOrQuadPatternType | None = None,
    ) -> Generator[_OptionalIdentifiedQuadType, None, None]:
        """Return quads matching the pattern from included graphs only."""
        # NOTE(review): the return type allows None as the graph element; if the
        # parent ever yields None for default-graph quads they are dropped here even
        # when the default graph is included - confirm against rdflib.
        for q in super().quads(quad):
            if q[3] in self.included_graph_ids:
                yield q

    # The type-checkers don't like that we are not handling the overloads in the
    # superclass method that handle graph Paths. TODO.
    def triples(
        self,
        triple_or_quad: _TripleOrQuadPatternType = (None, None, None),
        context: _ContextType | None = None,
    ) -> Generator[_TripleType, None, None]:
        """Return triples matching the pattern from included graphs only.

        Args:
            triple_or_quad: Triple pattern, or quad pattern whose fourth element
                selects a single graph.
            context: Graph to query; takes precedence over a graph given in the
                pattern. Nothing is yielded if it is not in the included set.

        """
        pattern = triple_or_quad[:3]
        if context is not None:
            # If context is specified, only return triples from that graph
            # if it's in the included graphs
            if context.identifier in self.included_graph_ids:
                yield from context.triples(pattern)
            return

        if len(triple_or_quad) == 4 and triple_or_quad[3] is not None:  # noqa: PLR2004
            # Quad pattern with specific graph - only query that graph.
            # According to rdflib typing, the graph can only be a Graph here, but I
            # do not trust rdflib's typing, so identifiers are accepted as well.
            graph_id = self._resolve_graph_id(triple_or_quad[3])
            target_ids = [graph_id] if graph_id in self.included_graph_ids else []
        else:
            # No context and no graph specified in pattern - return from all
            target_ids = self.included_graph_ids

        # Call triples() on each graph directly to avoid triggering rdflib's
        # internal contexts() enumeration which tries to access default graph.
        for gid in target_ids:
            yield from super().graph(gid).triples(pattern)

    def add(
        self: "DatasetView",
        triple_or_quad: _TripleOrOptionalQuadType,
    ) -> "DatasetView":
        """Add a triple or quad to the store.

        If a triple is given (or a quad whose graph is None), the default graph is
        the one checked for visibility. The graph of a quad may be given either as
        a Graph or as an identifier.

        Raises:
            PermissionError: If the target graph is not in the included set.

        """
        target = triple_or_quad[3] if len(triple_or_quad) == 4 else None  # noqa: PLR2004
        graph_id = self._resolve_graph_id(target)
        if graph_id not in self.included_graph_ids:
            msg = f"Cannot add to graph {graph_id}: not visible in this view."
            raise PermissionError(msg)
        return super().add(triple_or_quad)

    def remove(
        self: "DatasetView",
        triple_or_quad: _TripleOrQuadPatternType,
    ) -> "DatasetView":
        """Remove a triple or quads.

        If the graph is not in the included set, raise PermissionError.
        The graph is either that specified explicitly in the quad (as a Graph or an
        identifier), or the default graph.

        Otherwise, behaviour is as per Dataset.remove():
        If a triple is given it is removed from all named graphs.
        If a quad is given it is removed from the specified named graph.

        NOTE(review): a bare triple is forwarded to Dataset.remove() unchanged, so
        per the behaviour described above it may also remove matches from graphs
        outside this view - confirm whether that is intended.

        Raises:
            PermissionError: If the target graph is not in the included set.

        """
        target = triple_or_quad[3] if len(triple_or_quad) == 4 else None  # noqa: PLR2004
        graph_id = self._resolve_graph_id(target)
        if graph_id not in self.included_graph_ids:
            msg = f"Cannot remove from graph {graph_id}: not visible in this view."
            raise PermissionError(msg)
        # For some bizarre reason, rdflib's Dataset.remove() is typed with
        # _TripleOrOptionalQuadType, but actually accepts _TripleOrQuadPatternType which
        # is what the superclass (Graph) remove() method uses.
        return super().remove(triple_or_quad)  # pyright: ignore[reportArgumentType]

    def remove_graph(
        self,
        g: _ContextIdentifierType | _ContextType | str | None,
    ) -> "DatasetView":
        """Remove a graph from the store, if visible in this view.

        Raises:
            PermissionError: If the graph (the default graph when `g` is None) is
                not in the included set.

        """
        graph_id = self._resolve_graph_id(g)
        if graph_id not in self.included_graph_ids:
            msg = f"Cannot remove graph {graph_id}: not visible in this view."
            raise PermissionError(msg)
        return super().remove_graph(g)

    # I think pyright is incorrectly seeing only a specific overload of
    # `Dataset.serialize` and thus incorrectly reporting an incompatible override.
    def serialize(  # pyright: ignore[reportIncompatibleMethodOverride]
        self,
        destination: str | None = None,
        format: str = "xml",  # noqa: A002
        base: str | None = None,
        encoding: str | None = None,
        **args: object,
    ) -> bytes | str | Graph:
        """Serialize the DatasetView to a destination.

        Only graphs in the included_graph_ids will be serialized. This requires
        creating a temporary Dataset to work around rdflib's serializers accessing
        the store directly instead of using our overridden quads() method.

        The signature matches rdflib.Dataset.serialize().

        NOTE(review): the default format here is "xml"; check that this still
        agrees with the default of the rdflib version in use.
        """
        # Create a temporary dataset with only the included graphs.
        # This is necessary because rdflib serializers bypass our quads() override
        # and access the store directly.
        temp_ds = Dataset()
        for s, p, o, c in self.quads():
            temp_ds.add((s, p, o, c))  # type: ignore[arg-type]

        # Copy namespace bindings from the source dataset to preserve prefixes
        for prefix, namespace in self.namespaces():
            temp_ds.bind(prefix, namespace)

        # Serialize the temporary dataset
        return temp_ds.serialize(
            destination=destination,
            format=format,
            base=base,
            encoding=encoding,
            **args,
        )

    def collapse(self) -> Dataset:
        """Create a new Dataset with this view's triples in the default graph.

        This is useful for working around bugs in reasoning libraries that behave
        differently when reasoning over named graphs vs the default graph.
        See: https://github.com/RDFLib/OWL-RL/issues/76

        Returns:
            A new Dataset containing all triples from this view in its default graph.

        """
        temp_ds = Dataset()
        # Copy all triples from this view into the default graph
        for s, p, o in self.triples():
            temp_ds.add((s, p, o))
        return temp_ds

253 

254 

def graph_lengths(ds: Dataset) -> dict[IdentifiedNode, int]:
    """Map the identifier of each graph yielded by `ds.graphs()` to its length."""
    return {named.identifier: len(named) for named in ds.graphs()}

260 return lengths