Coverage for src / pythinfer / rdflibplus.py: 77%
88 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-26 21:27 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-26 21:27 +0000
1"""Extensions to rdflib for pythinfer."""
3from collections.abc import Generator
5from rdflib import Dataset, Graph, IdentifiedNode
6from rdflib.graph import (
7 _ContextIdentifierType, # pyright: ignore[reportPrivateUsage]
8 _ContextType, # pyright: ignore[reportPrivateUsage]
9 _OptionalIdentifiedQuadType,
10 _TripleOrOptionalQuadType,
11 _TripleOrQuadPatternType,
12 _TripleType,
13)
16class DatasetView(Dataset):
17 """A Dataset subclass that acts as a restricted view on selected named graphs.
19 This behaves like a Dataset, but any operations are limited to a specified
20 subset of the named graphs in the original Dataset. The data is *not copied*, the
21 same underlying store is used, so changes to the graphs in the view are reflected
22 in the original Dataset, and vice versa.
24 Adding and removing graphs from the view abides by the original Dataset API, except
25 that only graphs in the included set can be accessed. Trying to add or remove a
26 graph not in the included set will raise a PermissionError.
28 To include or exclude graphs from the view after creation, use the `include_graph`
29 and `exclude_graph` methods.
30 """
32 def __init__(
33 self,
34 original_ds: Dataset,
35 included_graph_ids: list[IdentifiedNode],
36 ) -> None:
37 """Initialize the Dataset view containing a pointer to the original Dataset."""
38 super().__init__(
39 store=original_ds.store,
40 default_union=original_ds.default_union,
41 )
42 self.included_graph_ids = included_graph_ids
44 def graph(
45 self,
46 identifier: IdentifiedNode | Graph | str | None = None,
47 base: str | None = None,
48 ) -> Graph:
49 """Get a named graph from the view."""
50 _id = identifier.identifier if isinstance(identifier, Graph) else identifier
51 if _id in self.included_graph_ids:
52 return super().graph(identifier, base=base)
53 msg = f"Graph {_id} is not visible in this view."
54 raise PermissionError(msg)
56 def __len__(self) -> int:
57 """Get the total number of triples in the view."""
58 total = 0
59 for gid in self.included_graph_ids:
60 total += len(super().graph(gid))
61 return total
63 def invert(self) -> "DatasetView":
64 """Return a new DatasetView with all graphs excluded from this view.
66 Creates a new view that includes only the graphs that are NOT in this
67 view's included set. This is useful for separating internal and external
68 graphs, or for creating complementary views of a dataset.
70 Returns:
71 DatasetView with all graphs from the store except those in this view.
73 Example:
74 >>> ds = Dataset()
75 >>> g1 = ds.graph(URIRef("http://example.org/g1"))
76 >>> g2 = ds.graph(URIRef("http://example.org/g2"))
77 >>> view = DatasetView(ds, [URIRef("http://example.org/g1")])
78 >>> inverted = view.invert()
79 >>> # inverted now contains only g2
81 """
82 all_graph_ids = [ctx.identifier for ctx in self.store.contexts()]
83 excluded_ids = [
84 gid for gid in all_graph_ids if gid not in self.included_graph_ids
85 ]
86 # Create new DatasetView from a temporary Dataset wrapper with same store
87 temp_ds = Dataset(store=self.store, default_union=self.default_union)
88 return DatasetView(temp_ds, excluded_ids)
90 def graphs(
91 self,
92 triple: _TripleType | None = None,
93 ) -> Generator[Graph, None, None]:
94 """Return graphs in this view, optionally filtered by triple pattern."""
95 # Get all graphs from parent, but only yield those in our included list
96 for g in super().graphs(triple):
97 if g.identifier in self.included_graph_ids:
98 yield g
100 def quads(
101 self,
102 quad: _TripleOrQuadPatternType | None = None,
103 ) -> Generator[_OptionalIdentifiedQuadType, None, None]:
104 """Return quads matching the pattern from included graphs only."""
105 for q in super().quads(quad):
106 if q[3] in self.included_graph_ids:
107 yield q
109 # The type-checkers don't like that we are not handling the overloads in the
110 # superclass method that handle graph Paths. TODO.
111 def triples(
112 self,
113 triple_or_quad: _TripleOrQuadPatternType = (None, None, None),
114 context: _ContextType | None = None,
115 ) -> Generator[_TripleType, None, None]:
116 """Return triples matching the pattern from included graphs only."""
117 if context is not None:
118 # If context is specified, only return triples from that graph
119 # if it's in the included graphs
120 if context.identifier in self.included_graph_ids: 120 ↛ exitline 120 didn't return from function 'triples' because the condition on line 120 was always true
121 yield from context.triples(triple_or_quad[0:3])
122 elif len(triple_or_quad) == 4 and triple_or_quad[3] is not None: # noqa: PLR2004 122 ↛ 124line 122 didn't jump to line 124 because the condition on line 122 was never true
123 # Quad pattern with specific graph - only query that graph
124 graph_id = triple_or_quad[3]
125 # According to rdflib typing, graph_id can only be Graph here, but I do not
126 # trust rdflib's typing...
127 if isinstance(graph_id, Graph): # pyright: ignore[reportUnnecessaryIsInstance]
128 graph_id = graph_id.identifier
129 if graph_id in self.included_graph_ids:
130 g = super().graph(graph_id)
131 yield from g.triples(triple_or_quad[:3])
132 else:
133 # No context and no graph specified in pattern - return from all
134 # Call triples() on each graph directly to avoid triggering rdflib's
135 # internal contexts() enumeration which tries to access default graph.
136 for gid in self.included_graph_ids:
137 g = super().graph(gid)
138 yield from g.triples(triple_or_quad[:3])
140 def add(
141 self: "DatasetView",
142 triple_or_quad: _TripleOrOptionalQuadType,
143 ) -> "DatasetView":
144 """Add a triple or quad to the store.
146 if a triple is given it is added to the default context
148 If the graph is not in the included set, raise PermissionError.
149 """
150 graph_id = self.default_graph.identifier
151 if len(triple_or_quad) == 4: # noqa: PLR2004
152 graph_id = triple_or_quad[3]
153 if graph_id not in self.included_graph_ids:
154 msg = f"Cannot add to graph {graph_id}: not visible in this view."
155 raise PermissionError(msg)
156 return super().add(triple_or_quad)
158 def remove(
159 self: "DatasetView",
160 triple_or_quad: _TripleOrQuadPatternType,
161 ) -> "DatasetView":
162 """Remove a triple or quads.
164 If the graph is not in the included set, raise PermissionError.
165 The graph is either that specified explicitly in the quad, or the default graph
167 Otherwise, behaviour is as per Dataset.remove():
168 If a triple is given it is removed from all named graphs.
169 If a quad is given it is removed from the specified named graph.
171 """
172 graph_id = self.default_graph.identifier
173 if len(triple_or_quad) == 4: # noqa: PLR2004
174 graph_id = triple_or_quad[3]
175 if graph_id not in self.included_graph_ids:
176 msg = f"Cannot add to graph {graph_id}: not visible in this view."
177 raise PermissionError(msg)
178 # For some bizarre reason, rdflib's Dataset.remove() is typed with
179 # _TripleOrOptionalQuadType, but actually accepts _TripleOrQuadPatternType which
180 # is what the superclass (Graph) remove() method uses.
181 return super().remove(triple_or_quad) # pyright: ignore[reportArgumentType]
183 def remove_graph(
184 self,
185 g: _ContextIdentifierType | _ContextType | str | None,
186 ) -> "DatasetView":
187 """Remove a graph from the store, if visible in this view."""
188 graph_id = g
189 if isinstance(g, Graph):
190 graph_id = g.identifier
191 elif g is None:
192 graph_id = self.default_graph.identifier
194 if graph_id not in self.included_graph_ids:
195 msg = f"Cannot remove graph {graph_id}: not visible in this view."
196 raise PermissionError(msg)
197 return super().remove_graph(g)
199 # I think pyright is incorrectly seeing only a specific overload of
200 # `Dataset.serialize` and thus incorrectly reporting an incompatible override.
201 def serialize( # pyright: ignore[reportIncompatibleMethodOverride]
202 self,
203 destination: str | None = None,
204 format: str = "xml", # noqa: A002
205 base: str | None = None,
206 encoding: str | None = None,
207 **args: object,
208 ) -> bytes | str | Graph:
209 """Serialize the DatasetView to a destination.
211 Only graphs in the included_graph_ids will be serialized. This requires
212 creating a temporary Dataset to work around rdflib's serializers accessing
213 the store directly instead of using our overridden quads() method.
215 The signature matches rdflib.Dataset.serialize().
216 """
217 # Create a temporary dataset with only the included graphs.
218 # This is necessary because rdflib serializers bypass our quads() override
219 # and access the store directly.
220 temp_ds = Dataset()
221 for s, p, o, c in self.quads():
222 temp_ds.add((s, p, o, c)) # type: ignore[arg-type]
224 # Copy namespace bindings from the source dataset to preserve prefixes
225 for prefix, namespace in self.namespaces():
226 temp_ds.bind(prefix, namespace)
228 # Serialize the temporary dataset
229 return temp_ds.serialize(
230 destination=destination,
231 format=format,
232 base=base,
233 encoding=encoding,
234 **args,
235 )
237 def collapse(self) -> Dataset:
238 """Create a new Dataset with this view's triples in the default graph.
240 This is useful for working around bugs in reasoning libraries that behave
241 differently when reasoning over named graphs vs the default graph.
242 See: https://github.com/RDFLib/OWL-RL/issues/76
244 Returns:
245 A new Dataset containing all triples from this view in its default graph.
247 """
248 temp_ds = Dataset()
249 # Copy all triples from this view into the default graph
250 for s, p, o in self.triples():
251 temp_ds.add((s, p, o))
252 return temp_ds
255def graph_lengths(ds: Dataset) -> dict[IdentifiedNode, int]:
256 """Get lengths of all named graphs in a Dataset."""
257 lengths: dict[IdentifiedNode, int] = {}
258 for g in ds.graphs():
259 lengths[g.identifier] = len(g)
260 return lengths