Coverage for src / pythinfer / data.py: 0%
32 statements
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-26 21:27 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2025-11-26 21:27 +0000
1"""Takes care of finding and loading files, and saving data back to disk."""
3# TODO: merge into inout.py
4import logging
5from pathlib import Path
7from rdflib import Graph
# Module-level logger, plus short aliases for the most frequently used calls.
logger = logging.getLogger(__name__)
info = logger.info
debug = logger.debug
dbg = debug  # same bound method as ``debug``, just a shorter name

# Directory that contains this module.
SCRIPT_DIR = Path(__file__).parent
# Data file read by load_unnecessary_inferences(), resolved next to this module.
# NOTE(review): "unecessary" is spelled this way in the file name literal;
# presumably it matches the file on disk - confirm before renaming either.
UNNECESSARY_INFERENCES_FILE = SCRIPT_DIR.joinpath("known_unecessary_inferences.ttl")
def load_graphs(input_files: list[Path]) -> Graph:
    """Parse several Turtle files into one combined RDF graph.

    Every file is parsed with ``format="turtle"`` straight into the same
    graph. A file that raises while loading is logged (with traceback) and
    skipped, so one bad input does not abort the whole load.

    TODO: merge with inout functionality - likely this can just be deleted.

    Args:
        input_files: Paths of the TTL files to merge.

    Returns:
        The graph holding the triples from every file that loaded.

    """
    combined = Graph()

    # Size of the graph after the previous successful load; the difference to
    # the current size is what gets reported as "new" triples.
    seen = 0
    for source in input_files:
        dbg(" Loading: %s", source)
        try:
            combined.parse(source, format="turtle")
            added = len(combined) - seen
            seen += added
            dbg(" ✓ Successfully loaded %d new triples", added)
        except Exception:
            # Best-effort: record the failure and carry on with the next file.
            logger.exception(" ✗ Error loading %s", source)

    return combined
def load_unnecessary_inferences() -> Graph:
    """Return a fresh graph parsed from ``UNNECESSARY_INFERENCES_FILE``.

    No explicit format is passed to the parser, and any error raised while
    parsing propagates to the caller.
    """
    # Graph.parse returns the graph it was called on, so the call can be chained.
    return Graph().parse(UNNECESSARY_INFERENCES_FILE)
def save_graph(
    graph: Graph,
    output_file: Path,
    namespaces: dict[str, str] | None = None,
) -> Graph:
    """Write ``graph`` to ``output_file`` as canonical long-form Turtle.

    Routing every save through this function keeps output formatting identical.

    NB: canon longTurtle is not great with the way it orders things, so
    we might need to call out to riot unfortunately.

    Args:
        graph: RDF graph to save. Prefix bindings are applied to this object
            in place before it is serialized.
        output_file: Path to save the graph to.
        namespaces: Optional dict of prefix->namespace bindings to apply before
            saving. ``None`` or an empty dict adds no bindings.

    Returns:
        Whatever ``graph.serialize`` returns for a file destination
        (annotated here as the graph).

    """
    bindings = namespaces or {}
    for prefix, namespace in bindings.items():
        graph.bind(prefix, namespace)
    return graph.serialize(destination=output_file, format="longturtle", canon=True)