Coverage for src / pythinfer / data.py: 0%

32 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-26 21:27 +0000

1"""Takes care of finding and loading files, and saving data back to disk.""" 

2 

3# TODO: merge into inout.py 

4import logging 

5from pathlib import Path 

6 

7from rdflib import Graph 

8 

# Module-level logger plus short aliases for its info/debug methods.
logger = logging.getLogger(__name__)
info = logger.info
debug = logger.debug
dbg = debug  # shorthand alias; refers to the same bound method as ``debug``

# Directory containing this module; the bundled data file below is resolved
# relative to it.
SCRIPT_DIR = Path(__file__).parent
# NOTE(review): the filename is spelled "unecessary"; presumably this matches
# the file on disk - confirm before correcting the spelling.
UNNECESSARY_INFERENCES_FILE = SCRIPT_DIR.joinpath("known_unecessary_inferences.ttl")

15 

16 

def load_graphs(input_files: list[Path]) -> Graph:
    """Parse each given Turtle file into one combined graph.

    A file that raises while loading is logged together with its traceback
    and skipped; the remaining files are still processed.

    TODO: merge with inout functionality - likely this can just be deleted.

    Args:
        input_files: Paths of the files to parse (always parsed as Turtle).

    Returns:
        Graph: The graph that every input file was parsed into.

    """
    combined = Graph()

    # Graph size after the last successfully loaded file; used to report how
    # many triples each file actually added.
    size_before = 0
    for source in input_files:
        dbg(" Loading: %s", source)
        try:
            combined.parse(source, format="turtle")
            size_after = len(combined)
            added, size_before = size_after - size_before, size_after
            dbg(" ✓ Successfully loaded %d new triples", added)
        except Exception:
            # Best effort: record the failure and carry on with the next file.
            logger.exception(" ✗ Error loading %s", source)

    return combined

47 

48 

def load_unnecessary_inferences() -> Graph:
    """Return a fresh graph parsed from ``UNNECESSARY_INFERENCES_FILE``."""
    # Graph.parse returns the graph it populated, so the call can be chained.
    return Graph().parse(UNNECESSARY_INFERENCES_FILE)

54 

55 

def save_graph(
    graph: Graph,
    output_file: Path,
    namespaces: dict[str, str] | None = None,
) -> Graph:
    """Serialise a graph to disk; route all saves here for identical formatting.

    NB: canon longTurtle is not great with the way it orders things, so
    we might need to call out to riot unfortunately.

    Args:
        graph: RDF graph to write out. Any bindings from ``namespaces`` are
            applied to this graph object itself.
        output_file: Destination path for the serialised graph.
        namespaces: Optional prefix -> namespace mapping; each pair is bound
            on ``graph`` before serialising. ``None`` or an empty mapping
            binds nothing.

    Returns:
        Graph: Whatever ``graph.serialize`` returns for a file destination
        (presumably the graph itself - confirm against the rdflib docs).

    """
    bindings = namespaces if namespaces else {}
    for prefix, namespace in bindings.items():
        graph.bind(prefix, namespace)

    return graph.serialize(
        destination=output_file,
        format="longturtle",
        canon=True,
    )