Coverage for src/pythinfer/cli.py: 0% — 67 statements
(coverage.py v7.12.0, report created 2025-11-26 21:27 +0000)

1"""pythinfer CLI entry point.""" 

2 

3import logging 

4from pathlib import Path 

5 

6import typer 

7from rdflib import Dataset, IdentifiedNode 

8 

9from pythinfer.infer import run_inference_backend 

10from pythinfer.inout import Project, create_project, load_project 

11from pythinfer.merge import ( 

12 merge_graphs, 

13) 

14from pythinfer.rdflibplus import DatasetView, graph_lengths 

15 

16app = typer.Typer() 

17logger = logging.getLogger(__name__) 

18 

19 

def configure_logging(verbose: bool) -> None:
    """Configure the root logger: DEBUG when *verbose*, otherwise INFO.

    Args:
        verbose: Enable DEBUG-level output when True.

    """
    if verbose:
        chosen_level = logging.DEBUG
    else:
        chosen_level = logging.INFO
    logging.basicConfig(
        format="%(asctime)s %(levelname)-8s [%(name)s] %(message)s",
        datefmt="%Y-%m-%dT%H:%M:%S",
        level=chosen_level,
        force=True,  # Replace any handlers installed by an earlier basicConfig
    )

@app.callback()
def main_callback(
    # -v/--verbose is a global flag; it applies before any subcommand runs.
    verbose: bool = typer.Option(
        False, "--verbose", "-v", help="Enable verbose (DEBUG) logging output"
    ),
) -> None:
    """Global options for pythinfer CLI."""
    configure_logging(verbose)

@app.command()
def create(
    directory: Path | None = None,
    output: Path | None = None,
) -> None:
    """Create a new pythinfer project file by scanning for RDF files.

    Scans the specified directory (or current directory) for RDF files
    and generates a pythinfer.yaml configuration file.

    Args:
        directory: Directory to scan for RDF files (default: current directory).
        output: Path to create the project file (default: pythinfer.yaml).

    """
    # Fall back to the conventional file name in the current directory.
    target = Path("pythinfer.yaml") if output is None else output

    config_path = create_project(scan_directory=directory, output_path=target)

    typer.secho(
        f"✓ Created project file at: {config_path}",
        fg=typer.colors.GREEN,
    )
    # The enclosing directory name doubles as the project name.
    typer.echo(f"Project name: {config_path.parent.name}")

@app.command()
def merge(
    config: Path | None = None,
    output: Path | None = None,
    *,
    exclude_external: bool = True,
) -> tuple[Dataset, list[IdentifiedNode], Project]:
    """Merge graphs as specified in the config file and export."""
    project = load_project(config)

    # Default export location lives next to the project file.
    destination = output
    if destination is None:
        destination = project.path_self.parent / "derived" / "merged.trig"
    destination.parent.mkdir(parents=True, exist_ok=True)

    typer.echo(f"Merging RDF graphs using project at `{project.path_self}`")
    typer.secho(f"Project loaded: {project}", fg=typer.colors.GREEN)
    ds, external_graph_ids = merge_graphs(project)

    # Split the total triple count into external vs. internal graphs.
    external_total = sum(len(ds.graph(graph_id)) for graph_id in external_graph_ids)
    internal_total = len(ds) - external_total

    typer.secho(
        "Merged graph lengths:"
        f"\n\texternal: {external_total: 4d}"
        f"\n\tinternal: {internal_total: 4d}"
        f"\n\tmerged: {len(ds): 4d}",
        fg=typer.colors.GREEN,
    )

    # By default the external graphs stay out of the serialized file,
    # though they remain present in the returned dataset.
    if exclude_external:
        to_export = DatasetView(ds, external_graph_ids).invert()
    else:
        to_export = ds
    to_export.serialize(destination=destination, format="trig", canon=True)
    typer.echo(f"Exported {len(to_export)} triples to '{destination}'")

    return ds, external_graph_ids, project

@app.command()
def infer(
    config: Path | None = None,
    backend: str = "owlrl",
    output: Path | None = None,
    *,
    include_unwanted_triples: bool = False,
    include_external: bool = False,
) -> tuple[Dataset, list[IdentifiedNode]]:
    """Run inference backends on merged graph."""
    # Reuse the merge command: it loads the project, merges the graphs,
    # and writes the merged export as a side effect.
    ds, external_graph_ids, project = merge(config)
    project.owl_backend = backend
    typer.echo(
        f"Running inference using config: {project.path_self} and backend: {backend}"
    )

    # The backend mutates `ds` in place and returns the external graph IDs
    # extended with the IDs of newly created inference graphs.
    all_external_ids = run_inference_backend(
        ds,
        external_graph_ids,
        project,
        include_unwanted_triples=include_unwanted_triples,
    )
    typer.secho(
        f"Inference complete. {len(ds)} total triples in dataset",
        fg=typer.colors.GREEN,
    )

    # Default export location is keyed by the backend name.
    destination = output
    if destination is None:
        destination = project.path_self.parent / "derived" / f"inferred_{backend}.trig"
    destination.parent.mkdir(parents=True, exist_ok=True)

    if include_external:
        to_export = ds
    else:
        to_export = DatasetView(ds, all_external_ids).invert()
    to_export.serialize(destination=destination, format="trig")
    typer.echo(
        f"Exported {len(to_export)} inferred triples to '{destination}'",
    )

    # Summarize counts: external vs. internal, then one row per named graph.
    external_total = sum(len(ds.graph(graph_id)) for graph_id in all_external_ids)
    internal_total = len(ds) - external_total

    typer.secho(
        "Graph breakdown:"
        f"\n\texternal (incl. inferences): {external_total: 4d}"
        f"\n\tinternal (incl. inferences): {internal_total: 4d}",
        fg=typer.colors.GREEN,
    )
    typer.secho("Named graph breakdown:", fg=typer.colors.YELLOW)
    typer.secho(f"{'Graph':60s} Length", fg=typer.colors.YELLOW, bold=True)
    for graph_id, triple_count in graph_lengths(ds).items():
        typer.secho(f"{graph_id.n3():60s} {triple_count: 4d}", fg=typer.colors.YELLOW)

    return ds, all_external_ids

# Allow direct execution (`python cli.py`) in addition to the installed
# console-script entry point.
if __name__ == "__main__":
    app()