Coverage for tests / integration / test_create_project_command.py: 98%

103 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2025-11-26 21:25 +0000

1"""Integration tests for the create_project command.""" 

2 

3from pathlib import Path 

4from tempfile import TemporaryDirectory 

5 

6import pytest 

7import yaml 

8 

9from pythinfer.inout import PROJECT_FILE_NAME, create_project 

10 

11 

class TestCreateProjectCommand:
    """Integration tests for ``create_project``.

    Each test builds a small directory tree, runs ``create_project`` against
    it, and inspects the YAML project file that gets written.
    """

    @staticmethod
    def _load_config(config_path: Path) -> dict:
        """Parse the YAML file at *config_path* and return it as a mapping.

        Fails the calling test with a readable message when the document is
        not a mapping (an empty file parses to ``None``), rather than
        surfacing later as an ``AttributeError`` or ``TypeError``.
        """
        with config_path.open(encoding="utf-8") as stream:
            config = yaml.safe_load(stream)
        assert isinstance(config, dict), (
            f"{config_path} should hold a YAML mapping, "
            f"got {type(config).__name__}"
        )
        return config

    def test_create_project_scans_current_directory_for_rdf_files(
        self,
    ) -> None:
        """Test that create_project scans directory and detects RDF files."""
        rdf_names = {"model.ttl", "data.ttl", "vocab.rdf"}

        with TemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir)

            # RDF files with different extensions, plus one file that is
            # not RDF and must be ignored by the scan.
            for name in sorted(rdf_names):
                (tmp_path / name).touch()
            (tmp_path / "readme.txt").touch()

            # Call create_project with explicit scan_directory
            config_path = create_project(
                scan_directory=tmp_path,
                output_path=tmp_path / PROJECT_FILE_NAME,
            )

            # Verify config file was created
            assert config_path.exists()
            assert config_path.name == PROJECT_FILE_NAME

            config = self._load_config(config_path)
            found_files = set(config.get("data", []))

            # Every RDF file is expected under its path relative to the
            # scanned directory, which here is just the file name.
            missing = rdf_names - found_files
            assert not missing, f"RDF files not detected: {sorted(missing)}"

            # Should not include non-RDF files
            assert not any("readme.txt" in str(f) for f in found_files)

    def test_create_project_with_eg2_projects_example(self) -> None:
        """Test create_project using the eg2-projects example project.

        Skipped when the example directory or its expected output file is
        not present next to the test suite.
        """
        # Use the actual eg2-projects directory as test data
        eg2_path = (
            Path(__file__).parents[2] / "example_projects" / "eg2-projects"
        )
        if not eg2_path.exists():
            pytest.skip("eg2-projects example directory not found")

        # Path to expected output
        expected_config_path = eg2_path / "expected_pythinfer.yaml"
        if not expected_config_path.exists():
            pytest.skip("expected_pythinfer.yaml not found in eg2-projects")

        with TemporaryDirectory() as tmpdir:
            # Write the generated project file outside the example tree so
            # the checked-in example is left untouched.
            output_dir = Path(tmpdir) / "generated_project"
            output_dir.mkdir()

            config_path = create_project(
                scan_directory=eg2_path,
                output_path=output_dir / PROJECT_FILE_NAME,
            )

            # Verify config file was created
            assert config_path.exists()
            assert config_path.name == PROJECT_FILE_NAME

            # Compare the generated configuration with the expected one
            generated_config = self._load_config(config_path)
            expected_config = self._load_config(expected_config_path)
            assert generated_config == expected_config

    def test_create_project_generates_valid_yaml(self) -> None:
        """Test that create_project generates valid YAML that can be loaded."""
        with TemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir)

            # Create some RDF files
            (tmp_path / "data1.ttl").touch()
            (tmp_path / "vocab.rdf").touch()

            config_path = create_project(
                scan_directory=tmp_path,
                output_path=tmp_path / PROJECT_FILE_NAME,
            )

            # _load_config verifies the document parses to a mapping
            # *before* any key lookup is attempted.
            config = self._load_config(config_path)

            # Verify required fields exist
            assert "data" in config or "internal_vocabs" in config

    def test_create_project_respects_output_path(self) -> None:
        """Test that create_project creates file at specified output path."""
        with TemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir)

            # Create a subdirectory for output
            output_dir = tmp_path / "config"
            output_dir.mkdir()

            # Create some RDF files in parent directory
            (tmp_path / "data.ttl").touch()

            # Specify custom output path
            custom_config_path = output_dir / "custom.yaml"
            config_path = create_project(
                scan_directory=tmp_path,
                output_path=custom_config_path,
            )

            # Should create file at custom location
            assert config_path == custom_config_path
            assert config_path.exists()

    def test_create_project_handles_nested_directories(self) -> None:
        """Test that create_project scans nested directories for RDF files."""
        with TemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir)

            # Create nested structure
            models_dir = tmp_path / "models"
            models_dir.mkdir()
            (models_dir / "model1.ttl").touch()

            data_dir = tmp_path / "data"
            data_dir.mkdir()
            (data_dir / "data1.rdf").touch()

            config_path = create_project(
                scan_directory=tmp_path,
                output_path=tmp_path / PROJECT_FILE_NAME,
            )

            config = self._load_config(config_path)
            file_names = [str(f).lower() for f in config.get("data", [])]

            # Should find files in subdirectories.
            # NOTE(review): this passes when only ONE of the two nested
            # files is listed under "data". Tighten it to require both once
            # it is confirmed that both are reported under that key.
            assert any("model1" in f or "data1" in f for f in file_names)

    def test_create_project_with_no_rdf_files(self) -> None:
        """Test create_project behavior when no RDF files are found."""
        with TemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir)

            # Create only non-RDF files
            (tmp_path / "readme.txt").touch()
            (tmp_path / "notes.md").touch()

            # Should still create a config file, but with empty or minimal data
            config_path = create_project(
                scan_directory=tmp_path,
                output_path=tmp_path / PROJECT_FILE_NAME,
            )
            assert config_path.exists()

            config = self._load_config(config_path)

            # Empty or missing "data": the default covers the missing key.
            assert config.get("data", []) == []

    def test_create_project_excludes_derived_output(self) -> None:
        """Test that create_project excludes the 'derived' directory."""
        with TemporaryDirectory() as tmpdir:
            tmp_path = Path(tmpdir)

            # Create RDF files in root
            (tmp_path / "data.ttl").touch()

            # Create a 'derived' directory with output files
            derived_dir = tmp_path / "derived"
            derived_dir.mkdir()
            (derived_dir / "inference_output.ttl").touch()

            # scan_directory must be given explicitly: without it the scan
            # uses create_project's default location (presumably the
            # current working directory) and never sees the tree built
            # above, which would make this test pass vacuously.
            config_path = create_project(
                scan_directory=tmp_path,
                output_path=tmp_path / PROJECT_FILE_NAME,
            )

            config = self._load_config(config_path)
            data_files = [str(f) for f in config.get("data", [])]

            # The scan really covered the temporary tree ...
            assert "data.ttl" in data_files
            # ... and should not include files from 'derived' directory
            assert not any("derived" in f for f in data_files)