Coverage for intelligence_toolkit/tests/unit/generate_mock_data/test_schema_builder.py: 100%

262 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-16 13:41 -0300

1# Copyright (c) 2024 Microsoft Corporation. All rights reserved. 

2# Licensed under the MIT license. See LICENSE file in the project. 

3 

4import pytest 

5import pandas as pd 

6from intelligence_toolkit.generate_mock_data.schema_builder import ( 

7 StringFormat, 

8 FieldType, 

9 ArrayFieldType, 

10 PrimitiveFieldType, 

11 ValidationResult, 

12 create_boilerplate_schema, 

13 add_object_field, 

14 add_array_field, 

15 add_primitive_field, 

16 get_subobject, 

17 get_required_list, 

18 set_string_min_length, 

19 set_string_max_length, 

20 set_string_pattern, 

21 set_string_format, 

22 clear_string_constraints, 

23 set_number_minimum, 

24 set_number_maximum, 

25 set_number_multiple_of, 

26 clear_number_constraints, 

27 rename_field, 

28 delete_field, 

29 move_field_up, 

30 move_field_down, 

31 set_required_field_status, 

32 set_enum_field_status, 

33 set_additional_field_status, 

34 generate_object_from_schema, 

35 convert_to_dataframe, 

36 evaluate_object_and_schema, 

37 evaluate_schema, 

38) 

39 

40 

def test_string_format_enum():
    """StringFormat has DATE, EMAIL and UUID; DATE and EMAIL map to "date" and "email"."""
    for member_name in ("DATE", "EMAIL", "UUID"):
        assert hasattr(StringFormat, member_name)

    expected_values = {"DATE": "date", "EMAIL": "email"}
    for member_name, expected in expected_values.items():
        assert getattr(StringFormat, member_name).value == expected

47 

48 

def test_field_type_enum():
    """FieldType has OBJECT, ARRAY and STRING members; STRING maps to "string"."""
    required_members = ("OBJECT", "ARRAY", "STRING")
    for member_name in required_members:
        assert hasattr(FieldType, member_name)

    assert FieldType.STRING.value == "string"

54 

55 

def test_array_field_type_enum():
    """ArrayFieldType offers OBJECT and STRING item types but no ARRAY member."""
    for member_name in ("OBJECT", "STRING"):
        assert hasattr(ArrayFieldType, member_name)

    # Nested arrays are not supported, so ARRAY must be absent.
    assert not hasattr(ArrayFieldType, "ARRAY")

60 

61 

def test_primitive_field_type_enum():
    """PrimitiveFieldType exposes STRING, NUMBER and BOOLEAN members."""
    primitive_names = ("STRING", "NUMBER", "BOOLEAN")
    for member_name in primitive_names:
        assert hasattr(PrimitiveFieldType, member_name)

66 

67 

def test_validation_result_enum():
    """ValidationResult exposes VALID, SCHEMA_INVALID and OBJECT_INVALID members."""
    outcome_names = ("VALID", "SCHEMA_INVALID", "OBJECT_INVALID")
    for member_name in outcome_names:
        assert hasattr(ValidationResult, member_name)

72 

73 

def test_create_boilerplate_schema():
    """The default boilerplate is a dict of type "object" with a "records" property."""
    boilerplate = create_boilerplate_schema()

    assert isinstance(boilerplate, dict)

    # All top-level header keys must be present.
    for key in ("$schema", "title", "description", "type", "properties"):
        assert key in boilerplate

    assert boilerplate["type"] == "object"
    assert "records" in boilerplate["properties"]

85 

86 

def test_create_boilerplate_schema_with_custom_values():
    """Header values passed as keywords appear in the generated schema."""
    overrides = {
        "schema_field": "custom_schema",
        "title_field": "Custom Title",
        "description_field": "Custom Description",
    }

    customized = create_boilerplate_schema(**overrides)

    assert customized["$schema"] == overrides["schema_field"]
    assert customized["title"] == overrides["title_field"]
    assert customized["description"] == overrides["description_field"]

97 

98 

def test_add_primitive_field_string():
    """Adding a STRING primitive stores a "string" entry carrying its description."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    description = "A name field"

    new_label = add_primitive_field(
        schema,
        record_props,
        "name",
        description,
        PrimitiveFieldType.STRING,
    )

    assert new_label in record_props
    created = record_props[new_label]
    assert created["type"] == "string"
    assert created["description"] == description

110 

111 

def test_add_primitive_field_number():
    """Adding a NUMBER primitive stores an entry whose type is "number"."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]

    new_label = add_primitive_field(
        schema,
        record_props,
        "age",
        "An age field",
        PrimitiveFieldType.NUMBER,
    )

    assert new_label in record_props
    assert record_props[new_label]["type"] == "number"

122 

123 

def test_add_primitive_field_boolean():
    """Adding a BOOLEAN primitive stores an entry whose type is "boolean"."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]

    new_label = add_primitive_field(
        schema,
        record_props,
        "active",
        "Active status",
        PrimitiveFieldType.BOOLEAN,
    )

    assert new_label in record_props
    assert record_props[new_label]["type"] == "boolean"

134 

135 

def test_add_object_field():
    """Adding an object field creates a closed object entry with its own properties.

    The new entry must be typed "object", carry a "properties" mapping, and
    have additionalProperties set to the boolean False.
    """
    schema = create_boilerplate_schema()
    field_location = schema["properties"]["records"]["items"]["properties"]

    label = add_object_field(schema, field_location, "address", "An address object")

    assert label in field_location
    created = field_location[label]
    assert created["type"] == "object"
    assert "properties" in created
    # Identity check: `== False` would also accept 0, which is not a valid
    # JSON Schema boolean for additionalProperties.
    assert created["additionalProperties"] is False

146 

147 

def test_add_array_field_string():
    """Adding a STRING array stores an "array" entry whose items are "string"."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]

    new_label = add_array_field(
        schema,
        record_props,
        "tags",
        "List of tags",
        ArrayFieldType.STRING,
    )

    assert new_label in record_props
    created = record_props[new_label]
    assert created["type"] == "array"
    assert created["items"]["type"] == "string"

159 

160 

def test_add_array_field_object():
    """Adding an OBJECT array stores an "array" entry whose items are objects with properties."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]

    new_label = add_array_field(
        schema,
        record_props,
        "contacts",
        "List of contacts",
        ArrayFieldType.OBJECT,
    )

    assert new_label in record_props
    assert record_props[new_label]["type"] == "array"
    item_schema = record_props[new_label]["items"]
    assert item_schema["type"] == "object"
    assert "properties" in item_schema

173 

174 

def test_get_subobject_root():
    """An empty path resolves to the schema's top-level "properties" mapping."""
    schema = create_boilerplate_schema()
    root_path = []

    resolved = get_subobject(schema, root_path)

    assert resolved == schema["properties"]

181 

182 

def test_get_subobject_nested():
    """Resolving the ["records"] path yields a dict or something containing "properties"."""
    schema = create_boilerplate_schema()
    nested_path = ["records"]

    resolved = get_subobject(schema, nested_path)

    # NOTE(review): this check is very permissive (any dict passes); tighten
    # once the exact return shape of get_subobject is confirmed.
    assert "properties" in resolved or isinstance(resolved, dict)

189 

190 

def test_get_required_list():
    """The root-level required list is a list that includes "records"."""
    schema = create_boilerplate_schema()

    root_required = get_required_list(schema, [])

    assert isinstance(root_required, list)
    assert "records" in root_required

198 

199 

def test_set_string_min_length():
    """Setting a minimum length writes it under "minLength"."""
    string_field = {"type": "string"}
    min_length = 5

    set_string_min_length(string_field, min_length)

    assert string_field["minLength"] == min_length

206 

207 

def test_set_string_min_length_remove():
    """Passing None as the minimum length removes an existing "minLength"."""
    string_field = {"type": "string", "minLength": 5}

    set_string_min_length(string_field, None)

    assert "minLength" not in string_field

214 

215 

def test_set_string_max_length():
    """Setting a maximum length writes it under "maxLength"."""
    string_field = {"type": "string"}
    max_length = 100

    set_string_max_length(string_field, max_length)

    assert string_field["maxLength"] == max_length

222 

223 

def test_set_string_pattern():
    """Setting a pattern stores the regex text unchanged under "pattern"."""
    string_field = {"type": "string"}
    uppercase_only = "^[A-Z]+$"

    set_string_pattern(string_field, uppercase_only)

    assert string_field["pattern"] == uppercase_only

230 

231 

def test_set_string_format():
    """Setting StringFormat.EMAIL stores the plain string "email" under "format"."""
    string_field = {"type": "string"}

    set_string_format(string_field, StringFormat.EMAIL)

    assert string_field["format"] == "email"

238 

239 

def test_set_string_format_remove():
    """Passing None as the format removes an existing "format" entry."""
    string_field = {"type": "string", "format": "email"}

    set_string_format(string_field, None)

    assert "format" not in string_field

246 

247 

def test_clear_string_constraints():
    """Clearing string constraints drops minLength, maxLength, pattern and format."""
    constraints = {
        "minLength": 1,
        "maxLength": 100,
        "pattern": ".*",
        "format": "email",
    }
    string_field = {"type": "string", **constraints}

    clear_string_constraints(string_field)

    for constraint_key in constraints:
        assert constraint_key not in string_field

263 

264 

def test_set_number_minimum():
    """A non-exclusive minimum is stored under "minimum" with no "exclusiveMinimum"."""
    number_field = {"type": "number"}
    lower_bound = 10

    set_number_minimum(number_field, lower_bound, False)

    assert number_field["minimum"] == lower_bound
    assert "exclusiveMinimum" not in number_field

272 

273 

def test_set_number_minimum_exclusive():
    """An exclusive minimum is stored under "exclusiveMinimum" with no "minimum"."""
    number_field = {"type": "number"}
    lower_bound = 10

    set_number_minimum(number_field, lower_bound, True)

    assert number_field["exclusiveMinimum"] == lower_bound
    assert "minimum" not in number_field

281 

282 

def test_set_number_maximum():
    """A non-exclusive maximum is stored under "maximum"."""
    number_field = {"type": "number"}
    upper_bound = 100

    set_number_maximum(number_field, upper_bound, False)

    assert number_field["maximum"] == upper_bound

289 

290 

def test_set_number_multiple_of():
    """Setting a multiple-of constraint writes it under "multipleOf"."""
    number_field = {"type": "number"}
    step = 5

    set_number_multiple_of(number_field, step)

    assert number_field["multipleOf"] == step

297 

298 

def test_clear_number_constraints():
    """Clearing number constraints drops minimum, maximum and multipleOf."""
    constraints = {"minimum": 0, "maximum": 100, "multipleOf": 5}
    number_field = {"type": "number", **constraints}

    clear_number_constraints(number_field)

    for constraint_key in constraints:
        assert constraint_key not in number_field

312 

313 

def test_rename_field():
    """Renaming a record field replaces the old key with the new one."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    add_primitive_field(schema, record_props, "oldname", "", PrimitiveFieldType.STRING)

    # The test relies on the added field being stored under "oldname_1".
    old_key, new_key = "oldname_1", "newname"
    rename_field(schema, record_props, ["records"], old_key, new_key)

    assert new_key in record_props
    assert old_key not in record_props

323 

324 

def test_delete_field():
    """Deleting a record field removes its key from the properties mapping."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    add_primitive_field(schema, record_props, "temp", "", PrimitiveFieldType.STRING)

    # The test relies on the added field being stored under "temp_1".
    doomed_key = "temp_1"
    delete_field(schema, ["records"], record_props, doomed_key)

    assert doomed_key not in record_props

333 

334 

def test_move_field_up():
    """Moving the key at index 1 up puts it at index 0 of the property order."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    for base_label in ("first", "second"):
        add_primitive_field(
            schema, record_props, base_label, "", PrimitiveFieldType.STRING
        )

    order_before = list(record_props.keys())
    moved_key = order_before[1]
    move_field_up(schema, ["records"], record_props, moved_key)
    order_after = list(record_props.keys())

    # The field that was second should now lead the ordering.
    assert order_after[0] == moved_key

347 

348 

def test_move_field_down():
    """Moving the key at index 0 down puts it at index 1 of the property order."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    for base_label in ("first", "second"):
        add_primitive_field(
            schema, record_props, base_label, "", PrimitiveFieldType.STRING
        )

    order_before = list(record_props.keys())
    moved_key = order_before[0]
    move_field_down(schema, ["records"], record_props, moved_key)
    order_after = list(record_props.keys())

    # The field that was first should now sit in second position.
    assert order_after[1] == moved_key

361 

362 

def test_set_required_field_status_add():
    """Marking a record field as required adds it to the records required list."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    add_primitive_field(schema, record_props, "field", "", PrimitiveFieldType.STRING)
    records_path = ["records"]

    # The test relies on the added field being stored under "field_1".
    set_required_field_status(schema, records_path, "field_1", True)

    assert "field_1" in get_required_list(schema, records_path)

372 

373 

def test_set_required_field_status_remove():
    """Clearing the required flag removes the field from the records required list."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    add_primitive_field(schema, record_props, "field", "", PrimitiveFieldType.STRING)
    records_path = ["records"]

    # Mark as required first, then flip it back off.
    for required_flag in (True, False):
        set_required_field_status(schema, records_path, "field_1", required_flag)

    assert "field_1" not in get_required_list(schema, records_path)

384 

385 

def test_set_enum_field_status_string():
    """Enabling enum status on a string field reports a change and adds an "enum" list."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    add_primitive_field(schema, record_props, "status", "", PrimitiveFieldType.STRING)

    was_changed = set_enum_field_status(schema, ["records"], "status_1", True)

    assert was_changed
    status_field = record_props["status_1"]
    assert "enum" in status_field
    assert isinstance(status_field["enum"], list)

396 

397 

def test_set_enum_field_status_remove():
    """Disabling enum status after enabling it reports a change and drops "enum"."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    add_primitive_field(schema, record_props, "status", "", PrimitiveFieldType.STRING)
    records_path = ["records"]
    set_enum_field_status(schema, records_path, "status_1", True)

    was_changed = set_enum_field_status(schema, records_path, "status_1", False)

    assert was_changed
    assert "enum" not in record_props["status_1"]

408 

409 

def test_set_additional_field_status():
    """Enabling additional fields sets additionalProperties to the boolean True.

    The test relies on the object field added with base label "obj" being
    stored under the key "obj_1".
    """
    schema = create_boilerplate_schema()
    field_location = schema["properties"]["records"]["items"]["properties"]
    add_object_field(schema, field_location, "obj", "")

    set_additional_field_status(schema, ["records"], "obj_1", True)

    # Identity check: `== True` would also accept 1, which is not a valid
    # JSON Schema boolean for additionalProperties.
    assert field_location["obj_1"]["additionalProperties"] is True

418 

419 

def test_generate_object_from_schema_simple():
    """An object generated from the bare boilerplate is a dict with a "records" key."""
    boilerplate = create_boilerplate_schema()

    generated = generate_object_from_schema(boilerplate)

    assert isinstance(generated, dict)
    assert "records" in generated

427 

428 

def test_generate_object_from_schema_with_fields():
    """With string and number fields added, the generated object holds a "records" list."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    fields_to_add = (
        ("name", PrimitiveFieldType.STRING),
        ("age", PrimitiveFieldType.NUMBER),
    )
    for base_label, primitive_type in fields_to_add:
        add_primitive_field(schema, record_props, base_label, "", primitive_type)

    generated = generate_object_from_schema(schema)

    assert isinstance(generated, dict)
    assert "records" in generated
    assert isinstance(generated["records"], list)

440 

441 

def test_generate_object_from_schema_with_enum():
    """A field with an enum is generated using the first enum value."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    add_primitive_field(schema, record_props, "status", "", PrimitiveFieldType.STRING)
    allowed_values = ["active", "inactive"]
    record_props["status_1"]["enum"] = allowed_values

    generated = generate_object_from_schema(schema)

    # The generator is expected to pick the first listed enum value.
    first_record = generated["records"][0]
    assert first_record["status_1"] == "active"

452 

453 

def test_convert_to_dataframe():
    """Converting a two-record JSON object yields a one-row pandas DataFrame."""
    people = [
        {"name": "Alice", "age": 30},
        {"name": "Bob", "age": 25},
    ]
    payload = {"records": people}

    frame = convert_to_dataframe(payload)

    assert isinstance(frame, pd.DataFrame)
    # One row expected: per the original note, json_normalize runs at root level.
    assert len(frame) == 1

461 

462 

def test_evaluate_object_and_schema_valid():
    """An object generated from the boilerplate schema validates against it."""
    boilerplate = create_boilerplate_schema()
    generated = generate_object_from_schema(boilerplate)

    outcome = evaluate_object_and_schema(generated, boilerplate)

    assert outcome == ValidationResult.VALID

470 

471 

def test_evaluate_schema():
    """The unmodified boilerplate schema evaluates as VALID."""
    boilerplate = create_boilerplate_schema()

    outcome = evaluate_schema(boilerplate)

    assert outcome == ValidationResult.VALID

478 

479 

480def test_unique_field_labels(): 

481 # Test that adding multiple fields with same label creates unique labels 

482 schema = create_boilerplate_schema() 

483 field_location = schema["properties"]["records"]["items"]["properties"] 

484 

485 label1 = add_primitive_field(schema, field_location, "field", "", PrimitiveFieldType.STRING) 

486 label2 = add_primitive_field(schema, field_location, "field", "", PrimitiveFieldType.STRING) 

487 

488 assert label1 != label2 

489 assert label1 in field_location 

490 assert label2 in field_location