Coverage for intelligence_toolkit/tests/unit/generate_mock_data/test_schema_builder.py: 100%
262 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-16 13:41 -0300
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-16 13:41 -0300
1# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
2# Licensed under the MIT license. See LICENSE file in the project.
4import pytest
5import pandas as pd
6from intelligence_toolkit.generate_mock_data.schema_builder import (
7 StringFormat,
8 FieldType,
9 ArrayFieldType,
10 PrimitiveFieldType,
11 ValidationResult,
12 create_boilerplate_schema,
13 add_object_field,
14 add_array_field,
15 add_primitive_field,
16 get_subobject,
17 get_required_list,
18 set_string_min_length,
19 set_string_max_length,
20 set_string_pattern,
21 set_string_format,
22 clear_string_constraints,
23 set_number_minimum,
24 set_number_maximum,
25 set_number_multiple_of,
26 clear_number_constraints,
27 rename_field,
28 delete_field,
29 move_field_up,
30 move_field_down,
31 set_required_field_status,
32 set_enum_field_status,
33 set_additional_field_status,
34 generate_object_from_schema,
35 convert_to_dataframe,
36 evaluate_object_and_schema,
37 evaluate_schema,
38)
def test_string_format_enum():
    """StringFormat exposes DATE, EMAIL and UUID; DATE and EMAIL carry lowercase values."""
    for member_name in ("DATE", "EMAIL", "UUID"):
        assert hasattr(StringFormat, member_name)

    date_value = StringFormat.DATE.value
    assert date_value == "date"
    email_value = StringFormat.EMAIL.value
    assert email_value == "email"
def test_field_type_enum():
    """FieldType exposes OBJECT, ARRAY and STRING; STRING has the value ``"string"``."""
    for member_name in ("OBJECT", "ARRAY", "STRING"):
        assert hasattr(FieldType, member_name)

    string_value = FieldType.STRING.value
    assert string_value == "string"
def test_array_field_type_enum():
    """ArrayFieldType exposes OBJECT and STRING but no ARRAY member."""
    for member_name in ("OBJECT", "STRING"):
        assert hasattr(ArrayFieldType, member_name)

    # No nested arrays
    has_array_member = hasattr(ArrayFieldType, "ARRAY")
    assert not has_array_member
def test_primitive_field_type_enum():
    """PrimitiveFieldType exposes STRING, NUMBER and BOOLEAN members."""
    for member_name in ("STRING", "NUMBER", "BOOLEAN"):
        assert hasattr(PrimitiveFieldType, member_name)
def test_validation_result_enum():
    """ValidationResult exposes VALID, SCHEMA_INVALID and OBJECT_INVALID members."""
    for member_name in ("VALID", "SCHEMA_INVALID", "OBJECT_INVALID"):
        assert hasattr(ValidationResult, member_name)
def test_create_boilerplate_schema():
    """The default boilerplate is a dict of type ``object`` with a ``records`` property."""
    boilerplate = create_boilerplate_schema()

    assert isinstance(boilerplate, dict)
    # Top-level metadata keys must all be present.
    for key in ("$schema", "title", "description", "type"):
        assert key in boilerplate
    assert boilerplate["type"] == "object"
    assert "properties" in boilerplate
    top_level_properties = boilerplate["properties"]
    assert "records" in top_level_properties
def test_create_boilerplate_schema_with_custom_values():
    """Custom schema/title/description arguments are written to the matching schema keys."""
    overrides = {
        "schema_field": "custom_schema",
        "title_field": "Custom Title",
        "description_field": "Custom Description",
    }

    customised = create_boilerplate_schema(**overrides)

    assert customised["$schema"] == "custom_schema"
    assert customised["title"] == "Custom Title"
    assert customised["description"] == "Custom Description"
def test_add_primitive_field_string():
    """Adding a STRING primitive stores a ``string`` field with the given description."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]

    new_label = add_primitive_field(
        schema,
        record_props,
        "name",
        "A name field",
        PrimitiveFieldType.STRING,
    )

    assert new_label in record_props
    created_field = record_props[new_label]
    assert created_field["type"] == "string"
    assert created_field["description"] == "A name field"
def test_add_primitive_field_number():
    """Adding a NUMBER primitive stores a field whose type is ``number``."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]

    new_label = add_primitive_field(
        schema,
        record_props,
        "age",
        "An age field",
        PrimitiveFieldType.NUMBER,
    )

    assert new_label in record_props
    created_field = record_props[new_label]
    assert created_field["type"] == "number"
def test_add_primitive_field_boolean():
    """Adding a BOOLEAN primitive stores a field whose type is ``boolean``."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]

    new_label = add_primitive_field(
        schema,
        record_props,
        "active",
        "Active status",
        PrimitiveFieldType.BOOLEAN,
    )

    assert new_label in record_props
    created_field = record_props[new_label]
    assert created_field["type"] == "boolean"
def test_add_object_field():
    """Adding an object field stores an ``object`` entry that forbids extra properties.

    The new entry must have a ``properties`` key and ``additionalProperties``
    set to the boolean ``False``.
    """
    schema = create_boilerplate_schema()
    field_location = schema["properties"]["records"]["items"]["properties"]

    label = add_object_field(schema, field_location, "address", "An address object")

    assert label in field_location
    assert field_location[label]["type"] == "object"
    assert "properties" in field_location[label]
    # Identity check: ``== False`` would also accept 0, which is not a valid
    # boolean for this flag.
    assert field_location[label]["additionalProperties"] is False
def test_add_array_field_string():
    """Adding a STRING array stores an ``array`` field whose items are ``string``."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]

    new_label = add_array_field(
        schema,
        record_props,
        "tags",
        "List of tags",
        ArrayFieldType.STRING,
    )

    assert new_label in record_props
    created_field = record_props[new_label]
    assert created_field["type"] == "array"
    assert created_field["items"]["type"] == "string"
def test_add_array_field_object():
    """Adding an OBJECT array stores an ``array`` field whose items are objects with properties."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]

    new_label = add_array_field(
        schema,
        record_props,
        "contacts",
        "List of contacts",
        ArrayFieldType.OBJECT,
    )

    assert new_label in record_props
    created_field = record_props[new_label]
    assert created_field["type"] == "array"
    assert created_field["items"]["type"] == "object"
    assert "properties" in created_field["items"]
def test_get_subobject_root():
    """An empty nesting path resolves to the schema's top-level ``properties``."""
    schema = create_boilerplate_schema()
    empty_path = []

    root_subobject = get_subobject(schema, empty_path)

    assert root_subobject == schema["properties"]
def test_get_subobject_nested():
    """The ``["records"]`` path resolves to a dict or something containing ``"properties"``."""
    schema = create_boilerplate_schema()
    nesting_path = ["records"]

    nested_subobject = get_subobject(schema, nesting_path)

    # NOTE(review): the isinstance fallback lets any dict pass; consider
    # asserting the exact sub-object once the expected shape is confirmed.
    assert "properties" in nested_subobject or isinstance(nested_subobject, dict)
def test_get_required_list():
    """The root-level required list is a list that contains ``records``."""
    schema = create_boilerplate_schema()

    root_required = get_required_list(schema, [])

    assert isinstance(root_required, list)
    assert "records" in root_required
def test_set_string_min_length():
    """Setting a minimum length of 5 stores it under ``minLength``."""
    string_field = {"type": "string"}
    wanted_length = 5

    set_string_min_length(string_field, wanted_length)

    assert string_field["minLength"] == 5
def test_set_string_min_length_remove():
    """Passing ``None`` removes an existing ``minLength`` constraint."""
    string_field = {"type": "string", "minLength": 5}

    set_string_min_length(string_field, None)

    has_min_length = "minLength" in string_field
    assert not has_min_length
def test_set_string_max_length():
    """Setting a maximum length of 100 stores it under ``maxLength``."""
    string_field = {"type": "string"}
    wanted_length = 100

    set_string_max_length(string_field, wanted_length)

    assert string_field["maxLength"] == 100
def test_set_string_pattern():
    """Setting a regex pattern stores it unchanged under ``pattern``."""
    string_field = {"type": "string"}
    uppercase_only = "^[A-Z]+$"

    set_string_pattern(string_field, uppercase_only)

    assert string_field["pattern"] == "^[A-Z]+$"
def test_set_string_format():
    """Setting the EMAIL format stores the string ``"email"`` under ``format``."""
    string_field = {"type": "string"}
    chosen_format = StringFormat.EMAIL

    set_string_format(string_field, chosen_format)

    assert string_field["format"] == "email"
def test_set_string_format_remove():
    """Passing ``None`` removes an existing ``format`` constraint."""
    string_field = {"type": "string", "format": "email"}

    set_string_format(string_field, None)

    has_format = "format" in string_field
    assert not has_format
def test_clear_string_constraints():
    """Clearing removes ``minLength``, ``maxLength``, ``pattern`` and ``format``."""
    constrained_field = {"type": "string"}
    constrained_field["minLength"] = 1
    constrained_field["maxLength"] = 100
    constrained_field["pattern"] = ".*"
    constrained_field["format"] = "email"

    clear_string_constraints(constrained_field)

    for constraint_key in ("minLength", "maxLength", "pattern", "format"):
        assert constraint_key not in constrained_field
def test_set_number_minimum():
    """A non-exclusive minimum is stored under ``minimum`` only."""
    number_field = {"type": "number"}
    exclusive = False

    set_number_minimum(number_field, 10, exclusive)

    assert number_field["minimum"] == 10
    has_exclusive_key = "exclusiveMinimum" in number_field
    assert not has_exclusive_key
def test_set_number_minimum_exclusive():
    """An exclusive minimum is stored under ``exclusiveMinimum`` only."""
    number_field = {"type": "number"}
    exclusive = True

    set_number_minimum(number_field, 10, exclusive)

    assert number_field["exclusiveMinimum"] == 10
    has_inclusive_key = "minimum" in number_field
    assert not has_inclusive_key
def test_set_number_maximum():
    """A non-exclusive maximum of 100 is stored under ``maximum``."""
    number_field = {"type": "number"}
    exclusive = False

    set_number_maximum(number_field, 100, exclusive)

    assert number_field["maximum"] == 100
def test_set_number_multiple_of():
    """Setting a multiple-of value of 5 stores it under ``multipleOf``."""
    number_field = {"type": "number"}
    step = 5

    set_number_multiple_of(number_field, step)

    assert number_field["multipleOf"] == 5
def test_clear_number_constraints():
    """Clearing removes ``minimum``, ``maximum`` and ``multipleOf``."""
    constrained_field = {"type": "number"}
    constrained_field["minimum"] = 0
    constrained_field["maximum"] = 100
    constrained_field["multipleOf"] = 5

    clear_number_constraints(constrained_field)

    for constraint_key in ("minimum", "maximum", "multipleOf"):
        assert constraint_key not in constrained_field
def test_rename_field():
    """Renaming ``oldname_1`` to ``newname`` replaces the key in the record properties."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    add_primitive_field(
        schema, record_props, "oldname", "", PrimitiveFieldType.STRING
    )
    nesting_path = ["records"]

    # The test addresses the freshly added field by the label "oldname_1".
    rename_field(schema, record_props, nesting_path, "oldname_1", "newname")

    assert "newname" in record_props
    assert "oldname_1" not in record_props
def test_delete_field():
    """Deleting ``temp_1`` removes that key from the record properties."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    add_primitive_field(
        schema, record_props, "temp", "", PrimitiveFieldType.STRING
    )
    nesting_path = ["records"]

    # The test addresses the freshly added field by the label "temp_1".
    delete_field(schema, nesting_path, record_props, "temp_1")

    assert "temp_1" not in record_props
def test_move_field_up():
    """Moving the second field up places it first in the property ordering."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    for field_name in ("first", "second"):
        add_primitive_field(
            schema, record_props, field_name, "", PrimitiveFieldType.STRING
        )

    order_before = list(record_props)
    second_label = order_before[1]
    move_field_up(schema, ["records"], record_props, second_label)
    order_after = list(record_props)

    # Second field should now be first
    assert order_after[0] == second_label
def test_move_field_down():
    """Moving the first field down places it second in the property ordering."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    for field_name in ("first", "second"):
        add_primitive_field(
            schema, record_props, field_name, "", PrimitiveFieldType.STRING
        )

    order_before = list(record_props)
    first_label = order_before[0]
    move_field_down(schema, ["records"], record_props, first_label)
    order_after = list(record_props)

    # First field should now be second
    assert order_after[1] == first_label
def test_set_required_field_status_add():
    """Marking ``field_1`` as required adds it to the ``records`` required list."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    add_primitive_field(
        schema, record_props, "field", "", PrimitiveFieldType.STRING
    )
    nesting_path = ["records"]

    set_required_field_status(schema, nesting_path, "field_1", True)

    required_labels = get_required_list(schema, ["records"])
    assert "field_1" in required_labels
def test_set_required_field_status_remove():
    """Un-marking a previously required ``field_1`` removes it from the required list."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    add_primitive_field(
        schema, record_props, "field", "", PrimitiveFieldType.STRING
    )
    # Arrange: the field starts out required.
    set_required_field_status(schema, ["records"], "field_1", True)

    set_required_field_status(schema, ["records"], "field_1", False)

    required_labels = get_required_list(schema, ["records"])
    assert "field_1" not in required_labels
def test_set_enum_field_status_string():
    """Enabling enum status on a string field reports a change and adds an ``enum`` list."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    add_primitive_field(
        schema, record_props, "status", "", PrimitiveFieldType.STRING
    )

    was_changed = set_enum_field_status(schema, ["records"], "status_1", True)

    assert was_changed
    assert "enum" in record_props["status_1"]
    enum_values = record_props["status_1"]["enum"]
    assert isinstance(enum_values, list)
def test_set_enum_field_status_remove():
    """Disabling enum status on an enum field reports a change and drops the ``enum`` key."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    add_primitive_field(
        schema, record_props, "status", "", PrimitiveFieldType.STRING
    )
    # Arrange: the field starts out as an enum.
    set_enum_field_status(schema, ["records"], "status_1", True)

    was_changed = set_enum_field_status(schema, ["records"], "status_1", False)

    assert was_changed
    status_field = record_props["status_1"]
    assert "enum" not in status_field
def test_set_additional_field_status():
    """Enabling additional fields on ``obj_1`` sets ``additionalProperties`` to ``True``."""
    schema = create_boilerplate_schema()
    field_location = schema["properties"]["records"]["items"]["properties"]
    add_object_field(schema, field_location, "obj", "")

    set_additional_field_status(schema, ["records"], "obj_1", True)

    # Identity check: ``== True`` would also accept 1, which is not a valid
    # boolean for this flag.
    assert field_location["obj_1"]["additionalProperties"] is True
def test_generate_object_from_schema_simple():
    """Generating from the boilerplate schema yields a dict with a ``records`` key."""
    generated = generate_object_from_schema(create_boilerplate_schema())

    assert isinstance(generated, dict)
    assert "records" in generated
def test_generate_object_from_schema_with_fields():
    """With a string and a number field added, the generated ``records`` value is a list."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    fields_to_add = (
        ("name", PrimitiveFieldType.STRING),
        ("age", PrimitiveFieldType.NUMBER),
    )
    for field_name, field_type in fields_to_add:
        add_primitive_field(schema, record_props, field_name, "", field_type)

    generated = generate_object_from_schema(schema)

    assert isinstance(generated, dict)
    assert "records" in generated
    assert isinstance(generated["records"], list)
def test_generate_object_from_schema_with_enum():
    """For an enum field, the first generated record carries the first enum value."""
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]
    add_primitive_field(
        schema, record_props, "status", "", PrimitiveFieldType.STRING
    )
    allowed_values = ["active", "inactive"]
    record_props["status_1"]["enum"] = allowed_values

    generated = generate_object_from_schema(schema)

    # Should use first enum value
    first_record = generated["records"][0]
    assert first_record["status_1"] == "active"
def test_convert_to_dataframe():
    """A two-record object converts to a DataFrame with a single row."""
    alice = {"name": "Alice", "age": 30}
    bob = {"name": "Bob", "age": 25}
    payload = {"records": [alice, bob]}

    frame = convert_to_dataframe(payload)

    assert isinstance(frame, pd.DataFrame)
    row_count = len(frame)
    assert row_count == 1  # json_normalize at root level
def test_evaluate_object_and_schema_valid():
    """An object generated from the boilerplate schema validates as VALID against it."""
    schema = create_boilerplate_schema()
    generated = generate_object_from_schema(schema)

    outcome = evaluate_object_and_schema(generated, schema)

    assert outcome == ValidationResult.VALID
def test_evaluate_schema():
    """The boilerplate schema itself evaluates as VALID."""
    outcome = evaluate_schema(create_boilerplate_schema())

    assert outcome == ValidationResult.VALID
def test_unique_field_labels():
    """Adding two fields with the same requested label yields two distinct stored labels."""
    # Test that adding multiple fields with same label creates unique labels
    schema = create_boilerplate_schema()
    record_props = schema["properties"]["records"]["items"]["properties"]

    first_label = add_primitive_field(
        schema, record_props, "field", "", PrimitiveFieldType.STRING
    )
    second_label = add_primitive_field(
        schema, record_props, "field", "", PrimitiveFieldType.STRING
    )

    assert first_label != second_label
    for stored_label in (first_label, second_label):
        assert stored_label in record_props