Coverage for intelligence_toolkit/tests/unit/generate_mock_data/test_api.py: 100%
139 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-16 13:41 -0300
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-16 13:41 -0300
1# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
2# Licensed under the MIT license. See LICENSE file in the project.
4import pytest
5import pandas as pd
6from unittest.mock import MagicMock, patch, AsyncMock
7from intelligence_toolkit.generate_mock_data.api import GenerateMockData
def test_generate_mock_data_initialization():
    """A newly constructed GenerateMockData compares equal to empty containers."""
    instance = GenerateMockData()

    # Attribute name -> the empty value it is expected to equal right after
    # construction; checked in the same order as listed.
    expected_defaults = {
        "json_schema": {},
        "record_arrays": [],
        "json_object": {},
        "array_dfs": {},
    }
    for attr_name, empty_value in expected_defaults.items():
        assert getattr(instance, attr_name) == empty_value
def test_set_schema():
    """set_schema stores the schema it is given; record_arrays stays a list."""
    records_property = {
        "type": "array",
        "items": {"type": "object", "properties": {}},
    }
    schema = {"properties": {"records": records_property}}
    instance = GenerateMockData()

    instance.set_schema(schema)

    assert instance.json_schema == schema
    assert isinstance(instance.record_arrays, list)
@patch("intelligence_toolkit.generate_mock_data.api.data_generator.extract_array_fields")
def test_set_schema_extracts_arrays(mock_extract):
    """set_schema invokes extract_array_fields and keeps its result.

    The extractor is patched so the test controls exactly what ends up in
    ``record_arrays``.
    """
    mock_extract.return_value = [["records"], ["items"]]
    instance = GenerateMockData()

    instance.set_schema({"properties": {}})

    assert mock_extract.called
    assert instance.record_arrays == [["records"], ["items"]]
def test_set_ai_configuration():
    """set_ai_configuration exposes the supplied object as ai_configuration."""
    config_stub = MagicMock()
    instance = GenerateMockData()

    instance.set_ai_configuration(config_stub)

    assert instance.ai_configuration == config_stub
@pytest.mark.asyncio
@patch("intelligence_toolkit.generate_mock_data.api.data_generator.generate_data")
async def test_generate_data_records_basic(mock_generate):
    """generate_data_records calls generate_data and leaves dict results.

    The generator is patched to return a (json object, dataframes) pair so no
    real generation takes place.
    """
    stub_json = {"records": []}
    stub_frames = {"records": pd.DataFrame()}
    mock_generate.return_value = (stub_json, stub_frames)

    instance = GenerateMockData()
    instance.set_ai_configuration(MagicMock())
    instance.set_schema({"properties": {}})

    request = {
        "num_records_overall": 10,
        "records_per_batch": 5,
        "duplicate_records_per_batch": 1,
        "related_records_per_batch": 1,
    }
    await instance.generate_data_records(**request)

    assert mock_generate.called
    assert isinstance(instance.json_object, dict)
    assert isinstance(instance.array_dfs, dict)
@pytest.mark.asyncio
@patch("intelligence_toolkit.generate_mock_data.api.data_generator.generate_data")
async def test_generate_data_records_with_guidance(mock_generate):
    """generation_guidance and temperature reach generate_data as keywords."""
    mock_generate.return_value = ({}, {})

    instance = GenerateMockData()
    instance.set_ai_configuration(MagicMock())
    instance.set_schema({"properties": {}})

    request = {
        "num_records_overall": 5,
        "records_per_batch": 5,
        "duplicate_records_per_batch": 0,
        "related_records_per_batch": 0,
        "generation_guidance": "Generate realistic data",
        "temperature": 0.7,
    }
    await instance.generate_data_records(**request)

    # Inspect the keyword arguments of the most recent generate_data call.
    forwarded = mock_generate.call_args.kwargs
    assert forwarded["generation_guidance"] == "Generate realistic data"
    assert forwarded["temperature"] == 0.7
@pytest.mark.asyncio
@patch("intelligence_toolkit.generate_mock_data.api.data_generator.generate_data")
async def test_generate_data_records_with_callbacks(mock_generate):
    """Both callbacks are handed to generate_data under their keyword names."""
    mock_generate.return_value = ({}, {})
    on_df_update = MagicMock()
    on_batch = MagicMock()

    instance = GenerateMockData()
    instance.set_ai_configuration(MagicMock())
    instance.set_schema({"properties": {}})

    request = {
        "num_records_overall": 10,
        "records_per_batch": 5,
        "duplicate_records_per_batch": 1,
        "related_records_per_batch": 1,
        "df_update_callback": on_df_update,
        "callback_batch": on_batch,
    }
    await instance.generate_data_records(**request)

    # Inspect the keyword arguments of the most recent generate_data call.
    forwarded = mock_generate.call_args.kwargs
    assert forwarded["df_update_callback"] == on_df_update
    assert forwarded["callback_batch"] == on_batch
@pytest.mark.asyncio
@patch("intelligence_toolkit.generate_mock_data.api.data_generator.generate_data")
async def test_generate_data_records_parallel_batches(mock_generate):
    """parallel_batches is forwarded to generate_data as a keyword argument."""
    mock_generate.return_value = ({}, {})

    instance = GenerateMockData()
    instance.set_ai_configuration(MagicMock())
    instance.set_schema({"properties": {}})

    request = {
        "num_records_overall": 50,
        "records_per_batch": 10,
        "duplicate_records_per_batch": 2,
        "related_records_per_batch": 2,
        "parallel_batches": 10,
    }
    await instance.generate_data_records(**request)

    forwarded = mock_generate.call_args.kwargs
    assert forwarded["parallel_batches"] == 10
@pytest.mark.asyncio
@patch("intelligence_toolkit.generate_mock_data.api.text_generator.generate_text_data")
async def test_generate_text_data_basic(mock_generate):
    """generate_text_data calls the text generator and sets text_list/text_df."""
    stub_texts = ["Text 1", "Text 2"]
    stub_frame = pd.DataFrame({"mock_text": ["Text 1", "Text 2"]})
    mock_generate.return_value = (stub_texts, stub_frame)

    instance = GenerateMockData()
    instance.set_ai_configuration(MagicMock())
    people = pd.DataFrame({"name": ["Alice", "Bob"], "age": [30, 25]})

    await instance.generate_text_data(people)

    assert mock_generate.called
    # Presence is checked for both attributes before their types are checked.
    expected_types = {"text_list": list, "text_df": pd.DataFrame}
    for attr_name in expected_types:
        assert hasattr(instance, attr_name)
    for attr_name, attr_type in expected_types.items():
        assert isinstance(getattr(instance, attr_name), attr_type)
@pytest.mark.asyncio
@patch("intelligence_toolkit.generate_mock_data.api.text_generator.generate_text_data")
async def test_generate_text_data_converts_rows_to_json(mock_generate):
    """A one-row frame yields a one-element input_texts list for the generator.

    NOTE(review): only the container type and length are asserted here; the
    JSON content of each entry is not checked - confirm whether it should be.
    """
    mock_generate.return_value = (["Text"], pd.DataFrame())

    instance = GenerateMockData()
    instance.set_ai_configuration(MagicMock())
    single_row = pd.DataFrame({"col1": ["value1"], "col2": ["value2"]})

    await instance.generate_text_data(single_row)

    # Should convert each row to JSON string
    forwarded_texts = mock_generate.call_args.kwargs["input_texts"]
    assert isinstance(forwarded_texts, list)
    assert len(forwarded_texts) == 1
@pytest.mark.asyncio
@patch("intelligence_toolkit.generate_mock_data.api.text_generator.generate_text_data")
async def test_generate_text_data_with_parameters(mock_generate):
    """generation_guidance and temperature reach the text generator as keywords."""
    mock_generate.return_value = ([], pd.DataFrame())

    instance = GenerateMockData()
    instance.set_ai_configuration(MagicMock())
    frame = pd.DataFrame({"data": [1]})

    options = {"generation_guidance": "Be concise", "temperature": 0.3}
    await instance.generate_text_data(frame, **options)

    forwarded = mock_generate.call_args.kwargs
    assert forwarded["generation_guidance"] == "Be concise"
    assert forwarded["temperature"] == 0.3
@pytest.mark.asyncio
@patch("intelligence_toolkit.generate_mock_data.api.text_generator.generate_text_data")
async def test_generate_text_data_with_callback(mock_generate):
    """df_update_callback is forwarded to the text generator as a keyword."""
    mock_generate.return_value = ([], pd.DataFrame())
    on_df_update = MagicMock()

    instance = GenerateMockData()
    instance.set_ai_configuration(MagicMock())
    frame = pd.DataFrame({"data": [1]})

    await instance.generate_text_data(frame, df_update_callback=on_df_update)

    forwarded = mock_generate.call_args.kwargs
    assert forwarded["df_update_callback"] == on_df_update
@pytest.mark.asyncio
@patch("intelligence_toolkit.generate_mock_data.api.text_generator.generate_text_data")
async def test_generate_text_data_empty_dataframe(mock_generate):
    """An empty frame results in an empty input_texts list for the generator."""
    mock_generate.return_value = ([], pd.DataFrame())

    instance = GenerateMockData()
    instance.set_ai_configuration(MagicMock())
    no_rows = pd.DataFrame()

    await instance.generate_text_data(no_rows)

    forwarded = mock_generate.call_args.kwargs
    assert forwarded["input_texts"] == []
def test_schema_persistence():
    """Each set_schema call makes json_schema equal the most recent schema."""
    instance = GenerateMockData()
    first_schema = {"properties": {"field1": {}}}
    second_schema = {"properties": {"field2": {}}}

    # Apply the schemas one after another on the same instance, checking the
    # stored value after every call.
    for schema in (first_schema, second_schema):
        instance.set_schema(schema)
        assert instance.json_schema == schema