Coverage for intelligence_toolkit/tests/unit/generate_mock_data/test_text_generator.py: 100%

70 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-16 13:41 -0300

1# Copyright (c) 2024 Microsoft Corporation. All rights reserved. 

2# Licensed under the MIT license. See LICENSE file in the project. 

3 

4import pytest 

5import pandas as pd 

6from unittest.mock import MagicMock, patch, AsyncMock 

7from intelligence_toolkit.generate_mock_data.text_generator import ( 

8 generate_text_data, 

9) 

10 

11 

@pytest.mark.asyncio
@patch("intelligence_toolkit.generate_mock_data.text_generator.tqdm_asyncio.gather")
@patch("intelligence_toolkit.generate_mock_data.text_generator._generate_text_async")
async def test_generate_text_data_basic(mock_generate, mock_gather):
    """Check the return types and sizes of ``generate_text_data`` for three inputs.

    Both the per-text coroutine and the gather call are patched with canned
    values; the test asserts a list of three texts and a DataFrame exposing a
    ``mock_text`` column.
    """
    # Canned values for the two patched collaborators.
    mock_generate.return_value = "Generated text"
    mock_gather.return_value = ["Text 1", "Text 2", "Text 3"]

    prompts = ["input1", "input2", "input3"]
    generated, frame = await generate_text_data(MagicMock(), prompts)

    # Container types first, then contents.
    assert isinstance(generated, list)
    assert isinstance(frame, pd.DataFrame)
    assert len(generated) == 3
    assert "mock_text" in frame.columns

28 

29 

@pytest.mark.asyncio
@patch("intelligence_toolkit.generate_mock_data.text_generator.tqdm_asyncio.gather")
@patch("intelligence_toolkit.generate_mock_data.text_generator._generate_text_async")
async def test_generate_text_data_with_callback(mock_generate, mock_gather):
    """Check that a supplied ``df_update_callback`` is invoked at least once."""
    mock_gather.return_value = ["Generated text"]
    on_update = MagicMock()

    # The result is unpacked (though unused) so the two-value return shape
    # is still exercised.
    _texts, _frame = await generate_text_data(
        MagicMock(),
        ["input"],
        df_update_callback=on_update,
    )

    assert on_update.called

45 

46 

@pytest.mark.asyncio
@patch("intelligence_toolkit.generate_mock_data.text_generator.tqdm_asyncio.gather")
async def test_generate_text_data_empty_input(mock_gather):
    """Check that an empty input list yields an empty list and a zero-row DataFrame."""
    mock_gather.return_value = []

    no_inputs = []
    generated, frame = await generate_text_data(MagicMock(), no_inputs)

    assert generated == []
    assert len(frame) == 0

59 

60 

@pytest.mark.asyncio
@patch("intelligence_toolkit.generate_mock_data.text_generator.tqdm_asyncio.gather")
@patch("intelligence_toolkit.generate_mock_data.text_generator._generate_text_async")
async def test_generate_text_data_with_parameters(mock_generate, mock_gather):
    """Check that explicit ``generation_guidance`` and ``temperature`` are accepted.

    Only the shape of the result is asserted: a list holding one text.
    """
    mock_gather.return_value = ["Text"]

    # Optional keyword arguments under test, collected in one place.
    options = {
        "generation_guidance": "Be creative",
        "temperature": 0.8,
    }

    generated, _frame = await generate_text_data(MagicMock(), ["input"], **options)

    assert isinstance(generated, list)
    assert len(generated) == 1

81 

82 

@pytest.mark.asyncio
@patch("intelligence_toolkit.generate_mock_data.text_generator.tqdm_asyncio.gather")
@patch("intelligence_toolkit.generate_mock_data.text_generator._generate_text_async")
async def test_generate_text_data_batching(mock_generate, mock_gather):
    """Check that 15 inputs with ``parallelism=10`` produce 15 texts and 15 rows."""
    # Successive gather calls hand back a batch of 10 and then a batch of 5.
    first_batch = ["Text"] * 10
    second_batch = ["Text"] * 5
    mock_gather.side_effect = [first_batch, second_batch]

    prompts = ["input"] * 15
    generated, frame = await generate_text_data(
        MagicMock(),
        prompts,
        parallelism=10,
    )

    # Results from both batches must be present in each output.
    assert len(generated) == 15
    assert len(frame) == 15

99 

100 

@pytest.mark.asyncio
@patch("intelligence_toolkit.generate_mock_data.text_generator.utils.generate_text_async")
@patch("intelligence_toolkit.generate_mock_data.text_generator.utils.prepare_messages")
async def test_generate_text_async_internal(mock_prepare, mock_generate):
    """Check that ``_generate_text_async`` returns the patched generator's value.

    Also asserts that both patched ``utils`` helpers (``prepare_messages`` and
    ``generate_text_async``) were called.
    """
    expected = "Generated response"
    mock_generate.return_value = expected
    mock_prepare.return_value = [{"role": "user", "content": "test"}]

    # The private helper is imported locally, after the patches are active.
    from intelligence_toolkit.generate_mock_data.text_generator import (
        _generate_text_async,
    )

    outcome = await _generate_text_async(MagicMock(), "input", "guidance", 0.7)

    assert outcome == "Generated response"
    assert mock_prepare.called
    assert mock_generate.called