Coverage for intelligence_toolkit/generate_mock_data/text_generator.py: 100%

21 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-16 13:41 -0300

1# Copyright (c) 2024 Microsoft Corporation. All rights reserved. 

2import pandas as pd 

3import asyncio 

4from tqdm.asyncio import tqdm_asyncio 

5import intelligence_toolkit.AI.utils as utils 

6import intelligence_toolkit.generate_mock_data.prompts as prompts 

7from intelligence_toolkit.AI.openai_configuration import OpenAIConfiguration 

8 

9 

async def generate_text_data(
    ai_configuration: OpenAIConfiguration,
    input_texts: list[str],
    generation_guidance: str = "",
    temperature: float = 0.5,
    df_update_callback=None,
    parallelism: int = 10,
) -> tuple[list, pd.DataFrame]:
    """Generate mock text for every input text, processing them in batches.

    The inputs are split into consecutive groups of at most ``parallelism``
    items. One task per item is created for each group and the whole group
    is awaited before the next one starts.

    Args:
        ai_configuration: Forwarded unchanged to ``_generate_text_async``
            for every request.
        input_texts: Source texts; one generation task is created per entry.
        generation_guidance: Forwarded unchanged to ``_generate_text_async``.
        temperature: Forwarded unchanged to ``_generate_text_async``.
        df_update_callback: Optional callable invoked after each finished
            batch with a DataFrame of everything generated so far.
        parallelism: Maximum number of tasks awaited together. Must be >= 1.

    Returns:
        A ``(generated_texts, df)`` tuple: the accumulated results of all
        batches, and a DataFrame holding them in a single ``mock_text``
        column (an empty frame with that column when there are no inputs).

    Raises:
        ValueError: If ``parallelism`` is less than 1.
    """
    # A zero step makes range() fail with an unrelated-looking message and a
    # negative step yields no batches at all, silently discarding every
    # input. Reject both up front with a clear error.
    if parallelism < 1:
        raise ValueError(
            f"parallelism must be at least 1, got {parallelism}"
        )

    df = pd.DataFrame(columns=["mock_text"])
    generated_texts = []

    # Batch the input_texts into groups of at most `parallelism` items.
    batches = [
        input_texts[start : start + parallelism]
        for start in range(0, len(input_texts), parallelism)
    ]

    for batch in batches:
        tasks = [
            asyncio.create_task(
                _generate_text_async(
                    ai_configuration=ai_configuration,
                    input_text=text,
                    generation_guidance=generation_guidance,
                    temperature=temperature,
                )
            )
            for text in batch
        ]
        new_generated_texts = await tqdm_asyncio.gather(*tasks)
        generated_texts.extend(new_generated_texts)

        # Rebuild the frame from the full result list so the callback always
        # sees cumulative progress, not just the latest batch.
        df = pd.DataFrame(generated_texts, columns=["mock_text"])
        if df_update_callback is not None:
            df_update_callback(df)

    return generated_texts, df

39 

40 

async def _generate_text_async(
    ai_configuration, input_text, generation_guidance, temperature
):
    """Build the text-generation prompt for one input and await its result.

    Fills ``prompts.text_generation_prompt`` with ``input_text`` and
    ``generation_guidance`` via ``utils.prepare_messages``, then awaits
    ``utils.generate_text_async`` with streaming disabled and the given
    ``temperature``.

    Returns:
        Whatever ``utils.generate_text_async`` returns (presumably the
        generated text; confirm against that helper).
    """
    prompt_variables = {
        "input_text": input_text,
        "generation_guidance": generation_guidance,
    }
    chat_messages = utils.prepare_messages(
        prompts.text_generation_prompt, prompt_variables
    )

    completion = await utils.generate_text_async(
        ai_configuration, chat_messages, stream=False, temperature=temperature
    )
    return completion