Coverage for intelligence_toolkit/generate_mock_data/text_generator.py: 100%
21 statements
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-16 13:41 -0300
« prev ^ index » next coverage.py v7.10.7, created at 2025-10-16 13:41 -0300
1# Copyright (c) 2024 Microsoft Corporation. All rights reserved.
2import pandas as pd
3import asyncio
4from tqdm.asyncio import tqdm_asyncio
5import intelligence_toolkit.AI.utils as utils
6import intelligence_toolkit.generate_mock_data.prompts as prompts
7from intelligence_toolkit.AI.openai_configuration import OpenAIConfiguration
async def generate_text_data(
    ai_configuration: OpenAIConfiguration,
    input_texts: list[str],
    generation_guidance: str = "",
    temperature: float = 0.5,
    df_update_callback=None,
    parallelism: int = 10,
):
    """Generate one mock text per input text, in concurrent batches.

    The inputs are processed in consecutive batches of at most
    ``parallelism`` items. Every item in a batch is scheduled as its own
    task and the batch is awaited through ``tqdm_asyncio.gather`` before
    the next batch starts.

    Args:
        ai_configuration: Passed through unchanged to each generation call.
        input_texts: Source texts; one generation request is made per entry.
        generation_guidance: Extra guidance forwarded to the prompt.
        temperature: Sampling temperature forwarded to each generation call.
        df_update_callback: Optional callable invoked after every batch with
            a DataFrame holding all texts generated so far.
        parallelism: Maximum number of requests awaited together; must be
            at least 1.

    Returns:
        A tuple ``(generated_texts, df)`` where ``generated_texts`` is the
        list of results accumulated across batches and ``df`` is a DataFrame
        with a single ``mock_text`` column containing the same values.

    Raises:
        ValueError: If ``parallelism`` is less than 1.
    """
    # A step of 0 makes range() fail with an unrelated-looking message, and a
    # negative step yields no batches at all (silently dropping every input),
    # so reject both up front with a clear error.
    if parallelism < 1:
        raise ValueError(
            f"parallelism must be a positive integer, got {parallelism!r}"
        )

    # Start from an empty frame so that empty input still returns the
    # expected column layout.
    df = pd.DataFrame(columns=["mock_text"])
    generated_texts = []

    for start in range(0, len(input_texts), parallelism):
        batch = input_texts[start : start + parallelism]
        tasks = [
            asyncio.create_task(
                _generate_text_async(
                    ai_configuration=ai_configuration,
                    input_text=text,
                    generation_guidance=generation_guidance,
                    temperature=temperature,
                )
            )
            for text in batch
        ]
        new_generated_texts = await tqdm_asyncio.gather(*tasks)
        generated_texts.extend(new_generated_texts)

        # Rebuild the frame from the cumulative results so the callback
        # always sees everything generated so far, not just this batch.
        df = pd.DataFrame(generated_texts, columns=["mock_text"])
        if df_update_callback is not None:
            df_update_callback(df)

    return generated_texts, df
async def _generate_text_async(
    ai_configuration, input_text, generation_guidance, temperature
):
    """Request a single generated text for one input.

    Fills ``prompts.text_generation_prompt`` with ``input_text`` and
    ``generation_guidance`` via ``utils.prepare_messages``, then awaits
    ``utils.generate_text_async`` with streaming disabled and the given
    ``temperature``. The helper's result is returned unchanged.
    """
    prompt_variables = {
        "input_text": input_text,
        "generation_guidance": generation_guidance,
    }
    chat_messages = utils.prepare_messages(
        prompts.text_generation_prompt,
        prompt_variables,
    )

    generated = await utils.generate_text_async(
        ai_configuration, chat_messages, stream=False, temperature=temperature
    )
    return generated