Coverage for intelligence_toolkit/extract_record_data/api.py: 0%

17 statements  

« prev     ^ index     » next       coverage.py v7.10.7, created at 2025-10-16 13:41 -0300

1import intelligence_toolkit.extract_record_data.data_extractor as data_extractor 

2import intelligence_toolkit.generate_mock_data.data_generator as data_generator 

3from intelligence_toolkit.AI.openai_configuration import OpenAIConfiguration 

4import pandas as pd 

5 

6 

class ExtractRecordData:
    """Stateful entry point for extracting schema-shaped records from text.

    Typical call order: ``set_schema`` -> ``set_ai_configuration`` ->
    ``await extract_record_data(...)``. Results of the last extraction are
    kept on the instance as ``json_object`` and ``array_dfs``.
    """

    def __init__(self):
        # JSON schema the extracted records must follow (set via set_schema).
        self.json_schema = {}
        # Array fields derived from the schema by
        # data_generator.extract_array_fields (set via set_schema).
        self.record_arrays = []
        # First element of the pair returned by the last extraction run.
        self.json_object = {}
        # Second element of the pair returned by the last extraction run.
        self.array_dfs = {}
        # Always define the attribute so it can be inspected before
        # set_ai_configuration() is called; None means "not configured yet".
        self.ai_configuration: OpenAIConfiguration | None = None

    def set_schema(self, json_schema: dict):
        """Store the JSON schema and derive its array fields.

        Args:
            json_schema (dict): The JSON schema describing the records to
                extract.
        """
        self.json_schema = json_schema
        self.record_arrays: list[list[str]] = data_generator.extract_array_fields(
            json_schema
        )

    def set_ai_configuration(self, ai_configuration: "OpenAIConfiguration"):
        """Store the AI configuration used by ``extract_record_data``.

        Args:
            ai_configuration (OpenAIConfiguration): Configuration forwarded
                to the data extractor on every extraction call.
        """
        self.ai_configuration = ai_configuration

    async def extract_record_data(
        self,
        input_texts: list[str],
        generation_guidance: str = "",
        df_update_callback=None,
        callback_batch=None,
    ):
        """
        Extracts structured data records from input texts according to the JSON schema

        The pair returned by ``data_extractor.extract_record_data`` is stored
        on the instance as ``json_object`` and ``array_dfs``; nothing is
        returned.

        Args:
            input_texts (list[str]): The list of input texts to extract data from
            generation_guidance (str): Optional guidance to provide to the model
            df_update_callback (function): A callback function to update the dataframe
            callback_batch (function): A callback function to update the batch

        Raises:
            AttributeError: If no AI configuration has been set.
        """
        # Fail fast with an actionable message. AttributeError is kept on
        # purpose: it is the exception type callers already got when the
        # attribute was missing.
        if self.ai_configuration is None:
            raise AttributeError(
                "ai_configuration is not set; call set_ai_configuration() "
                "before extract_record_data()"
            )

        self.json_object, self.array_dfs = await data_extractor.extract_record_data(
            ai_configuration=self.ai_configuration,
            input_texts=input_texts,
            data_schema=self.json_schema,
            record_arrays=self.record_arrays,
            generation_guidance=generation_guidance,
            df_update_callback=df_update_callback,
            callback_batch=callback_batch,
        )