MANIFEST.in
README.rst
pyproject.toml
requirements.txt
data_gatherer/__init__.py
data_gatherer/classifier.py
data_gatherer/data_fetcher.py
data_gatherer/data_gatherer.py
data_gatherer/env.py
data_gatherer/logger_setup.py
data_gatherer/resources_loader.py
data_gatherer/selenium_setup.py
data_gatherer.egg-info/PKG-INFO
data_gatherer.egg-info/SOURCES.txt
data_gatherer.egg-info/dependency_links.txt
data_gatherer.egg-info/requires.txt
data_gatherer.egg-info/top_level.txt
data_gatherer/config/data_repos_ontology.json
data_gatherer/config/open_bio_data_repos.json
data_gatherer/config/retrieval_patterns.json
data_gatherer/llm/__init__.py
data_gatherer/llm/llm_client.py
data_gatherer/llm/response_schema.py
data_gatherer/parser/__init__.py
data_gatherer/parser/base_parser.py
data_gatherer/parser/grobid_pdf_parser.py
data_gatherer/parser/html_parser.py
data_gatherer/parser/pdf_parser.py
data_gatherer/parser/xml_parser.py
data_gatherer/prompts/__init__.py
data_gatherer/prompts/prompt_manager.py
data_gatherer/prompts/prompt_templates/GEMINI_from_full_input_Examples_1.json
data_gatherer/prompts/prompt_templates/GEMINI_from_full_input_Examples_2.json
data_gatherer/prompts/prompt_templates/GEMINI_from_full_input_Examples_3.json
data_gatherer/prompts/prompt_templates/GEMINI_from_full_input_Examples_4.json
data_gatherer/prompts/prompt_templates/GPT_FDR_FewShot_CitationType.json
data_gatherer/prompts/prompt_templates/GPT_FDR_FewShot_Syn.json
data_gatherer/prompts/prompt_templates/GPT_RTR_FewShot_CitationType.json
data_gatherer/prompts/prompt_templates/GPT_from_full_input_Examples.json
data_gatherer/prompts/prompt_templates/GPT_from_full_input_Exmpl_Descr.json
data_gatherer/prompts/prompt_templates/GPT_from_full_input_Exmpl_Descr_1.json
data_gatherer/prompts/prompt_templates/datasets_info_extract_JSON.json
data_gatherer/prompts/prompt_templates/retrieve_datasets_fromDAS.json
data_gatherer/prompts/prompt_templates/retrieve_datasets_role_JSON.json
data_gatherer/prompts/prompt_templates/retrieve_datasets_simple.json
data_gatherer/prompts/prompt_templates/retrieve_datasets_simple_JSON.json
data_gatherer/prompts/prompt_templates/retrieve_datasets_simple_JSON_gemini.json
data_gatherer/prompts/prompt_templates/metadata_prompts/gemini_metadata_extract.json
data_gatherer/prompts/prompt_templates/metadata_prompts/gpt_metadata_extract.json
data_gatherer/prompts/prompt_templates/metadata_prompts/portkey_gemini_metadata_extract.json
data_gatherer/retriever/__init__.py
data_gatherer/retriever/base_retriever.py
data_gatherer/retriever/embeddings_retriever.py
data_gatherer/retriever/html_retriever.py
data_gatherer/retriever/xml_retriever.py
tests/test_data_fetcher_methods.py
tests/test_orchestrator.py
tests/test_parser_utils.py