CONTRIBUTING.md
LICENSE
MANIFEST.in
README.md
README_zh.md
docker-compose.yml
magic-pdf.template.json
mkdocs.yml
pyproject.toml
pytest.ini
requirements.txt
setup.cfg
setup.py
tox.ini
./datamax/__init__.py
./datamax/core.py
datamax/__init__.py
datamax/core.py
datamax/cleaner/__init__.py
datamax/cleaner/data_cleaner.py
datamax/cli/__init__.py
datamax/cli/cleaner_cli.py
datamax/cli/commands.py
datamax/cli/crawler_cli.py
datamax/cli/generator_cli.py
datamax/cli/main.py
datamax/cli/parser_cli.py
datamax/crawler/__init__.py
datamax/crawler/arxiv_crawler.py
datamax/crawler/base_crawler.py
datamax/crawler/config_manager.py
datamax/crawler/crawl.py
datamax/crawler/crawler_factory.py
datamax/crawler/exceptions.py
datamax/crawler/logging_config.py
datamax/crawler/storage_adapter.py
datamax/crawler/web_crawler.py
datamax/evaluator/README.md
datamax/evaluator/__init__.py
datamax/evaluator/gen_evaluate_pipeline.py
datamax/evaluator/image_evaluator.py
datamax/evaluator/multimodal_evaluator.py
datamax/evaluator/text_evaluator.py
datamax/generator/__init__.py
datamax/generator/agent_qa_generator.py
datamax/generator/auth.py
datamax/generator/domain_tree.py
datamax/generator/multimodal_qa_generator.py
datamax/generator/prompt_templates.py
datamax/generator/qa_generator.py
datamax/generator/agent/__init__.py
datamax/generator/agent/classifier.py
datamax/generator/agent/config.py
datamax/generator/agent/models.py
datamax/generator/agent/progress.py
datamax/generator/agent/questions.py
datamax/generator/agent/review.py
datamax/generator/agent/runners.py
datamax/generator/agent/spec.py
datamax/loader/__init__.py
datamax/loader/core.py
datamax/loader/minio_handler.py
datamax/loader/obs_handler.py
datamax/loader/oss_handler.py
datamax/parser/__init__.py
datamax/parser/base.py
datamax/parser/code_parser.py
datamax/parser/crawler_parser.py
datamax/parser/csv_parser.py
datamax/parser/doc_parser.py
datamax/parser/docx_parser.py
datamax/parser/epub_parser.py
datamax/parser/html_parser.py
datamax/parser/image_parser.py
datamax/parser/json_parser.py
datamax/parser/md_parser.py
datamax/parser/pdf_parser.py
datamax/parser/ppt_parser.py
datamax/parser/pptx_parser.py
datamax/parser/txt_parser.py
datamax/parser/wps_parser.py
datamax/parser/xls_parser.py
datamax/parser/xlsx_parser.py
datamax/utils/__init__.py
datamax/utils/agent_api_core.py
datamax/utils/debug_logger.py
datamax/utils/lifecycle_types.py
datamax/utils/mineru_operator.py
datamax/utils/paddleocr_pdf_operator.py
datamax/utils/performance_monitor.py
datamax/utils/ppt_extract.py
datamax/utils/uno_handler.py
docs/api.md
docs/cleaning.md
docs/cli.md
docs/crawling.md
docs/debug-system.md
docs/evaluation.md
docs/extend.md
docs/faq.md
docs/getting-started.md
docs/index.md
docs/labeling.md
docs/parsing.md
docs/requirements.txt
docs/best-practices/generate-qa.md
examples/scripts/clean_text.py
examples/scripts/crawl_web.py
examples/scripts/evaluate_text.py
examples/scripts/generate_agent_qa.py
examples/scripts/generate_multimodal_qa.py
examples/scripts/generate_qa.py
examples/scripts/parse_file.py
pydatamax.egg-info/PKG-INFO
pydatamax.egg-info/SOURCES.txt
pydatamax.egg-info/dependency_links.txt
pydatamax.egg-info/entry_points.txt
pydatamax.egg-info/not-zip-safe
pydatamax.egg-info/requires.txt
pydatamax.egg-info/top_level.txt
scripts/clean_pycache.py
scripts/docker-entrypoint.sh
scripts/download_models.py
scripts/format_code.py
scripts/healthcheck.py
scripts/install.bat
scripts/install.sh
scripts/migrate.py
scripts/setup-dev.sh
tests/__init__.py
tests/test_all_types.py