pandas==2.3.0
datasets==4.0.0
transformers==4.52.4
fasteners==0.19
uvicorn==0.34.3
python-dotenv==1.0.1
dacite==1.8.1
click>=8.1.7
dask[distributed]>=2025.2.0
pytest>=8.3.4
httpx==0.28.1
Pillow
PyMuPDF
beautifulsoup4==4.13.4
Unidecode
clean-text
docx2pdf
lxml_html_clean
python-docx
python-pptx
requests==2.32.4
selenium==4.34.2
surya-ocr>=0.8.3
xlrd==2.0.1
py7zr==0.22.0
rarfile==4.2
markdown==3.7
markdownify==0.13.1
marker-pdf==1.7.5
moviepy==2.1.1
openpyxl==3.1.5
chonkie==0.2.1.post1
langdetect>=1.0.9
trafilatura==2.0.0
validators==0.35.0
bokeh
motor==3.7.1
mpmath==1.3.0
networkx==3.4.2
fastapi[standard]==0.115.13
pydantic==2.11.7
pymongo==4.13.2
pymilvus==2.5.0
milvus-model==0.2.12
accelerate==1.7.0
cohere==5.15.0
langchain-anthropic==0.3.4
langchain-aws==0.2.30
langchain-cohere==0.4.2
langchain_community==0.3.25
langchain-huggingface==0.1.2
langchain-milvus==0.1.8
langchain-mistralai==0.2.7
langchain-nvidia-ai-endpoints
langchain-openai==0.3.28
langchain==0.3.27
ragas==0.3.1
nltk>=3.9
starlette==0.46
typing_extensions==4.14.1
sympy==1.14.0
google-auth==2.39.0
google-api-python-client==2.173.0
mammoth==1.9.0
argostranslate
sentence-transformers
langid

[:python_version < "3.12"]
numpy==1.26.4
datatrove==0.3.0

[:python_version >= "3.12"]
numpy==2.2.6
datatrove==0.6.0

[cpu]
torch>=2.5.1

[cu124]
torch>=2.5.1

[dev]
pytest>=8.0.0
ruff>=0.4.0

[rag]
accelerate
cohere==5.15.0
langchain-anthropic==0.3.4
langchain-aws==0.2.30
langchain-cohere==0.4.2
langchain-huggingface==0.1.2
langchain-milvus==0.1.8
langchain-mistralai==0.2.7
langchain-nvidia-ai-endpoints
langchain-openai==0.3.28
langchain==0.3.27
langdetect>=1.0.9
pymilvus==2.5.0
milvus-model==0.2.12
ragas==0.3.1
nltk>=3.9
