# Core dependencies
click>=8.1.0  # Command-line interface toolkit
pydantic>=2.0.0  # Data validation models (2.x API)
pyyaml>=6.0  # YAML parsing and serialization

# PDF processing
PyMuPDF>=1.23.0  # Python bindings for the MuPDF rendering/extraction library
pdfminer.six>=20221105  # Pure-Python PDF text extraction; version numbers are release dates (YYYYMMDD)
# NOTE: ocrmypdf drives external Tesseract and Ghostscript binaries, which pip does not install.
ocrmypdf>=15.0.0  # Adds an OCR text layer to scanned PDFs
pdfplumber>=0.11.0  # Text/table extraction built on top of pdfminer.six

# Document parsing and metadata
requests>=2.31.0  # HTTP client
habanero>=1.2.3  # Crossref API client

# Text processing and similarity
datasketch>=1.6.0  # MinHash for near-duplicate detection
xxhash>=3.4.0  # Fast non-cryptographic hashing

# Embeddings and LLM
ollama>=0.1.0  # Ollama Python client
# NOTE(review): google-generativeai appears to be deprecated upstream in favor
# of the google-genai package — confirm before raising this lower bound.
google-generativeai>=0.3.0  # Gemini API client
numpy>=1.24.0  # N-dimensional array math
scikit-learn>=1.3.0  # For cosine similarity

# Data handling
pandas>=2.0.0  # Tabular data (DataFrame) processing
openpyxl>=3.1.0  # Excel support

# Caching and storage
sqlalchemy>=2.0.0  # SQL toolkit and ORM (2.x API)
filelock>=3.12.0  # File locking for manifest concurrency

# CLI and utilities
tqdm>=4.66.0  # Progress bars
coloredlogs>=15.0  # Colored terminal output for the standard logging module
python-dotenv>=1.0.0  # Loads environment variables from .env files

# Testing
pytest>=7.4.0  # Test runner
pytest-cov>=4.1.0  # Coverage reporting plugin for pytest
pytest-mock>=3.12.0  # `mocker` fixture wrapping unittest.mock

# Code quality
black>=23.0.0  # Code formatter
flake8>=6.0.0  # Style and error linter
mypy>=1.5.0  # Static type checker
isort>=5.12.0  # Import sorter

# Git conventional commits
commitizen>=3.10.0  # Conventional-commit message tooling and version bumping

# Type stubs (for static type checking of untyped packages)
types-requests>=2.31.0  # Stubs for requests
types-PyYAML>=6.0.0  # Stubs for PyYAML
