beautifulsoup4
click
html5lib
jsonschema
lxml
matplotlib
numpy
pandas
pdftotext>=3.0.0
pikepdf
Pillow>=9.2.0
pypdf[crypto]>=3.1.0
python-dateutil
dateparser
PyYAML
rapidfuzz
requests
scikit-learn
tabula-py
tqdm
setuptools-scm
ipykernel
ipywidgets
pkgconfig
seaborn
pySankeyBeta
scipy>=1.9.0
networkx
pydantic
pydantic-settings
psutil
pytesseract
selenium>=4.38.0

[:platform_machine != "aarch64"]
spacy>=3.8.0

[:platform_machine == "aarch64"]
spacy<3.8.0

[docling]
docling
easyocr

[nlp]
catboost
optuna
setfit
umap-learn[plot]
plotly
scikit-learn
sentence-transformers
nltk
sqlite-vec
openai
pyarrow
