# Python package requirements, one PEP 508 specifier per line.
# Each package should appear exactly once so that any version constraint
# lives in a single place.

# Upper bound keeps NumPy on the 1.x series.
# NOTE(review): reason for the cap is not recorded here; confirm before lifting.
numpy<2.0.0
datasets
scipy
torch
torchvision
torchaudio
tqdm
# NOTE(review): capped below 4.54.0; reason is not recorded here. Confirm before lifting.
transformers<4.54.0
accelerate
rapidfuzz
colorlog
librosa
appdirs
datasketch
httpx[socks]
modelscope
addict
pytest
rich
chonkie
pydantic
nltk
colorama
json5
tiktoken
sqlglot
gradio>5
fasttext-wheel
openai
sentencepiece
presidio_analyzer[transformers]
# Exact pin.
vendi-score==0.0.3
google-api-core
google-api-python-client
contractions
cookiecutter
trafilatura
lxml_html_clean
pymupdf
cloudpickle
pandas
google-cloud-aiplatform>=1.55
google-cloud-bigquery
google-genai
gcsfs
networkx
pyvis