datasets>=2.14.0
transformers>=4.30.0
click>=8.0.0
tqdm>=4.65.0
pyarrow>=12.0.0
structlog>=24.0.0

[all]
dalla-data-processing[dedup,dedup-native,dev,pack,quality,readability,stem]

[dedup]
camel-tools==1.5.7

[dedup-native]
cffi>=1.15.0

[dev]
pytest>=7.0.0
pytest-cov>=4.0.0
ruff>=0.1.0
pre-commit>=3.0.0

[pack]
sentencepiece>=0.2.0
pyyaml

[quality]
camel-tools==1.5.7

[readability]
textstat>=0.7.0

[stem]
camel-tools==1.5.7
