absl-py<3.0.0,>=2.0.0
comment_parser
cosmos-xenna==0.2.0
fsspec
hydra-core
jieba==0.42.1
loguru
mecab-python3
omegaconf
openai>=1.0.0
pandas>=2.1.0
pyarrow
ray[data,default]>=2.54
torch
transformers

[all]
nemo_curator[audio_cuda12]
nemo_curator[image_cuda12]
nemo_curator[inference_server]
nemo_curator[interleaved_cuda12]
nemo_curator[math_cuda12]
nemo_curator[sdg_cuda12]
nemo_curator[text_cuda12]
nemo_curator[video_cuda12]

[audio_common]
soundfile>=0.12.0
torchaudio
onnx>=1.19.0
silero-vad
librosa
scipy
pydub>=0.25.1

[audio_common:platform_machine == "x86_64" and platform_system != "Darwin"]
nemo_toolkit[asr]>=2.7.2

[audio_cpu]
nemo_curator[audio_common]
onnxruntime<1.24,>=1.20.1

[audio_cuda12]
nemo_curator[audio_common]
nemo_curator[cuda12]
nvidia-cudnn-cu12

[audio_cuda12:platform_machine == "x86_64"]
onnxruntime-gpu<1.24,>=1.20.1

[audio_cuda12:platform_machine == "x86_64" and platform_system != "Darwin"]
torchcodec

[cuda12]
gpustat
nvidia-ml-py

[deduplication_cuda12]
cudf-cu12==25.10.*
cuml-cu12==25.10.*
scikit-learn<1.8.0
pylibcugraph-cu12==25.10.*
pylibraft-cu12==25.10.*
raft-dask-cu12==25.10.*
rapidsmpf-cu12==25.10.*

[image_cpu]
Pillow
torchvision

[image_cuda12]
nemo_curator[image_cpu]
nemo_curator[cuda12]
nemo_curator[deduplication_cuda12]
nvidia-dali-cuda120

[inference_server]
nemo_curator[cuda12]
nemo_curator[vllm]
boto3>=1.35
ray[llm,serve]>=2.54

[inference_server:platform_machine == "x86_64" and platform_system != "Darwin"]
nixl-cu12>=0.10.0
vllm<0.16.0

[interleaved_cpu]
albumentations
open_clip_torch
opencv-python
Pillow
pypdfium2
s3fs>=2024.12.0
timm

[interleaved_cuda12]
nemo_curator[interleaved_cpu]
nemo_curator[cuda12]
nemo_curator[vllm]

[math_cpu]
nemo_curator[text_cpu]
boto3>=1.35

[math_cuda12]
nemo_curator[math_cpu]
nemo_curator[cuda12]
nemo_curator[deduplication_cuda12]

[math_cuda12:platform_machine == "x86_64" and platform_system != "Darwin"]
vllm>=0.13

[sdg_cpu]
data-designer==0.5.5

[sdg_cuda12]
nemo_curator[cuda12]
nemo_curator[sdg_cpu]
nemo_curator[inference_server]

[text_cpu]
beautifulsoup4
justext
lxml>=6.1.0
pycld2
resiliparse
s5cmd
trafilatura==2.0.0
warcio
fasttext==0.9.3
sentencepiece
mwparserfromhell==0.6.5
peft
ftfy
sentence-transformers

[text_cuda12]
nemo_curator[cuda12]
nemo_curator[deduplication_cuda12]
nemo_curator[text_cpu]
nemo_curator[vllm]

[video_cpu]
av==15.1.0
opencv-python
torchvision
einops
easydict

[video_cuda12]
nemo_curator[video_cpu]
nemo_curator[cuda12]
nemo_curator[vllm]
cvcuda_cu12
pycuda
torch<=2.9.1
torchaudio

[video_cuda12:platform_machine == "x86_64" and platform_system != "Darwin"]
flash-attn<=2.8.3
PyNvVideoCodec==2.0.2

[vllm]

[vllm:platform_machine == "x86_64" and platform_system != "Darwin"]
vllm>=0.14.1
