xoscar>=0.9.6
torch
gradio<6.0.0
pillow
click<8.2.0
tqdm>=4.27
tabulate
requests
aiohttp
pydantic
fastapi>=0.110.3
uvicorn
huggingface-hub>=0.19.4
typing_extensions
modelscope>=1.19.0
sse_starlette>=1.6.5
openai>=1.40.0
python-jose[cryptography]
bcrypt>=4.0.0
aioprometheus[starlette]>=23.12.0
nvidia-ml-py
pynvml>=12
async-timeout
peft<=0.17.1
timm
setproctitle
uv

[all]
anthropic
xllamacpp>=0.2.0
transformers>=4.53.3
torch
accelerate>=0.28.0
sentencepiece
transformers_stream_generator
protobuf
einops
tiktoken
optimum
attrdict
timm>=0.9.16
torchvision
peft
eva-decord
jj-pytorchvideo
qwen-vl-utils!=0.0.9
qwen_omni_utils
datamodel_code_generator
jsonschema
blobfile
xxhash
tomli
sentence-transformers>=3.1.0
FlagEmbedding
datasets>=3.4.0
diffusers>=0.32.0
controlnet_aux
deepcache
verovio>=4.3.1
tiktoken>=0.6.0
gguf
imageio-ffmpeg
funasr==1.2.7
omegaconf~=2.3.0
WeText
librosa
torch>=2.0.0
torchaudio>=2.0.0
ChatTTS>=0.2.1
lightning>=2.0.0
hydra-core>=1.3.2
inflect
conformer
gdown
pyarrow
HyperPyYAML
onnxruntime>=1.16.0
pyworld>=0.3.4
loguru
natsort
loralib
ormsgpack
cachetools
silero-vad
vector-quantize-pytorch<=1.17.3,>=1.14.24
torchdiffeq
x_transformers>=1.31.14
pypinyin
vocos
jieba
soundfile
cached_path
unidic-lite
cn2an
mecab-python3
num2words
pykakasi
fugashi
g2p_en
anyascii
gruut[de,es,fr]
kokoro>=0.7.15
misaki[en,zh]>=0.7.15
langdetect
pyloudnorm
json5
munch
matplotlib
flatten_dict
julius
tensorboard
randomname
argbind

[all:sys_platform == "darwin" and platform_machine == "arm64"]
mlx-lm>=0.21.5
mlx-vlm>=0.3.4
mlx-whisper
f5-tts-mlx
mlx-audio

[all:sys_platform == "linux"]
bitsandbytes
vllm>=0.2.6
nemo_text_processing<=1.1.0

[anthropic]
anthropic

[audio]
funasr==1.2.7
omegaconf~=2.3.0
WeText
librosa
xxhash
torch>=2.0.0
torchaudio>=2.0.0
ChatTTS>=0.2.1
tiktoken
lightning>=2.0.0
hydra-core>=1.3.2
inflect
conformer
diffusers>=0.32.0
gdown
pyarrow
HyperPyYAML
onnxruntime>=1.16.0
pyworld>=0.3.4
loguru
natsort
loralib
ormsgpack
cachetools
silero-vad
vector-quantize-pytorch<=1.17.3,>=1.14.24
torchdiffeq
x_transformers>=1.31.14
pypinyin
tomli
vocos
jieba
soundfile
cached_path
unidic-lite
cn2an
mecab-python3
num2words
pykakasi
fugashi
g2p_en
anyascii
gruut[de,es,fr]
kokoro>=0.7.15
misaki[en,zh]>=0.7.15
langdetect
pyloudnorm
json5
munch
matplotlib
flatten_dict
julius
tensorboard
randomname
argbind

[audio:sys_platform == "linux"]
nemo_text_processing<=1.1.0

[benchmark]
psutil

[dev]
cython>=0.29
pytest>=3.5.0
pytest-cov>=2.5.0
pytest-timeout>=1.2.0
pytest-forked>=1.0
pytest-asyncio>=0.14.0
pytest-mock>=3.11.1
ipython>=6.5.0
sphinx>=3.0.0
pydata-sphinx-theme>=0.3.0
sphinx-intl>=0.9.9
jieba>=0.42.0
flake8>=3.8.0
black
openai>=1.40.0
anthropic
langchain
langchain-community
langchain-openai
orjson
sphinx-tabs
sphinx-design

[doc]
ipython>=6.5.0
sphinx>=3.0.0
pydata-sphinx-theme>=0.3.0
sphinx-intl>=0.9.9
sphinx-tabs
sphinx-design
prometheus_client
timm

[embedding]
sentence-transformers>=3.1.0
FlagEmbedding
datasets>=3.4.0

[image]
diffusers>=0.32.0
controlnet_aux
deepcache
verovio>=4.3.1
transformers>=4.53.3
tiktoken>=0.6.0
accelerate>=0.28.0
torch
torchvision
gguf

[intel]
torch==2.1.0a0
intel_extension_for_pytorch==2.1.10+xpu

[llama_cpp]
xllamacpp>=0.2.0

[mlx]
qwen_vl_utils!=0.0.9
tomli

[mlx:sys_platform == "darwin" and platform_machine == "arm64"]
mlx-lm>=0.21.5
mlx-vlm>=0.3.4
mlx-whisper
f5-tts-mlx
mlx-audio

[musa]
mthreads-ml-py>=2.2.8
torchada>=0.1.11

[otel]
opentelemetry-api>=1.20.0
opentelemetry-sdk>=1.20.0
opentelemetry-exporter-otlp-proto-http>=1.20.0
opentelemetry-exporter-otlp-proto-grpc>=1.20.0
opentelemetry-instrumentation-fastapi>=0.41b0
opentelemetry-instrumentation-httpx>=0.41b0

[rerank]
FlagEmbedding
datasets>=3.4.0

[sglang]

[sglang:sys_platform == "linux"]
sglang[srt]>=0.4.2.post4

[transformers]
transformers>=4.53.3
torch
accelerate>=0.28.0
sentencepiece
transformers_stream_generator
protobuf
einops
tiktoken
optimum
attrdict
timm>=0.9.16
torchvision
peft
eva-decord
jj-pytorchvideo
qwen-vl-utils!=0.0.9
qwen_omni_utils
datamodel_code_generator
jsonschema
blobfile

[transformers:sys_platform == "linux"]
bitsandbytes

[transformers_quantization]
gptqmodel
datasets>=3.4.0

[transformers_quantization:sys_platform != "darwin"]
autoawq!=0.2.6

[transformers_quantization:sys_platform == "linux"]
bitsandbytes

[video]
diffusers>=0.32.0
imageio-ffmpeg

[vllm]
xxhash

[vllm:sys_platform == "linux"]
vllm>=0.2.6
