stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3

[all]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
pyttsx3>=2.99
azure-cognitiveservices-speech>=1.50.0
elevenlabs>=2.49.0
openai>=2.38.0
gtts>=2.5.4
coqui_tts>=0.27.5
edge-tts>=7.2.8
kokoro>=0.9.4
camb-sdk>=1.5.11
requests>=2.34.2
cartesia==3.1.0
typecast-python>=0.3.0
faster-qwen3-tts>=0.2.6
snac>=1.2.1
omnivoice>=0.1.5
chatterbox-tts
sopro>=1.5.0
soprano-tts>=0.2.0
neutts
pocket-tts>=2.1.0
torch
scipy
safetensors
huggingface-hub>=0.36.0
torchaudio
numpy
vocos
cn2an>=0.5.24
inflect
jieba>=0.42.1
lhotse
librosa
onnxruntime
piper_phonemize
pypinyin>=0.55.0
setuptools<81
tensorboard
transformers<=4.57.6
nltk
munch
PyYAML
phonemizer
transformers
soundfile>=0.13.1

[azure]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
azure-cognitiveservices-speech>=1.50.0

[camb]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
camb-sdk>=1.5.11

[cartesia]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
cartesia==3.1.0

[chatterbox]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
chatterbox-tts

[coqui]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
coqui_tts>=0.27.5

[edge]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
edge-tts>=7.2.8

[elevenlabs]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
elevenlabs>=2.49.0

[gtts]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
gtts>=2.5.4

[jp]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
mecab-python3>=1.0.12
unidic-lite>=1.0.8
cutlet
fugashi>=1.5.2
jaconv>=0.5.0
mojimoji>=0.0.13
pyopenjtalk>=0.4.1

[ko]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
hangul_romanize

[kokoro]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
kokoro>=0.9.4

[luxtts]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
cn2an>=0.5.24
inflect
jieba>=0.42.1
lhotse
librosa
numpy
onnxruntime
piper_phonemize
pypinyin>=0.55.0
safetensors
setuptools<81
tensorboard
torch
torchaudio
transformers<=4.57.6
vocos

[minimal]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3

[minimax]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
requests>=2.34.2

[modelslab]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
requests>=2.34.2

[moss]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
numpy
soundfile>=0.13.1
torch
torchaudio
onnxruntime
huggingface-hub>=0.36.0
nltk

[moss-tts]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
numpy
soundfile>=0.13.1
torch
torchaudio
onnxruntime
huggingface-hub>=0.36.0
nltk

[neutts]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
neutts

[neutts-gguf]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
neutts[llama,onnx]

[omnivoice]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
omnivoice>=0.1.5

[openai]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
openai>=2.38.0

[orpheus]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
snac>=1.2.1

[parler]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
torch
transformers

[piper]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3

[pocket]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
pocket-tts>=2.1.0
torch

[pocket-gpu]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
torch
scipy
safetensors
huggingface-hub>=0.36.0

[pockettts]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
pocket-tts>=2.1.0
torch

[pockettts-gpu]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
torch
scipy
safetensors
huggingface-hub>=0.36.0

[qwen]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
faster-qwen3-tts>=0.2.6

[soprano]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
soprano-tts>=0.2.0

[sopro]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
sopro>=1.5.0

[style]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
torch
torchaudio
numpy
librosa
nltk
munch
PyYAML
phonemizer

[styletts]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
torch
torchaudio
numpy
librosa
nltk
munch
PyYAML
phonemizer

[system]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
pyttsx3>=2.99

[typecast]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
typecast-python>=0.3.0

[zh]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
pypinyin>=0.55.0
ordered_set>=4.1.0
jieba>=0.42.1
cn2an>=0.5.24

[zipvoice]
stream2sentence>=0.3.2
pydub>=0.25.1
pyaudio>=0.2.14
resampy==0.4.3
torch
torchaudio
numpy
huggingface-hub>=0.36.0
safetensors
vocos
