# Python dependencies in pip requirements-file format (one requirement per line).
# Entries written as `name==X.Y.Z` are pinned to that exact version; bare names
# carry no version constraint, so the version installed is chosen at install time.
#
# The three PyTorch packages below are unpinned.
# NOTE(review): these are usually released as matched sets — consider pinning
# them together for reproducible installs; confirm the versions this project targets.
torch
torchvision
torchaudio
# Exact pins.
transformers==4.57.6
huggingface_hub==0.36.2
qwen_asr==0.0.6
# Exact pin, installed with the `asr` and `tts` optional extras.
nemo_toolkit[asr,tts]==2.7.2
# Unpinned packages.
# NOTE(review): from the names these look like scoring / progress / audio-I/O
# helpers — confirm against the code that imports them.
jiwer
evaluate
Levenshtein
tqdm
editdistance
numpy
librosa
soundfile
# Exact pin.
backoff==2.2.1
# Unpinned, installed with the `audio` optional extra.
mistral-common[audio]
