numpy<3,>=1.23
pandas>=1.5
pydantic>=2.0
pyyaml>=6.0
typer>=0.9
rich>=13.0
tqdm>=4.0

[active-speaker-mouth]
mediapipe>=0.10
opencv-python-headless>=4.8
Pillow>=10.0
dlib>=19.24

[all]
WhoSpoke[asr]
WhoSpoke[diarization]
WhoSpoke[vision]
WhoSpoke[voice]

[asr]
WhoSpoke[asr-whisper]
WhoSpoke[asr-faster-whisper]
WhoSpoke[asr-vosk]
WhoSpoke[asr-wav2vec]

[asr-faster-whisper]
faster-whisper>=1.0

[asr-vosk]
vosk>=0.3.45

[asr-wav2vec]
transformers>=4.30
torch>=2.0
torchaudio>=2.0
soundfile>=0.12

[asr-whisper]
openai-whisper>=20231117

[dev]
pytest>=7.0
ruff>=0.4
mypy>=1.8
black>=24.0
build>=1.0

[diarization]
WhoSpoke[diarization-pyannote]

[diarization-basic]

[diarization-pyannote]
pyannote.audio>=3.0

[face-deepface]
deepface>=0.0.89
opencv-python-headless>=4.8
Pillow>=10.0

[face-recognition]
face-recognition>=1.3.0
opencv-python-headless>=4.8
Pillow>=10.0

[vision]
WhoSpoke[face-deepface]
WhoSpoke[face-recognition]
WhoSpoke[active-speaker-mouth]

[voice]
WhoSpoke[voice-speechbrain]
WhoSpoke[voice-resemblyzer]

[voice-resemblyzer]
resemblyzer>=0.1.4

[voice-speechbrain]
speechbrain>=0.5.16
torch>=2.0
torchaudio>=2.0
