.gitmodules
README.md
pyproject.toml
ruff.toml
ckpts/README.md
data/XVoice_Dataset_ipa_v6/vocab.txt
data/XVoice_Dataset_ipa_v6/vocab_stats.txt
src/rate_pred/README.md
src/rate_pred/configs/SpeedPredict_Multilingual.yaml
src/rate_pred/model/__init__.py
src/rate_pred/model/dataset.py
src/rate_pred/model/jp_syllable.py
src/rate_pred/model/modules.py
src/rate_pred/model/speed_predictor.py
src/rate_pred/model/trainer.py
src/rate_pred/model/utils.py
src/rate_pred/train/README.md
src/rate_pred/train/train.py
src/rate_pred/train/datasets/prepare_multilingual_speed.py
src/x_voice/__init__.py
src/x_voice/prepare_ipa.sh
src/x_voice.egg-info/PKG-INFO
src/x_voice.egg-info/SOURCES.txt
src/x_voice.egg-info/dependency_links.txt
src/x_voice.egg-info/entry_points.txt
src/x_voice.egg-info/requires.txt
src/x_voice.egg-info/top_level.txt
src/x_voice/configs/XVoice_Base_Infer.yaml
src/x_voice/configs/XVoice_Base_Stage1.yaml
src/x_voice/configs/XVoice_Base_Stage2.yaml
src/x_voice/eval/README.md
src/x_voice/eval/collect_results.py
src/x_voice/eval/ecapa_tdnn.py
src/x_voice/eval/eval_infer_batch.py
src/x_voice/eval/eval_librispeech_test_clean.py
src/x_voice/eval/eval_multilingual.sh
src/x_voice/eval/eval_multilingual_seedtts.sh
src/x_voice/eval/eval_seedtts_testset.py
src/x_voice/eval/eval_similarity.py
src/x_voice/eval/eval_utmos.py
src/x_voice/eval/requirements.txt
src/x_voice/eval/speaking_rate_predictor.py
src/x_voice/eval/text_normalizer.py
src/x_voice/eval/utils_eval.py
src/x_voice/eval/utils/average_wer.py
src/x_voice/eval/utils/cal_wer.sh
src/x_voice/eval/utils/get_wav_res_ref_text.py
src/x_voice/eval/utils/get_wav_res_ref_text_gt.py
src/x_voice/eval/utils/parse_options.sh
src/x_voice/eval/utils/run_wer.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/README.md
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/README.md
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/requirements.txt
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/run_paraformer.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/conf/cam++.yaml
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/conf/eres2net.yaml
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/conf/eres2net_para.yaml
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/local/compute_acc.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/local/predict.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/local/predict_para.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/local/prepare_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/local/prepare_pretrained_model.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/language-identification/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/README.md
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/requirements.txt
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/run_audio.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/run_video.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/conf/diar.yaml
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/conf/diar_video.yaml
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/local/DER.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/local/cluster_and_postprocess.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/local/compute_der.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/local/extract_diar_embeddings.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/local/extract_visual_embeddings.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/local/md-eval.pl
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/local/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/local/prepare_subseg_json.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/local/vision_processer.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/local/voice_activity_detection.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/local/vision_tools/active_speaker_detection.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/local/vision_tools/face_detection.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/local/vision_tools/face_quality_assessment.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/speaker-diarization/local/vision_tools/face_recognition.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-cam++/README.md
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-cam++/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-cam++/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-cam++/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-cam++/conf/cam++.yaml
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-cam++/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-cam++/local/prepare_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-cam++/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-cam++/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-cam++/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-cam++/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-ecapa/README.md
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-ecapa/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-ecapa/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-ecapa/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-ecapa/conf/ecapa_tdnn.yaml
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-ecapa/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-ecapa/local/prepare_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-ecapa/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-ecapa/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-ecapa/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-ecapa/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2net/README.md
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2net/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2net/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2net/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2net/conf/eres2net.yaml
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2net/conf/eres2net_lm.yaml
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2net/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2net/local/prepare_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2net/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2net/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2net/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2net/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2netv2/README.md
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2netv2/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2netv2/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2netv2/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2netv2/conf/eres2netv2.yaml
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2netv2/conf/eres2netv2_lm.yaml
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2netv2/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2netv2/local/prepare_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2netv2/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2netv2/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2netv2/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-eres2netv2/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-rdino/README.md
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-rdino/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-rdino/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-rdino/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-rdino/conf/rdino.yaml
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-rdino/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-rdino/local/prepare_data_rdino.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-rdino/local/process_musan.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-rdino/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-rdino/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-rdino/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-res2net/README.md
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-res2net/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-res2net/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-res2net/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-res2net/conf/res2net.yaml
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-res2net/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-res2net/local/prepare_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-res2net/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-res2net/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-res2net/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-res2net/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-resnet/README.md
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-resnet/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-resnet/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-resnet/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-resnet/conf/resnet.yaml
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-resnet/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-resnet/local/prepare_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-resnet/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-resnet/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-resnet/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/3dspeaker/sv-resnet/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/ava-asd/talknet/README.md
src/x_voice/eval/utils/3D-Speaker/egs/ava-asd/talknet/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/ava-asd/talknet/requirements.txt
src/x_voice/eval/utils/3D-Speaker/egs/ava-asd/talknet/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/ava-asd/talknet/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/ava-asd/talknet/conf/config.yaml
src/x_voice/eval/utils/3D-Speaker/egs/ava-asd/talknet/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/ava-asd/talknet/local/extract_audio_clips.py
src/x_voice/eval/utils/3D-Speaker/egs/ava-asd/talknet/local/extract_video_clips.py
src/x_voice/eval/utils/3D-Speaker/egs/ava-asd/talknet/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/README.md
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-cam++/README.md
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-cam++/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-cam++/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-cam++/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-cam++/conf/cam++.yaml
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-cam++/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-cam++/local/flac2wav.py
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-cam++/local/prepare_data_cncb.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-cam++/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-cam++/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-cam++/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-cam++/utils/spk2utt_to_utt2spk.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-cam++/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-ecapa/README.md
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-ecapa/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-ecapa/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-ecapa/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-ecapa/conf/ecapa_tdnn.yaml
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-ecapa/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-ecapa/local/flac2wav.py
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-ecapa/local/prepare_data_cncb.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-ecapa/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-ecapa/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-ecapa/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-ecapa/utils/spk2utt_to_utt2spk.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-ecapa/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2net/README.md
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2net/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2net/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2net/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2net/conf/eres2net.yaml
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2net/conf/eres2net_lm.yaml
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2net/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2net/local/flac2wav.py
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2net/local/prepare_data_cncb.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2net/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2net/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2net/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2net/utils/spk2utt_to_utt2spk.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2net/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2netv2/README.md
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2netv2/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2netv2/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2netv2/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2netv2/conf/eres2netv2.yaml
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2netv2/conf/eres2netv2_lm.yaml
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2netv2/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2netv2/local/flac2wav.py
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2netv2/local/prepare_data_cncb.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2netv2/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2netv2/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2netv2/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2netv2/utils/spk2utt_to_utt2spk.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-eres2netv2/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-rdino/README.md
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-rdino/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-rdino/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-rdino/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-rdino/conf/rdino.yaml
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-rdino/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-rdino/local/flac2wav.py
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-rdino/local/prepare_data_rdino.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-rdino/local/process_musan.py
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-rdino/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-rdino/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-rdino/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-res2net/README.md
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-res2net/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-res2net/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-res2net/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-res2net/conf/res2net.yaml
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-res2net/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-res2net/local/flac2wav.py
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-res2net/local/prepare_data_cncb.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-res2net/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-res2net/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-res2net/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-res2net/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-resnet/README.md
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-resnet/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-resnet/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-resnet/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-resnet/conf/resnet.yaml
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-resnet/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-resnet/local/flac2wav.py
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-resnet/local/prepare_data_cncb.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-resnet/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-resnet/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-resnet/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/cnceleb/sv-resnet/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/semantic_speaker/bert/README.md
src/x_voice/eval/utils/3D-Speaker/egs/semantic_speaker/bert/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/semantic_speaker/bert/requirements.txt
src/x_voice/eval/utils/3D-Speaker/egs/semantic_speaker/bert/run_dialogue_detection.sh
src/x_voice/eval/utils/3D-Speaker/egs/semantic_speaker/bert/run_speaker_turn_detection.sh
src/x_voice/eval/utils/3D-Speaker/egs/semantic_speaker/bert/bin/run_dialogue_detection.py
src/x_voice/eval/utils/3D-Speaker/egs/semantic_speaker/bert/bin/run_speaker_turn_detection.py
src/x_voice/eval/utils/3D-Speaker/egs/semantic_speaker/bert/local/download_aishell_4_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/semantic_speaker/bert/local/download_alimeeting_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/semantic_speaker/bert/local/merge_json_files_for_semantic_speaker.py
src/x_voice/eval/utils/3D-Speaker/egs/semantic_speaker/bert/local/prepare_files_for_aishell_4.py
src/x_voice/eval/utils/3D-Speaker/egs/semantic_speaker/bert/local/prepare_files_for_alimeeting.py
src/x_voice/eval/utils/3D-Speaker/egs/semantic_speaker/bert/local/prepare_json_files_for_semantic_speaker.py
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/README.md
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-cam++/README.md
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-cam++/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-cam++/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-cam++/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-cam++/conf/cam++.yaml
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-cam++/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-cam++/local/prepare_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-cam++/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-cam++/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-cam++/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-cam++/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-ecapa/README.md
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-ecapa/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-ecapa/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-ecapa/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-ecapa/conf/ecapa_tdnn.yaml
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-ecapa/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-ecapa/local/prepare_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-ecapa/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-ecapa/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-ecapa/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-ecapa/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2net/README.md
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2net/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2net/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2net/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2net/conf/eres2net.yaml
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2net/conf/eres2net_lm.yaml
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2net/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2net/local/prepare_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2net/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2net/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2net/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2net/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2netv2/README.md
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2netv2/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2netv2/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2netv2/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2netv2/conf/eres2netv2.yaml
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2netv2/conf/eres2netv2_lm.yaml
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2netv2/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2netv2/local/prepare_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2netv2/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2netv2/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2netv2/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-eres2netv2/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-rdino/README.md
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-rdino/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-rdino/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-rdino/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-rdino/conf/rdino.yaml
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-rdino/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-rdino/local/prepare_data_rdino.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-rdino/local/process_musan.py
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-rdino/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-rdino/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-rdino/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-res2net/README.md
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-res2net/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-res2net/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-res2net/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-res2net/conf/res2net.yaml
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-res2net/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-res2net/local/prepare_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-res2net/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-res2net/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-res2net/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-res2net/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-resnet/README.md
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-resnet/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-resnet/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-resnet/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-resnet/conf/resnet.yaml
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-resnet/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-resnet/local/prepare_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-resnet/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-resnet/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-resnet/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-resnet/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-sdpn/README.md
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-sdpn/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-sdpn/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-sdpn/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-sdpn/conf/sdpn.yaml
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-sdpn/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-sdpn/local/prepare_data_rdino.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-sdpn/local/process_musan.py
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-sdpn/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-sdpn/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-sdpn/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-xvector/README.md
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-xvector/path.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-xvector/run.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-xvector/speakerlab
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-xvector/conf/tdnn.yaml
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-xvector/local/download_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-xvector/local/prepare_data.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-xvector/local/prepare_data_csv.py
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-xvector/utils/m4a2wav.pl
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-xvector/utils/parse_options.sh
src/x_voice/eval/utils/3D-Speaker/egs/voxceleb/sv-xvector/utils/utt2spk_to_spk2utt.pl
src/x_voice/eval/utils/3D-Speaker/pretrained/speech_eres2net_sv_en_voxceleb_16k/pretrained_eres2net.ckpt
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/CMakeLists.txt
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/README.md
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/assets/fbank_config.json
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/bin/CMakeLists.txt
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/bin/extract_speaker_embedding.cpp
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/bin/make_fbank_feature.cpp
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/bin/read_and_describe_wav.cpp
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/cmake/build_json.cmake
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/cmake/build_onnx.cmake
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/feature/CMakeLists.txt
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/feature/feature_basic.cpp
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/feature/feature_basic.h
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/feature/feature_common.cpp
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/feature/feature_common.h
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/feature/feature_fbank.cpp
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/feature/feature_fbank.h
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/feature/feature_functions.cpp
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/feature/feature_functions.h
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/model/CMakeLists.txt
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/model/speaker_embedding_model.cpp
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/model/speaker_embedding_model.h
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/utils/CMakeLists.txt
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/utils/wav_reader.cpp
src/x_voice/eval/utils/3D-Speaker/runtime/onnxruntime/utils/wav_reader.h
src/x_voice/eval/utils/3D-Speaker/speakerlab/bin/compute_score_metrics.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/bin/export_speaker_embedding_onnx.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/bin/extract.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/bin/extract_ssl.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/bin/infer_sv.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/bin/infer_sv_batch.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/bin/infer_sv_ssl.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/bin/train.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/bin/train_asd.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/bin/train_para.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/bin/train_rdino.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/bin/train_sdpn.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/dataset/dataset.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/dataset/dataset_asd.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/dataset/dataset_rdino.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/dataset/dataset_sdpn.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/loss/dino_loss.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/loss/keleo_loss.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/loss/margin_loss.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/loss/sdpn_loss.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/campplus/DTDNN.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/campplus/classifier.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/campplus/layers.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/ecapa_tdnn/ECAPA_TDNN.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/eres2net/ERes2Net.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/eres2net/ERes2NetV2.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/eres2net/ERes2Net_huge.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/eres2net/__init__.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/eres2net/fusion.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/eres2net/pooling_layers.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/rdino/ECAPA_TDNN.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/rdino/RDINO_Head.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/rdino/combiner.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/res2net/Res2Net.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/resnet/ResNet.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/sdpn/ECAPA_TDNN.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/sdpn/SDPN_Head.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/sdpn/combiner.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/talknet/attentionLayer.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/talknet/audioEncoder.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/talknet/talknet.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/talknet/visualEncoder.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/models/xvector/TDNN.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/process/augmentation.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/process/cluster.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/process/processor.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/process/processor_para.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/process/scheduler.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/utils/builder.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/utils/checkpoint.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/utils/config.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/utils/epoch.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/utils/fileio.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/utils/score_metrics.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/utils/utils.py
src/x_voice/eval/utils/3D-Speaker/speakerlab/utils/utils_rdino.py
src/x_voice/eval/utils/DNSMOS/README.md
src/x_voice/eval/utils/DNSMOS/dnsmos_local.py
src/x_voice/eval/utils/DNSMOS/dnsmos_local_wavscp.py
src/x_voice/eval/utils/DNSMOS/DNSMOS/bak_ovr.onnx
src/x_voice/eval/utils/DNSMOS/DNSMOS/model_v8.onnx
src/x_voice/eval/utils/DNSMOS/DNSMOS/sig.onnx
src/x_voice/eval/utils/DNSMOS/DNSMOS/sig_bak_ovr.onnx
src/x_voice/eval/utils/DNSMOS/pDNSMOS/sig_bak_ovr.onnx
src/x_voice/eval/utils/emo_eval/model/emotion2vec_plus_large/config.yaml
src/x_voice/eval/utils/emo_eval/model/emotion2vec_plus_large/configuration.json
src/x_voice/eval/utils/emo_eval/model/emotion2vec_plus_large/tokens.txt
src/x_voice/infer/README.md
src/x_voice/infer/infer_cli_stage1.py
src/x_voice/infer/infer_cli_stage2.py
src/x_voice/infer/infer_gradio.py
src/x_voice/infer/utils_infer.py
src/x_voice/infer/examples/vocab.txt
src/x_voice/infer/examples/basic/basic_ref_en.wav
src/x_voice/infer/examples/basic/basic_ref_zh.wav
src/x_voice/infer/examples/basic/basic_stage1.toml
src/x_voice/infer/examples/basic/basic_stage2.toml
src/x_voice/infer/examples/gradio_sample/ref_en.wav
src/x_voice/infer/examples/gradio_sample/ref_zh.wav
src/x_voice/model/__init__.py
src/x_voice/model/cfm.py
src/x_voice/model/cfm_sft.py
src/x_voice/model/dataset.py
src/x_voice/model/inferencer_gp.py
src/x_voice/model/modules.py
src/x_voice/model/trainer.py
src/x_voice/model/trainer_sft.py
src/x_voice/model/utils.py
src/x_voice/model/backbones/README.md
src/x_voice/model/backbones/dit.py
src/x_voice/model/backbones/mmdit.py
src/x_voice/model/backbones/unett.py
src/x_voice/scripts/count_max_epoch.py
src/x_voice/scripts/count_params_gflops.py
src/x_voice/train/README.md
src/x_voice/train/inference_gp.py
src/x_voice/train/train.py
src/x_voice/train/train_sft.py
src/x_voice/train/datasets/ipa_v3_tokenizer.py
src/x_voice/train/datasets/ipa_v6_tokenizer.py
src/x_voice/train/datasets/prepare_ipa.py
src/x_voice/train/datasets/prepare_ipa_sft.py
src/x_voice/train/datasets/prepare_ipa_stage2_gen_data.py
src/x_voice/train/datasets/test_ipav6.py