.gitattributes
.gitignore
.pre-commit-config.yaml
.readthedocs.yml
CONTRIBUTING.md
LICENSE
README.md
collect_env.py
mkdocs.yml
pyproject.toml
setup.py
.buildkite/bootstrap-amd-omni.sh
.buildkite/bootstrap-intel-omni.sh
.buildkite/nightly-release-pipeline.yaml
.buildkite/pipeline-intel.yaml
.buildkite/pipeline.yml
.buildkite/release-pipeline.yaml
.buildkite/test-amd-merge.yml
.buildkite/test-amd-ready.yaml
.buildkite/test-amd.yaml
.buildkite/test-merge.yml
.buildkite/test-nightly.yml
.buildkite/test-ready.yml
.buildkite/test-template-amd-omni.j2
.buildkite/test-weekly.yml
.buildkite/scripts/docker_login_ecr_public.sh
.buildkite/scripts/generate-and-upload-nightly-index.sh
.buildkite/scripts/generate-nightly-index.py
.buildkite/scripts/upload-nightly-wheels.sh
.buildkite/scripts/upload_pipeline_with_skip_ci.sh
.buildkite/scripts/hardware_ci/run-amd-test.sh
.buildkite/scripts/hardware_ci/run-xpu-test.sh
.buildkite/scripts/hardware_ci/run_npu_test.sh
.claude/skills/readme.md
.claude/skills/add-diffusion-model/SKILL.md
.claude/skills/add-diffusion-model/references/cache-dit-patterns.md
.claude/skills/add-diffusion-model/references/custom-model-patterns.md
.claude/skills/add-diffusion-model/references/parallelism-patterns.md
.claude/skills/add-diffusion-model/references/transformer-adaptation.md
.claude/skills/add-diffusion-model/references/troubleshooting.md
.claude/skills/add-tts-model/SKILL.md
.claude/skills/add-tts-model/references/cuda-graph-example.md
.claude/skills/add-tts-model/references/optional-deps.md
.claude/skills/add-tts-model/references/precommit-dco.md
.claude/skills/add-tts-model/references/single-stage-ar.md
.claude/skills/vllm-omni-npu-upgrade/SKILL.md
.claude/skills/vllm-omni-npu-upgrade/references/gpu-to-npu-translation.md
.claude/skills/vllm-omni-npu-upgrade/references/omni-specific-blocks.md
.claude/skills/vllm-omni-npu-upgrade/references/workflow-checklist.md
.github/CODEOWNERS
.github/PULL_REQUEST_TEMPLATE.md
.github/mergify.yml
.github/ISSUE_TEMPLATE/100-documentation.yml
.github/ISSUE_TEMPLATE/200-installation.yml
.github/ISSUE_TEMPLATE/400-bug-report.yml
.github/ISSUE_TEMPLATE/500-feature-request.yml
.github/ISSUE_TEMPLATE/600-new-model.yml
.github/ISSUE_TEMPLATE/700-performance-discussion.yml
.github/ISSUE_TEMPLATE/750-RFC.yml
.github/ISSUE_TEMPLATE/config.yml
.github/scripts/detect_changed_tests.sh
.github/workflows/build_wheel.yml
.github/workflows/pre-commit.yml
apps/ComfyUI-vLLM-Omni/.gitignore
apps/ComfyUI-vLLM-Omni/LICENSE
apps/ComfyUI-vLLM-Omni/README.md
apps/ComfyUI-vLLM-Omni/__init__.py
apps/ComfyUI-vLLM-Omni/comfyui_vllm_omni/__init__.py
apps/ComfyUI-vLLM-Omni/comfyui_vllm_omni/nodes.py
apps/ComfyUI-vLLM-Omni/comfyui_vllm_omni/utils/api_client.py
apps/ComfyUI-vLLM-Omni/comfyui_vllm_omni/utils/format.py
apps/ComfyUI-vLLM-Omni/comfyui_vllm_omni/utils/logger.py
apps/ComfyUI-vLLM-Omni/comfyui_vllm_omni/utils/models.py
apps/ComfyUI-vLLM-Omni/comfyui_vllm_omni/utils/types.py
apps/ComfyUI-vLLM-Omni/comfyui_vllm_omni/utils/validators.py
apps/ComfyUI-vLLM-Omni/docs/images/comfyui-chaining-services.jpg
apps/ComfyUI-vLLM-Omni/docs/images/comfyui-image-generation.jpg
apps/ComfyUI-vLLM-Omni/docs/images/comfyui-multi-stage.jpg
apps/ComfyUI-vLLM-Omni/docs/images/comfyui-tts.jpg
apps/ComfyUI-vLLM-Omni/docs/images/comfyui-understanding.jpg
apps/ComfyUI-vLLM-Omni/docs/images/comfyui-video-generation.jpg
apps/ComfyUI-vLLM-Omni/example_workflows/vLLM-Omni Chaining Services.json
apps/ComfyUI-vLLM-Omni/example_workflows/vLLM-Omni Image Generation.json
apps/ComfyUI-vLLM-Omni/example_workflows/vLLM-Omni Multimodal Understanding.json
apps/ComfyUI-vLLM-Omni/example_workflows/vLLM-Omni TTS.json
apps/ComfyUI-vLLM-Omni/example_workflows/vLLM-Omni Video Generation.json
apps/ComfyUI-vLLM-Omni/web/main.js
benchmarks/README.md
benchmarks/__init__.py
benchmarks/accuracy/README.md
benchmarks/accuracy/__init__.py
benchmarks/accuracy/common.py
benchmarks/accuracy/image_to_image/README.md
benchmarks/accuracy/image_to_image/__init__.py
benchmarks/accuracy/image_to_image/gedit_bench.py
benchmarks/accuracy/image_to_image/run_gedit_bench.py
benchmarks/accuracy/text_to_image/README.md
benchmarks/accuracy/text_to_image/__init__.py
benchmarks/accuracy/text_to_image/gbench.py
benchmarks/accuracy/text_to_image/run_gebench.py
benchmarks/build_dataset/download_process_data_seedtts.md
benchmarks/build_dataset/extract_tts_prompts.py
benchmarks/build_dataset/seed_tts_design/en/meta.lst
benchmarks/build_dataset/seed_tts_smoke/en/meta.lst
benchmarks/diffusion/README.md
benchmarks/diffusion/backends.py
benchmarks/diffusion/diffusion_benchmark_serving.py
benchmarks/diffusion/quantization_quality.py
benchmarks/diffusion/performance_dashboard/qwen_image_serving_performance.md
benchmarks/diffusion/performance_dashboard/wan_2_2_serving_performance.md
benchmarks/distributed/omni_connectors/README.md
benchmarks/distributed/omni_connectors/cross_node_mooncake_transfer_engine.py
benchmarks/fish-speech/bench_voice_cache.py
benchmarks/fish-speech/fish_bench_utils.py
benchmarks/glm_image/README.md
benchmarks/glm_image/__init__.py
benchmarks/glm_image/benchmark_glm_image.py
benchmarks/glm_image/huggingface/inference.py
benchmarks/glm_image/vllm-omni/inference.py
benchmarks/tts/README.md
benchmarks/tts/bench_tts.py
benchmarks/tts/bench_voxcpm_offline.py
benchmarks/tts/model_configs.yaml
benchmarks/tts/plot_results.py
docker/Dockerfile.ci
docker/Dockerfile.cuda
docker/Dockerfile.npu
docker/Dockerfile.npu.a3
docker/Dockerfile.rocm
docker/Dockerfile.xpu
docs/.nav.yml
docs/README.md
docs/api/README.md
docs/assets/WeChat.jpg
docs/cli/README.md
docs/cli/serve.md
docs/cli/bench/serve.md
docs/community/contact_us.md
docs/community/governance.md
docs/community/meetups.md
docs/community/volunteers.md
docs/configuration/README.md
docs/configuration/gpu_memory_utilization.md
docs/configuration/pd_disaggregation.md
docs/configuration/stage_configs.md
docs/contributing/DOCS_GUIDE.md
docs/contributing/README.md
docs/contributing/metrics.md
docs/contributing/profiling.md
docs/contributing/ci/.nav.yaml
docs/contributing/ci/CI_5levels.md
docs/contributing/ci/failures.md
docs/contributing/ci/test_guide.md
docs/contributing/ci/tests_markers.md
docs/contributing/ci/tests_style.md
docs/contributing/ci/test_examples/l4_doc_example_tests.inc.md
docs/contributing/ci/test_examples/l4_functionality_tests.inc.md
docs/contributing/ci/test_examples/l4_performance_tests.inc.md
docs/contributing/model/README.md
docs/contributing/model/adding_diffusion_model.md
docs/contributing/model/adding_omni_model.md
docs/contributing/model/adding_tts_model.md
docs/design/architecture_overview.md
docs/design/index.md
docs/design/qwen3_omni_tts_performance_optimization.md
docs/design/feature/async_chunk.md
docs/design/feature/cache_dit.md
docs/design/feature/cfg_parallel.md
docs/design/feature/diffusion_step_execution.md
docs/design/feature/disaggregated_inference.md
docs/design/feature/expert_parallel.md
docs/design/feature/hsdp.md
docs/design/feature/prefix_caching.md
docs/design/feature/ray_based_execution.md
docs/design/feature/sequence_parallel.md
docs/design/feature/teacache.md
docs/design/feature/tensor_parallel.md
docs/design/feature/vae_parallel.md
docs/design/feature/omni_connectors/mooncake_store_connector.md
docs/design/feature/omni_connectors/mooncake_transfer_engine_connector.md
docs/design/feature/omni_connectors/shared_memory_connector.md
docs/design/feature/omni_connectors/yuanrong_connector.md
docs/design/figures/omni/E2EL_s_vllm_omni_vs_transformers.png
docs/design/figures/omni/Mean_AUDIO_RTF_Baseline_vs_Batch.png
docs/design/figures/omni/Mean_AUDIO_RTF_Batch_CUDA_Graph_vs_Async_Chunk.png
docs/design/figures/omni/Mean_AUDIO_RTF_Batch_vs_Batch_CUDA_Graph.png
docs/design/figures/omni/Mean_AUDIO_TTFP_ms_Baseline_vs_Batch.png
docs/design/figures/omni/Mean_AUDIO_TTFP_ms_Batch_CUDA_Graph_vs_Async_Chunk.png
docs/design/figures/omni/Mean_AUDIO_TTFP_ms_Batch_vs_Batch_CUDA_Graph.png
docs/design/figures/omni/Mean_E2EL_ms_Baseline_vs_Batch.png
docs/design/figures/omni/Mean_E2EL_ms_Batch_CUDA_Graph_vs_Async_Chunk.png
docs/design/figures/omni/Mean_E2EL_ms_Batch_vs_Batch_CUDA_Graph.png
docs/design/figures/omni/RTF_vllm_omni_vs_transformers.png
docs/design/figures/omni/Summary_E2EL_ms_vs_features.png
docs/design/figures/omni/Summary_RTF_vs_features.png
docs/design/figures/omni/Summary_TTFP_ms_vs_features.png
docs/design/figures/omni/TTFP_s_vllm_omni_vs_transformers.png
docs/design/figures/tts/Mean_AUDIO_RTF_vllm_omni_vs_transformers.png
docs/design/figures/tts/Mean_AUDIO_TTFP_(ms)_vllm_omni_vs_transformers.png
docs/design/figures/tts/Mean_E2EL_(ms)_vllm_omni_vs_transformers.png
docs/design/figures/tts/Mean_mean_e2e_ms_baseline_vs_batch.png
docs/design/figures/tts/Mean_mean_e2e_ms_batch_vs_cuda_graph.png
docs/design/figures/tts/Mean_mean_e2e_ms_cuda_graph_vs_async_chunk.png
docs/design/figures/tts/Mean_mean_rtf_baseline_vs_batch.png
docs/design/figures/tts/Mean_mean_rtf_batch_vs_cuda_graph.png
docs/design/figures/tts/Mean_mean_rtf_cuda_graph_vs_async_chunk.png
docs/design/figures/tts/Mean_mean_ttfp_ms_baseline_vs_batch.png
docs/design/figures/tts/Mean_mean_ttfp_ms_batch_vs_cuda_graph.png
docs/design/figures/tts/Mean_mean_ttfp_ms_cuda_graph_vs_async_chunk.png
docs/design/figures/tts/Summary_mean_e2e_ms_vs_features.png
docs/design/figures/tts/Summary_mean_rtf_vs_features.png
docs/design/figures/tts/Summary_mean_ttfp_ms_vs_features.png
docs/design/module/ar_module.md
docs/design/module/async_omni_architecture.md
docs/design/module/dit_module.md
docs/design/module/entrypoint_module.md
docs/examples/README.md
docs/features/comfyui.md
docs/features/custom_pipeline.md
docs/features/sleep_mode.md
docs/getting_started/quickstart.md
docs/getting_started/installation/.nav.yml
docs/getting_started/installation/README.md
docs/getting_started/installation/gpu.md
docs/getting_started/installation/npu.md
docs/getting_started/installation/python_env_setup.inc.md
docs/getting_started/installation/gpu/cuda.inc.md
docs/getting_started/installation/gpu/musa.inc.md
docs/getting_started/installation/gpu/rocm.inc.md
docs/getting_started/installation/gpu/xpu.inc.md
docs/getting_started/installation/npu/npu.inc.md
docs/mkdocs/hooks/generate_api_readme.py
docs/mkdocs/hooks/generate_argparse.py
docs/mkdocs/hooks/generate_examples.py
docs/mkdocs/hooks/url_schemes.py
docs/mkdocs/javascript/edit_and_feedback.js
docs/mkdocs/javascript/mathjax.js
docs/mkdocs/javascript/mermaid.js
docs/mkdocs/javascript/slack_and_forum.js
docs/mkdocs/overrides/main.html
docs/mkdocs/overrides/partials/toc-item.html
docs/mkdocs/stylesheets/extra.css
docs/models/supported_models.md
docs/serving/audio_generate_api.md
docs/serving/diffusion_chat_api.md
docs/serving/image_edit_api.md
docs/serving/image_generation_api.md
docs/serving/speech_api.md
docs/serving/video_stream_api.md
docs/source/architecture/ar-dit-main-architecture.png
docs/source/architecture/ar-main-architecture.png
docs/source/architecture/async-chunk-architecture.png
docs/source/architecture/dit-main-architecture.png
docs/source/architecture/omni-modality-model-architecture.png
docs/source/architecture/qwen3-omni-async-chunk.png
docs/source/architecture/qwen3-omni-non-async-chunk.png
docs/source/architecture/vllm-omni-dataflow-between-stages.png
docs/source/architecture/vllm-omni-diffusion-flow.png
docs/source/architecture/vllm-omni-main-architecture.png
docs/source/architecture/vllm-omni-user-interface.png
docs/source/logos/vllm-logo-only-light.ico
docs/source/logos/vllm-omni-logo.png
docs/source/performance/qwen3-omni_e2e_performance.png
docs/source/performance/qwen3-omni_rtf_performance.png
docs/source/performance/qwen3-omni_ttfp_performance.png
docs/usage/faq.md
docs/user_guide/diffusion_features.md
docs/user_guide/feature_compatibility.md
docs/user_guide/diffusion/attention_backends.md
docs/user_guide/diffusion/cpu_offload_diffusion.md
docs/user_guide/diffusion/frame_interpolation.md
docs/user_guide/diffusion/lora.md
docs/user_guide/diffusion/step_execution.md
docs/user_guide/diffusion/cache_acceleration/cache_dit.md
docs/user_guide/diffusion/cache_acceleration/teacache.md
docs/user_guide/diffusion/parallelism/cfg_parallel.md
docs/user_guide/diffusion/parallelism/expert_parallel.md
docs/user_guide/diffusion/parallelism/hsdp.md
docs/user_guide/diffusion/parallelism/overview.md
docs/user_guide/diffusion/parallelism/sequence_parallel.md
docs/user_guide/diffusion/parallelism/tensor_parallel.md
docs/user_guide/diffusion/parallelism/vae_patch_parallel.md
docs/user_guide/examples/offline_inference/bagel.md
docs/user_guide/examples/offline_inference/cosyvoice3.md
docs/user_guide/examples/offline_inference/fish_speech.md
docs/user_guide/examples/offline_inference/glm_image.md
docs/user_guide/examples/offline_inference/helios.md
docs/user_guide/examples/offline_inference/hunyuan_image3.md
docs/user_guide/examples/offline_inference/image_to_image.md
docs/user_guide/examples/offline_inference/image_to_video.md
docs/user_guide/examples/offline_inference/internvla_a1.md
docs/user_guide/examples/offline_inference/mammothmodal2_preview.md
docs/user_guide/examples/offline_inference/mimo_audio.md
docs/user_guide/examples/offline_inference/qwen2_5_omni.md
docs/user_guide/examples/offline_inference/qwen3_omni.md
docs/user_guide/examples/offline_inference/qwen3_tts.md
docs/user_guide/examples/offline_inference/text_to_audio.md
docs/user_guide/examples/offline_inference/text_to_image.md
docs/user_guide/examples/offline_inference/text_to_video.md
docs/user_guide/examples/offline_inference/voxtral_tts.md
docs/user_guide/examples/offline_inference/x_to_video_audio.md
docs/user_guide/examples/online_serving/bagel.md
docs/user_guide/examples/online_serving/chart-helm.md
docs/user_guide/examples/online_serving/diffusers_pipeline_adapter.md
docs/user_guide/examples/online_serving/fish_speech.md
docs/user_guide/examples/online_serving/glm_image.md
docs/user_guide/examples/online_serving/image_to_image.md
docs/user_guide/examples/online_serving/image_to_video.md
docs/user_guide/examples/online_serving/mimo_audio.md
docs/user_guide/examples/online_serving/qwen2_5_omni.md
docs/user_guide/examples/online_serving/qwen3_omni.md
docs/user_guide/examples/online_serving/qwen3_tts.md
docs/user_guide/examples/online_serving/text_to_audio.md
docs/user_guide/examples/online_serving/text_to_image.md
docs/user_guide/examples/online_serving/text_to_video.md
docs/user_guide/quantization/autoround.md
docs/user_guide/quantization/fp8.md
docs/user_guide/quantization/gguf.md
docs/user_guide/quantization/int8.md
docs/user_guide/quantization/msmodelslim.md
docs/user_guide/quantization/online.md
docs/user_guide/quantization/overview.md
examples/offline_inference/bagel/README.md
examples/offline_inference/bagel/end2end.py
examples/offline_inference/cosyvoice3/README.md
examples/offline_inference/cosyvoice3/verify_e2e_cosyvoice.py
examples/offline_inference/custom_pipeline/image_to_image/custom_pipeline.py
examples/offline_inference/custom_pipeline/image_to_image/image_edit.py
examples/offline_inference/custom_pipeline/image_to_image/run.sh
examples/offline_inference/dynin_omni/README.md
examples/offline_inference/dynin_omni/end2end.py
examples/offline_inference/fish_speech/README.md
examples/offline_inference/fish_speech/end2end.py
examples/offline_inference/helios/README.md
examples/offline_inference/helios/end2end.py
examples/offline_inference/hunyuan_image3/README.md
examples/offline_inference/hunyuan_image3/end2end.py
examples/offline_inference/image_to_image/image_edit.py
examples/offline_inference/image_to_image/image_to_image.md
examples/offline_inference/image_to_image/run_qwen_image_edit_2511.sh
examples/offline_inference/image_to_video/README.md
examples/offline_inference/image_to_video/image_to_video.py
examples/offline_inference/internvla_a1/README.md
examples/offline_inference/internvla_a1/collect_results.sh
examples/offline_inference/internvla_a1/end2end.py
examples/offline_inference/internvla_a1/internvla_a1_common.py
examples/offline_inference/internvla_a1/run.sh
examples/offline_inference/magi_human/README.md
examples/offline_inference/magi_human/end2end.py
examples/offline_inference/mammothmodal2_preview/README.md
examples/offline_inference/mammothmodal2_preview/run_mammothmoda2_image_summarize.py
examples/offline_inference/mammothmodal2_preview/run_mammothmoda2_t2i.py
examples/offline_inference/mimo_audio/README.md
examples/offline_inference/mimo_audio/end2end.py
examples/offline_inference/mimo_audio/message_base64_wav.json
examples/offline_inference/mimo_audio/message_convert.py
examples/offline_inference/mimo_audio/process_speechdata.py
examples/offline_inference/ming_flash_omni/README.md
examples/offline_inference/ming_flash_omni/end2end.py
examples/offline_inference/ming_flash_omni_tts/README.md
examples/offline_inference/ming_flash_omni_tts/end2end.py
examples/offline_inference/moss_tts_nano/README.md
examples/offline_inference/moss_tts_nano/end2end.py
examples/offline_inference/omnivoice/README.md
examples/offline_inference/omnivoice/end2end.py
examples/offline_inference/qwen2_5_omni/README.md
examples/offline_inference/qwen2_5_omni/end2end.py
examples/offline_inference/qwen2_5_omni/extract_prompts.py
examples/offline_inference/qwen2_5_omni/run_multiple_prompts.sh
examples/offline_inference/qwen2_5_omni/run_single_prompt.sh
examples/offline_inference/qwen3_omni/README.md
examples/offline_inference/qwen3_omni/end2end.py
examples/offline_inference/qwen3_omni/end2end_async_chunk.py
examples/offline_inference/qwen3_omni/run_multiple_prompts.sh
examples/offline_inference/qwen3_omni/run_multiple_prompts_async_chunk.sh
examples/offline_inference/qwen3_omni/run_single_prompt.sh
examples/offline_inference/qwen3_omni/run_single_prompt_async_chunk.sh
examples/offline_inference/qwen3_omni/run_single_prompt_tp.sh
examples/offline_inference/qwen3_omni/text_prompts_10.txt
examples/offline_inference/qwen3_tts/README.md
examples/offline_inference/qwen3_tts/benchmark_prompts.txt
examples/offline_inference/qwen3_tts/end2end.py
examples/offline_inference/text_to_audio/README.md
examples/offline_inference/text_to_audio/text_to_audio.py
examples/offline_inference/text_to_image/README.md
examples/offline_inference/text_to_image/gradio_demo.py
examples/offline_inference/text_to_image/text_to_image.py
examples/offline_inference/text_to_video/text_to_video.md
examples/offline_inference/text_to_video/text_to_video.py
examples/offline_inference/vace/vace_video_generation.md
examples/offline_inference/vace/vace_video_generation.py
examples/offline_inference/voxcpm/README.md
examples/offline_inference/voxcpm/end2end.py
examples/offline_inference/voxcpm2/README.md
examples/offline_inference/voxcpm2/end2end.py
examples/offline_inference/voxtral_tts/README.md
examples/offline_inference/voxtral_tts/end2end.py
examples/offline_inference/x_to_video_audio/download_dreamid_omni.py
examples/offline_inference/x_to_video_audio/x_to_video_audio.md
examples/offline_inference/x_to_video_audio/x_to_video_audio.py
examples/online_serving/openai_chat_completion_client_for_multimodal_generation.py
examples/online_serving/bagel/README.md
examples/online_serving/bagel/openai_chat_client.py
examples/online_serving/bagel/run_server.sh
examples/online_serving/bagel/run_server_stage_cli.sh
examples/online_serving/chart-helm/.helmignore
examples/online_serving/chart-helm/Chart.yaml
examples/online_serving/chart-helm/README.md
examples/online_serving/chart-helm/ct.yaml
examples/online_serving/chart-helm/lintconf.yaml
examples/online_serving/chart-helm/values.yaml
examples/online_serving/chart-helm/templates/_helpers.tpl
examples/online_serving/chart-helm/templates/configmap.yaml
examples/online_serving/chart-helm/templates/custom-objects.yaml
examples/online_serving/chart-helm/templates/deployment.yaml
examples/online_serving/chart-helm/templates/hpa.yaml
examples/online_serving/chart-helm/templates/ingress.yaml
examples/online_serving/chart-helm/templates/poddisruptionbudget.yaml
examples/online_serving/chart-helm/templates/pvc.yaml
examples/online_serving/chart-helm/templates/secrets.yaml
examples/online_serving/chart-helm/templates/service.yaml
examples/online_serving/chart-helm/tests/deployment_test.yaml
examples/online_serving/chart-helm/tests/ingress_test.yaml
examples/online_serving/chart-helm/tests/pvc_test.yaml
examples/online_serving/chart-helm/tests/secrets_test.yaml
examples/online_serving/diffusers_pipeline_adapter/README.md
examples/online_serving/diffusers_pipeline_adapter/stage_config.yaml
examples/online_serving/dynin_omni/README.md
examples/online_serving/dynin_omni/openai_chat_completion_client_for_multimodal_generation.py
examples/online_serving/fish_speech/README.md
examples/online_serving/fish_speech/gradio_demo.py
examples/online_serving/fish_speech/run_gradio_demo.sh
examples/online_serving/fish_speech/run_server.sh
examples/online_serving/fish_speech/speech_client.py
examples/online_serving/helios/README.md
examples/online_serving/helios/helios_client.py
examples/online_serving/helios/run_helios_distilled.sh
examples/online_serving/helios/run_helios_mid_stage2.sh
examples/online_serving/helios/run_helios_t2v.sh
examples/online_serving/image_to_image/README.md
examples/online_serving/image_to_image/gradio_demo.py
examples/online_serving/image_to_image/openai_chat_client.py
examples/online_serving/image_to_image/run_curl_image_edit.sh
examples/online_serving/image_to_image/run_server.sh
examples/online_serving/image_to_video/README.md
examples/online_serving/image_to_video/run_curl_hunyuan_video_15.sh
examples/online_serving/image_to_video/run_curl_image_to_video.sh
examples/online_serving/image_to_video/run_server.sh
examples/online_serving/image_to_video/run_server_hunyuan_video_15.sh
examples/online_serving/mimo_audio/README.md
examples/online_serving/mimo_audio/chat_template.jinja
examples/online_serving/mimo_audio/openai_chat_completion_client_for_multimodal_generation.py
examples/online_serving/ming_flash_omni/README.md
examples/online_serving/ming_flash_omni/run_curl_multimodal_generation.sh
examples/online_serving/ming_flash_omni_tts/README.md
examples/online_serving/ming_flash_omni_tts/run_server.sh
examples/online_serving/ming_flash_omni_tts/speech_client.py
examples/online_serving/moss_tts_nano/README.md
examples/online_serving/moss_tts_nano/gradio_demo.py
examples/online_serving/moss_tts_nano/run_gradio_demo.sh
examples/online_serving/moss_tts_nano/run_server.sh
examples/online_serving/omnivoice/README.md
examples/online_serving/omnivoice/run_server.sh
examples/online_serving/omnivoice/speech_client.py
examples/online_serving/qwen2_5_omni/README.md
examples/online_serving/qwen2_5_omni/gradio_demo.py
examples/online_serving/qwen2_5_omni/run_curl_multimodal_generation.sh
examples/online_serving/qwen2_5_omni/run_gradio_demo.sh
examples/online_serving/qwen3_omni/README.md
examples/online_serving/qwen3_omni/gradio_demo.py
examples/online_serving/qwen3_omni/openai_realtime_client.py
examples/online_serving/qwen3_omni/qwen3_omni_moe_thinking.yaml
examples/online_serving/qwen3_omni/run_curl_multimodal_generation.sh
examples/online_serving/qwen3_omni/run_gradio_demo.sh
examples/online_serving/qwen3_omni/streaming_video_client.py
examples/online_serving/qwen3_tts/README.md
examples/online_serving/qwen3_tts/batch_speech_client.py
examples/online_serving/qwen3_tts/gradio_demo.py
examples/online_serving/qwen3_tts/openai_speech_client.py
examples/online_serving/qwen3_tts/run_gradio_demo.sh
examples/online_serving/qwen3_tts/run_server.sh
examples/online_serving/qwen3_tts/speaker_embedding_interpolation.py
examples/online_serving/qwen3_tts/streaming_speech_client.py
examples/online_serving/qwen3_tts/tts_common.py
examples/online_serving/stable_audio/README.md
examples/online_serving/stable_audio/curl_examples.sh
examples/online_serving/stable_audio/stable_audio_client.py
examples/online_serving/text_to_image/README.md
examples/online_serving/text_to_image/gradio_demo.py
examples/online_serving/text_to_image/openai_chat_client.py
examples/online_serving/text_to_image/run_curl_text_to_image.sh
examples/online_serving/text_to_image/run_server.sh
examples/online_serving/text_to_video/README.md
examples/online_serving/text_to_video/run_curl_hunyuan_video_15.sh
examples/online_serving/text_to_video/run_curl_ltx2.sh
examples/online_serving/text_to_video/run_curl_text_to_video.sh
examples/online_serving/text_to_video/run_server.sh
examples/online_serving/text_to_video/run_server_hunyuan_video_15.sh
examples/online_serving/text_to_video/run_server_ltx2.sh
examples/online_serving/voxcpm/README.md
examples/online_serving/voxcpm/openai_speech_client.py
examples/online_serving/voxcpm/run_server.sh
examples/online_serving/voxcpm2/README.md
examples/online_serving/voxcpm2/gradio_demo.py
examples/online_serving/voxcpm2/openai_speech_client.py
examples/online_serving/voxtral_tts/gradio_demo.py
examples/online_serving/voxtral_tts/text_preprocess.py
recipes/README.md
recipes/TEMPLATE.md
recipes/LTX/LTX-2.3.md
recipes/Qwen/Qwen3-Omni.md
recipes/Wan-AI/Wan2.2-I2V.md
recipes/inclusionAI/Ming-flash-omni-2.0.md
requirements/common.txt
requirements/cpu.txt
requirements/cuda.txt
requirements/musa.txt
requirements/npu.txt
requirements/rocm.txt
requirements/xpu.txt
scripts/build_wheel.sh
tests/__init__.py
tests/conftest.py
tests/test_arg_utils.py
tests/test_config_factory.py
tests/test_data_entry_keys.py
tests/test_diffusion_config_fields.py
tests/test_diffusion_config_propagation.py
tests/test_fish_speech_voice_cache.py
tests/test_generate_nightly_perf_excel.py
tests/test_generate_nightly_perf_html.py
tests/test_outputs.py
tests/test_version.py
tests/test_voice_cache.py
tests/assets/cosyvoice3/zero_shot_prompt.wav
tests/assets/qwen3_tts/clone_2.wav
tests/benchmarks/conftest.py
tests/benchmarks/test_accuracy_bench_utils.py
tests/benchmarks/test_bench_tts_cli.py
tests/benchmarks/test_diffusion_backends_metrics.py
tests/benchmarks/test_seed_tts_dataset_variants.py
tests/benchmarks/test_serve_cli.py
tests/benchmarks/metrics/test_metrics.py
tests/benchmarks/patch/test_patch.py
tests/comfyui/conftest.py
tests/comfyui/test_comfyui_integration.py
tests/config/__init__.py
tests/config/test_pipeline_registry.py
tests/core/test_prefix_cache.py
tests/core/sched/test_chunk_scheduling_coordinator.py
tests/core/sched/test_generation_scheduler_restore.py
tests/core/sched/test_omni_scheduler_mixin.py
tests/dfx/conftest.py
tests/dfx/perf/scripts/diffusion_result_template.json
tests/dfx/perf/scripts/result_omni_template.json
tests/dfx/perf/scripts/run_benchmark.py
tests/dfx/perf/scripts/run_diffusion_benchmark.py
tests/dfx/perf/tests/test_ltx2_vllm_omni.json
tests/dfx/perf/tests/test_qwen_image_edit_2509_vllm_omni.json
tests/dfx/perf/tests/test_qwen_image_edit_vllm_omni.json
tests/dfx/perf/tests/test_qwen_image_layered_vllm_omni.json
tests/dfx/perf/tests/test_qwen_image_vllm_omni.json
tests/dfx/perf/tests/test_qwen_omni.json
tests/dfx/perf/tests/test_runner_metadata.py
tests/dfx/perf/tests/test_tts.json
tests/dfx/perf/tests/test_wan22_i2v_vllm_omni.json
tests/dfx/reliability/README.md
tests/dfx/reliability/conftest.py
tests/dfx/reliability/helpers.py
tests/dfx/reliability/test_reliability_qwen3_omni.py
tests/dfx/reliability/test_reliability_wan22.py
tests/dfx/stability/README.md
tests/dfx/stability/conftest.py
tests/dfx/stability/helpers.py
tests/dfx/stability/scripts/__init__.py
tests/dfx/stability/scripts/generate_report.py
tests/dfx/stability/scripts/resource_monitor.sh
tests/dfx/stability/scripts/test_stability_qwen3_omni.py
tests/dfx/stability/scripts/test_stability_qwen3_tts.py
tests/dfx/stability/scripts/test_stability_qwen_image.py
tests/dfx/stability/scripts/test_stability_wan22.py
tests/dfx/stability/tests/test_qwen3_omni.json
tests/dfx/stability/tests/test_qwen3_tts.json
tests/dfx/stability/tests/test_qwen_image.json
tests/dfx/stability/tests/test_wan22.json
tests/diffusion/test_data_is_moe.py
tests/diffusion/test_diffusers_adapter.py
tests/diffusion/test_diffusion_engine_metrics.py
tests/diffusion/test_diffusion_model_runner.py
tests/diffusion/test_diffusion_request.py
tests/diffusion/test_diffusion_scheduler.py
tests/diffusion/test_diffusion_step_pipeline.py
tests/diffusion/test_diffusion_worker.py
tests/diffusion/test_diffusion_worker_cuda_profiler.py
tests/diffusion/test_inline_stage_diffusion_client.py
tests/diffusion/test_multiproc_engine_concurrency.py
tests/diffusion/test_stage_diffusion_proc.py
tests/diffusion/test_worker_wrapper_base.py
tests/diffusion/attention/test_attention_sp.py
tests/diffusion/attention/test_flash_attn.py
tests/diffusion/attention/test_ulysses_uaa.py
tests/diffusion/cache/__init__.py
tests/diffusion/cache/test_cache_backends.py
tests/diffusion/cache/test_cache_dit.py
tests/diffusion/cache/test_teacache_extractors.py
tests/diffusion/distributed/test_autoencoder_kl_wan.py
tests/diffusion/distributed/test_autoencoder_kl_wan_encode.py
tests/diffusion/distributed/test_cfg_parallel.py
tests/diffusion/distributed/test_comm.py
tests/diffusion/distributed/test_distributed_vae_executor.py
tests/diffusion/distributed/test_hsdp.py
tests/diffusion/distributed/test_parallel_state_sp_groups.py
tests/diffusion/distributed/test_sp_plan_hooks.py
tests/diffusion/distributed/test_ulysses_uaa_perf.py
tests/diffusion/distributed/test_vae_patch_parallel.py
tests/diffusion/hooks/test_hook_registry.py
tests/diffusion/layers/__init__.py
tests/diffusion/layers/test_adalayernorm.py
tests/diffusion/layers/test_norm.py
tests/diffusion/layers/test_rotary_emb_equivalence.py
tests/diffusion/lora/helpers.py
tests/diffusion/lora/test_base_linear.py
tests/diffusion/lora/test_lora_manager.py
tests/diffusion/model_loader/test_diffusers_loader.py
tests/diffusion/model_loader/test_diffusers_loader_gguf.py
tests/diffusion/model_loader/gguf_adapters/test_flux2_klein_gguf_adapter.py
tests/diffusion/model_loader/gguf_adapters/test_qwen_image_gguf_adapter.py
tests/diffusion/model_loader/gguf_adapters/test_z_image_gguf_adapter.py
tests/diffusion/models/bagel/__init__.py
tests/diffusion/models/bagel/test_bagel_lora.py
tests/diffusion/models/bagel/test_combine_cfg.py
tests/diffusion/models/bagel/test_trajectory_recording.py
tests/diffusion/models/dmd2/__init__.py
tests/diffusion/models/dmd2/test_dmd2_request_sanitization.py
tests/diffusion/models/dmd2/test_dmd2_scheduler.py
tests/diffusion/models/flux/__init__.py
tests/diffusion/models/flux/test_flux_prefix_propagation.py
tests/diffusion/models/flux2/test_flux2_transformer_tp.py
tests/diffusion/models/glm_image/test_glm_image_sp.py
tests/diffusion/models/hunyuan_image3/test_hunyuan_fused_moe.py
tests/diffusion/models/hunyuan_image3/test_hunyuan_image3_sampler.py
tests/diffusion/models/ltx2/test_ltx2_3_pipeline.py
tests/diffusion/models/ltx2/test_ltx2_cfg_parallel_adaptation.py
tests/diffusion/models/ltx2/test_ltx2_hsdp.py
tests/diffusion/models/nextstep_1_1/test_nextstep_cfg_parallel_layout.py
tests/diffusion/models/qwen_image/test_qwen_image_edit_plus.py
tests/diffusion/models/qwen_image/test_qwen_image_max_sequence_length.py
tests/diffusion/models/qwen_image/test_qwen_image_size_utils.py
tests/diffusion/models/stable_audio/test_stable_audio_hsdp.py
tests/diffusion/models/t5_encoder/test_t5_encoder_prefix.py
tests/diffusion/models/t5_encoder/test_t5_encoder_tp.py
tests/diffusion/models/wan2_2/__init__.py
tests/diffusion/models/wan2_2/conftest.py
tests/diffusion/models/wan2_2/test_wan22_i2v_pipeline.py
tests/diffusion/models/wan2_2/test_wan22_pipeline_diffuse.py
tests/diffusion/models/wan2_2/test_wan22_pipeline_helpers.py
tests/diffusion/models/wan2_2/test_wan22_ti2v_pipeline.py
tests/diffusion/models/wan2_2/test_wan22_vace_pipeline.py
tests/diffusion/models/z_image/test_zimage_tp_constraints.py
tests/diffusion/offloader/test_layerwise_backend.py
tests/diffusion/offloader/test_module_collector.py
tests/diffusion/offloader/test_sequential_backend.py
tests/diffusion/quantization/__init__.py
tests/diffusion/quantization/test_component_routing.py
tests/diffusion/quantization/test_fp8_config.py
tests/diffusion/quantization/test_gguf_config.py
tests/diffusion/quantization/test_inc_config.py
tests/diffusion/quantization/test_int8_config.py
tests/diffusion/quantization/test_quantization_quality.py
tests/distributed/omni_connectors/test_adapter_and_flow.py
tests/distributed/omni_connectors/test_basic_connectors.py
tests/distributed/omni_connectors/test_chunk_transfer_adapter.py
tests/distributed/omni_connectors/test_kv_flow.py
tests/distributed/omni_connectors/test_mooncake_transfer_engine_buffer.py
tests/distributed/omni_connectors/test_mooncake_transfer_engine_rdma.py
tests/distributed/omni_connectors/test_omni_connector_configs.py
tests/distributed/omni_connectors/test_shm_connector.py
tests/distributed/omni_connectors/test_tp_rank_aware.py
tests/distributed/omni_coordinator/test_load_balancer.py
tests/distributed/omni_coordinator/test_omni_coord_client_for_hub.py
tests/distributed/omni_coordinator/test_omni_coord_client_for_stage.py
tests/distributed/omni_coordinator/test_omni_coordinator.py
tests/e2e/__init__.py
tests/e2e/accuracy/conftest.py
tests/e2e/accuracy/helpers.py
tests/e2e/accuracy/test_gebench_h100_smoke.py
tests/e2e/accuracy/test_gedit_bench_h100_smoke.py
tests/e2e/accuracy/test_ltx2_3_video_similarity.py
tests/e2e/accuracy/test_qwen_image.py
tests/e2e/accuracy/test_qwen_image_edit.py
tests/e2e/accuracy/test_qwen_image_layered.py
tests/e2e/accuracy/qwen3_omni/__init__.py
tests/e2e/accuracy/qwen3_omni/qwen3_omni_acc_bench_core.py
tests/e2e/accuracy/qwen3_omni/run_qwen_omni_acc_benchmark.py
tests/e2e/accuracy/qwen3_omni/test_qwen3_omni.py
tests/e2e/accuracy/wan22_i2v/__init__.py
tests/e2e/accuracy/wan22_i2v/run_wan22_i2v_diffusers_cp.py
tests/e2e/accuracy/wan22_i2v/test_wan22_i2v_video_similarity.py
tests/e2e/accuracy/wan22_i2v/wan22_i2v_video_similarity_common.py
tests/e2e/offline_inference/__init__.py
tests/e2e/offline_inference/compute_lpips.py
tests/e2e/offline_inference/run_quantization_e2e.sh
tests/e2e/offline_inference/test_bagel_img2img.py
tests/e2e/offline_inference/test_bagel_lora.py
tests/e2e/offline_inference/test_bagel_text2img.py
tests/e2e/offline_inference/test_bagel_understanding.py
tests/e2e/offline_inference/test_cache_dit.py
tests/e2e/offline_inference/test_cosyvoice3.py
tests/e2e/offline_inference/test_diffusion_cpu_offload.py
tests/e2e/offline_inference/test_diffusion_layerwise_offload.py
tests/e2e/offline_inference/test_diffusion_lora.py
tests/e2e/offline_inference/test_dynin_omni.py
tests/e2e/offline_inference/test_expert_parallel.py
tests/e2e/offline_inference/test_flux.py
tests/e2e/offline_inference/test_flux2_klein.py
tests/e2e/offline_inference/test_flux_autoround_w4a16.py
tests/e2e/offline_inference/test_flux_kontext.py
tests/e2e/offline_inference/test_hunyuanimage3_text2img.py
tests/e2e/offline_inference/test_internvla_a1.py
tests/e2e/offline_inference/test_ltx2_cfg_parallel_parity.py
tests/e2e/offline_inference/test_magi_human.py
tests/e2e/offline_inference/test_mammoth_moda2.py
tests/e2e/offline_inference/test_ming_flash_omni.py
tests/e2e/offline_inference/test_moss_tts_nano.py
tests/e2e/offline_inference/test_omni_sleep_mode.py
tests/e2e/offline_inference/test_omnivoice.py
tests/e2e/offline_inference/test_ovis_image.py
tests/e2e/offline_inference/test_quantization_fp8.py
tests/e2e/offline_inference/test_qwen2_5_omni_autoround_w4a16.py
tests/e2e/offline_inference/test_qwen2_5_omni_expansion.py
tests/e2e/offline_inference/test_qwen3_omni.py
tests/e2e/offline_inference/test_qwen3_omni_autoround_w4a16.py
tests/e2e/offline_inference/test_qwen3_tts_base.py
tests/e2e/offline_inference/test_qwen3_tts_customvoice.py
tests/e2e/offline_inference/test_qwen_image_diffusion_batching.py
tests/e2e/offline_inference/test_sequence_parallel.py
tests/e2e/offline_inference/test_stable_audio_expansion.py
tests/e2e/offline_inference/test_t2i_model.py
tests/e2e/offline_inference/test_t2v_model.py
tests/e2e/offline_inference/test_teacache.py
tests/e2e/offline_inference/test_vae_decode_parallelism.py
tests/e2e/offline_inference/test_voxcpm.py
tests/e2e/offline_inference/test_voxcpm2.py
tests/e2e/offline_inference/test_voxtral_tts.py
tests/e2e/offline_inference/test_zimage_parallelism.py
tests/e2e/offline_inference/custom_pipeline/__init__.py
tests/e2e/offline_inference/custom_pipeline/flow_match_sde_scheduler.py
tests/e2e/offline_inference/custom_pipeline/qwen_image_pipeline_with_logprob.py
tests/e2e/offline_inference/custom_pipeline/test_async_omni_collective_rpc.py
tests/e2e/offline_inference/custom_pipeline/test_async_omni_qwen_image_generate.py
tests/e2e/offline_inference/custom_pipeline/test_worker_extension.py
tests/e2e/offline_inference/custom_pipeline/worker_extension.py
tests/e2e/online_serving/__init__.py
tests/e2e/online_serving/test_bagel_expansion.py
tests/e2e/online_serving/test_bagel_online.py
tests/e2e/online_serving/test_cosyvoice3_tts.py
tests/e2e/online_serving/test_diffusers_adapter.py
tests/e2e/online_serving/test_dynin_omni_expansion.py
tests/e2e/online_serving/test_flux2_expansion.py
tests/e2e/online_serving/test_flux2_klein_inpaint_expansion.py
tests/e2e/online_serving/test_flux_2_dev_expansion.py
tests/e2e/online_serving/test_flux_kontext_expansion.py
tests/e2e/online_serving/test_hunyuan_video_15_expansion.py
tests/e2e/online_serving/test_image_gen_edit.py
tests/e2e/online_serving/test_images_generations_lora.py
tests/e2e/online_serving/test_longcat_image_edit_expansion.py
tests/e2e/online_serving/test_longcat_image_expansion.py
tests/e2e/online_serving/test_mimo_audio.py
tests/e2e/online_serving/test_ming_flash_omni.py
tests/e2e/online_serving/test_moss_tts_nano.py
tests/e2e/online_serving/test_nextstep_expansion.py
tests/e2e/online_serving/test_omnivoice.py
tests/e2e/online_serving/test_qwen2_5_omni_expansion.py
tests/e2e/online_serving/test_qwen3_omni.py
tests/e2e/online_serving/test_qwen3_omni_expansion.py
tests/e2e/online_serving/test_qwen3_tts_base.py
tests/e2e/online_serving/test_qwen3_tts_base_expansion.py
tests/e2e/online_serving/test_qwen3_tts_batch.py
tests/e2e/online_serving/test_qwen3_tts_customvoice.py
tests/e2e/online_serving/test_qwen3_tts_customvoice_expansion.py
tests/e2e/online_serving/test_qwen3_tts_speaker_embedding.py
tests/e2e/online_serving/test_qwen3_tts_websocket.py
tests/e2e/online_serving/test_qwen_image_edit_expansion.py
tests/e2e/online_serving/test_qwen_image_expansion.py
tests/e2e/online_serving/test_qwen_image_layered_expansion.py
tests/e2e/online_serving/test_sd3_expansion.py
tests/e2e/online_serving/test_video_generation_api.py
tests/e2e/online_serving/test_voxtral_tts.py
tests/e2e/online_serving/test_wan22_expansion.py
tests/e2e/online_serving/test_wan_2_1_vace_expansion.py
tests/e2e/online_serving/test_zimage_expansion.py
tests/e2e/stage_configs/bailingmm_moe_v2_lite_ci.yaml
tests/e2e/stage_configs/bailingmm_moe_v2_lite_thinker_only_ci.yaml
tests/e2e/stage_configs/dynin_omni_ci.yaml
tests/engine/__init__.py
tests/engine/test_arg_utils.py
tests/engine/test_async_omni_engine_abort.py
tests/engine/test_async_omni_engine_input.py
tests/engine/test_async_omni_engine_outputs.py
tests/engine/test_async_omni_engine_stage_init.py
tests/engine/test_cfg_companion_tracker.py
tests/engine/test_cross_stage_lora.py
tests/engine/test_orchestrator.py
tests/engine/test_orchestrator_error_handling.py
tests/engine/test_orchestrator_kv_sender_info.py
tests/engine/test_output_modality.py
tests/engine/test_output_processor.py
tests/engine/test_single_stage_mode.py
tests/engine/test_stage_engine_core_client.py
tests/entrypoints/test_async_omni.py
tests/entrypoints/test_async_omni_diffusion_config.py
tests/entrypoints/test_omni_base_profiler.py
tests/entrypoints/test_omni_entrypoints.py
tests/entrypoints/test_omni_new_request_data.py
tests/entrypoints/test_omni_sleep_mode.py
tests/entrypoints/test_pd_disaggregation.py
tests/entrypoints/test_realtime_connection_helpers.py
tests/entrypoints/test_serve.py
tests/entrypoints/test_stage_utils.py
tests/entrypoints/test_utils.py
tests/entrypoints/openai_api/__init__.py
tests/entrypoints/openai_api/conftest_video.py
tests/entrypoints/openai_api/test_image_server.py
tests/entrypoints/openai_api/test_qwen3_omni_realtime_websocket.py
tests/entrypoints/openai_api/test_serving_audio_generate.py
tests/entrypoints/openai_api/test_serving_chat_metrics.py
tests/entrypoints/openai_api/test_serving_chat_multistage_generation.py
tests/entrypoints/openai_api/test_serving_chat_sampling_params.py
tests/entrypoints/openai_api/test_serving_chat_speaker.py
tests/entrypoints/openai_api/test_serving_speech.py
tests/entrypoints/openai_api/test_serving_speech_stream.py
tests/entrypoints/openai_api/test_serving_speech_voxcpm.py
tests/entrypoints/openai_api/test_serving_video_stream.py
tests/entrypoints/openai_api/test_stage_params.py
tests/entrypoints/openai_api/test_text_splitter.py
tests/entrypoints/openai_api/test_video_api_utils.py
tests/entrypoints/openai_api/test_video_frame_filter.py
tests/entrypoints/openai_api/test_video_server.py
tests/entrypoints/openai_api/test_video_stream_handler.py
tests/entrypoints/openai_api/test_video_stream_session.py
tests/examples/conftest.py
tests/examples/helpers.py
tests/examples/test_slerp_interpolation.py
tests/examples/offline_inference/__init__.py
tests/examples/offline_inference/test_qwen3_tts_estimator.py
tests/examples/offline_inference/test_text_to_image.py
tests/examples/online_serving/__init__.py
tests/examples/online_serving/test_qwen2_5_omni.py
tests/examples/online_serving/test_qwen3_omni.py
tests/examples/online_serving/test_text_to_image.py
tests/helpers/__init__.py
tests/helpers/assertions.py
tests/helpers/env.py
tests/helpers/mark.py
tests/helpers/media.py
tests/helpers/process.py
tests/helpers/runtime.py
tests/helpers/stage_config.py
tests/helpers/fixtures/__init__.py
tests/helpers/fixtures/env.py
tests/helpers/fixtures/log.py
tests/helpers/fixtures/run_args.py
tests/helpers/fixtures/runtime.py
tests/metrics/test_stats.py
tests/model_executor/models/registry.py
tests/model_executor/models/test_encoder_quant_config.py
tests/model_executor/models/test_fish_speech_regressions.py
tests/model_executor/models/test_fish_speech_voice_cache.py
tests/model_executor/models/test_omni_processing.py
tests/model_executor/models/cosyvoice3/test_cosyvoice3_components.py
tests/model_executor/models/cosyvoice3/test_cosyvoice3_model_helpers.py
tests/model_executor/models/cosyvoice3/test_cosyvoice3_utils.py
tests/model_executor/models/dynin_omni/test_dynin_omni_token2audio.py
tests/model_executor/models/glm_image/test_glm_image_ar.py
tests/model_executor/models/mimo_audio/test_mimo_audio_code2wav_batch_decode.py
tests/model_executor/models/ming_flash_omni/test_talker_cfm.py
tests/model_executor/models/ming_flash_omni/test_talker_modules.py
tests/model_executor/models/qwen2_5_omni/test_audio_length.py
tests/model_executor/models/qwen2_5_omni/test_qwen2_5_omni_embed.py
tests/model_executor/models/qwen3_tts/__init__.py
tests/model_executor/models/qwen3_tts/test_code_predictor_dtype.py
tests/model_executor/models/qwen3_tts/test_cuda_graph_decoder.py
tests/model_executor/models/qwen3_tts/test_qwen3_tts_code2wav.py
tests/model_executor/models/voxcpm2/__init__.py
tests/model_executor/models/voxcpm2/test_talker_state_eviction.py
tests/model_executor/models/voxtral_tts/__init__.py
tests/model_executor/models/voxtral_tts/test_audio_tokenizer_parsing.py
tests/model_executor/models/voxtral_tts/test_cuda_graph_acoustic_transformer.py
tests/model_executor/models/voxtral_tts/test_text_preprocess.py
tests/model_executor/stage_input_processors/test_cosyvoice3_stage_input_processors.py
tests/model_executor/stage_input_processors/test_glm_image.py
tests/model_executor/stage_input_processors/test_mimo_audio_flush_remaining_codes.py
tests/model_executor/stage_input_processors/test_mimo_audio_llm2code2wav.py
tests/model_executor/stage_input_processors/test_qwen3_omni_streaming_helpers.py
tests/model_executor/stage_input_processors/test_qwen3_tts_async_chunk.py
tests/model_executor/stage_input_processors/test_voxcpm_async_chunk.py
tests/model_executor/stage_input_processors/test_voxtral_tts_async_chunk.py
tests/profile/test_omni_torch_profiler.py
tests/utils/test_audio.py
tests/worker/test_gpu_generation_model_runner.py
tests/worker/test_omni_connector_mixin.py
tests/worker/test_omni_gpu_model_runner.py
tests/worker/test_process_gpu_memory.py
tools/configure_stage_memory.py
tools/nightly/buildkite_testcase_statistics.py
tools/nightly/generate_nightly_perf_excel.py
tools/nightly/generate_nightly_perf_html.py
tools/nightly/send_nightly_email.py
tools/pre_commit/check_pickle_imports.py
tools/wan22/assemble_wan22_i2v_diffusers.py
vllm_omni/__init__.py
vllm_omni/_version.py
vllm_omni/data_entry_keys.py
vllm_omni/logger.py
vllm_omni/outputs.py
vllm_omni/patch.py
vllm_omni/request.py
vllm_omni/version.py
vllm_omni.egg-info/PKG-INFO
vllm_omni.egg-info/SOURCES.txt
vllm_omni.egg-info/dependency_links.txt
vllm_omni.egg-info/entry_points.txt
vllm_omni.egg-info/requires.txt
vllm_omni.egg-info/top_level.txt
vllm_omni/assets/video.py
vllm_omni/benchmarks/serve.py
vllm_omni/benchmarks/data_modules/__init__.py
vllm_omni/benchmarks/data_modules/daily_omni_dataset.py
vllm_omni/benchmarks/data_modules/daily_omni_eval.py
vllm_omni/benchmarks/data_modules/daily_omni_text_audio.py
vllm_omni/benchmarks/data_modules/random_multi_modal_dataset.py
vllm_omni/benchmarks/data_modules/seed_tts_dataset.py
vllm_omni/benchmarks/data_modules/seed_tts_eval.py
vllm_omni/benchmarks/metrics/__init__.py
vllm_omni/benchmarks/metrics/metrics.py
vllm_omni/benchmarks/patch/__init__.py
vllm_omni/benchmarks/patch/patch.py
vllm_omni/config/__init__.py
vllm_omni/config/lora.py
vllm_omni/config/model.py
vllm_omni/config/pipeline_registry.py
vllm_omni/config/stage_config.py
vllm_omni/config/yaml_util.py
vllm_omni/core/__init__.py
vllm_omni/core/prefix_cache.py
vllm_omni/core/sched/__init__.py
vllm_omni/core/sched/omni_ar_scheduler.py
vllm_omni/core/sched/omni_generation_scheduler.py
vllm_omni/core/sched/omni_scheduler_mixin.py
vllm_omni/core/sched/omni_scheduling_coordinator.py
vllm_omni/core/sched/output.py
vllm_omni/deploy/bagel.yaml
vllm_omni/deploy/bagel_single_stage.yaml
vllm_omni/deploy/bagel_think.yaml
vllm_omni/deploy/cosyvoice3.yaml
vllm_omni/deploy/fish_qwen3_omni.yaml
vllm_omni/deploy/glm_image.yaml
vllm_omni/deploy/mimo_audio.yaml
vllm_omni/deploy/moss_tts_nano.yaml
vllm_omni/deploy/qwen2_5_omni.yaml
vllm_omni/deploy/qwen3_omni_moe.yaml
vllm_omni/deploy/qwen3_tts.yaml
vllm_omni/deploy/voxcpm2.yaml
vllm_omni/deploy/voxtral_tts.yaml
vllm_omni/diffusion/__init__.py
vllm_omni/diffusion/compile.py
vllm_omni/diffusion/data.py
vllm_omni/diffusion/diffusion_engine.py
vllm_omni/diffusion/envs.py
vllm_omni/diffusion/forward_context.py
vllm_omni/diffusion/inline_stage_diffusion_client.py
vllm_omni/diffusion/ipc.py
vllm_omni/diffusion/model_metadata.py
vllm_omni/diffusion/registry.py
vllm_omni/diffusion/request.py
vllm_omni/diffusion/stage_diffusion_client.py
vllm_omni/diffusion/stage_diffusion_proc.py
vllm_omni/diffusion/attention/__init__.py
vllm_omni/diffusion/attention/layer.py
vllm_omni/diffusion/attention/selector.py
vllm_omni/diffusion/attention/backends/__init__.py
vllm_omni/diffusion/attention/backends/abstract.py
vllm_omni/diffusion/attention/backends/flash_attn.py
vllm_omni/diffusion/attention/backends/registry.py
vllm_omni/diffusion/attention/backends/ring_flash_attn.py
vllm_omni/diffusion/attention/backends/ring_pytorch_attn.py
vllm_omni/diffusion/attention/backends/sage_attn.py
vllm_omni/diffusion/attention/backends/sdpa.py
vllm_omni/diffusion/attention/backends/ring/__init__.py
vllm_omni/diffusion/attention/backends/ring/ring_globals.py
vllm_omni/diffusion/attention/backends/ring/ring_kernels.py
vllm_omni/diffusion/attention/backends/ring/ring_selector.py
vllm_omni/diffusion/attention/backends/ring/ring_utils.py
vllm_omni/diffusion/attention/backends/utils/__init__.py
vllm_omni/diffusion/attention/backends/utils/fa.py
vllm_omni/diffusion/attention/parallel/__init__.py
vllm_omni/diffusion/attention/parallel/base.py
vllm_omni/diffusion/attention/parallel/factory.py
vllm_omni/diffusion/attention/parallel/ring.py
vllm_omni/diffusion/attention/parallel/ulysses.py
vllm_omni/diffusion/cache/__init__.py
vllm_omni/diffusion/cache/base.py
vllm_omni/diffusion/cache/cache_dit_backend.py
vllm_omni/diffusion/cache/selector.py
vllm_omni/diffusion/cache/teacache/__init__.py
vllm_omni/diffusion/cache/teacache/backend.py
vllm_omni/diffusion/cache/teacache/coefficient_estimator.py
vllm_omni/diffusion/cache/teacache/config.py
vllm_omni/diffusion/cache/teacache/extractors.py
vllm_omni/diffusion/cache/teacache/hook.py
vllm_omni/diffusion/cache/teacache/state.py
vllm_omni/diffusion/distributed/__init__.py
vllm_omni/diffusion/distributed/cfg_parallel.py
vllm_omni/diffusion/distributed/comm.py
vllm_omni/diffusion/distributed/group_coordinator.py
vllm_omni/diffusion/distributed/hsdp.py
vllm_omni/diffusion/distributed/hsdp_utils.py
vllm_omni/diffusion/distributed/parallel_state.py
vllm_omni/diffusion/distributed/sp_plan.py
vllm_omni/diffusion/distributed/sp_sharding.py
vllm_omni/diffusion/distributed/utils.py
vllm_omni/diffusion/distributed/vae_patch_parallel.py
vllm_omni/diffusion/distributed/autoencoders/__init__.py
vllm_omni/diffusion/distributed/autoencoders/autoencoder_kl.py
vllm_omni/diffusion/distributed/autoencoders/autoencoder_kl_qwenimage.py
vllm_omni/diffusion/distributed/autoencoders/autoencoder_kl_wan.py
vllm_omni/diffusion/distributed/autoencoders/distributed_vae_executor.py
vllm_omni/diffusion/executor/__init__.py
vllm_omni/diffusion/executor/abstract.py
vllm_omni/diffusion/executor/multiproc_executor.py
vllm_omni/diffusion/hooks/__init__.py
vllm_omni/diffusion/hooks/base.py
vllm_omni/diffusion/hooks/sequence_parallel.py
vllm_omni/diffusion/layers/__init__.py
vllm_omni/diffusion/layers/adalayernorm.py
vllm_omni/diffusion/layers/custom_op.py
vllm_omni/diffusion/layers/norm.py
vllm_omni/diffusion/layers/rope.py
vllm_omni/diffusion/lora/__init__.py
vllm_omni/diffusion/lora/manager.py
vllm_omni/diffusion/lora/utils.py
vllm_omni/diffusion/lora/layers/__init__.py
vllm_omni/diffusion/lora/layers/base_linear.py
vllm_omni/diffusion/lora/layers/column_parallel_linear.py
vllm_omni/diffusion/lora/layers/replicated_linear.py
vllm_omni/diffusion/lora/layers/row_parallel_linear.py
vllm_omni/diffusion/model_loader/__init__.py
vllm_omni/diffusion/model_loader/diffusers_loader.py
vllm_omni/diffusion/model_loader/hub_prefetch.py
vllm_omni/diffusion/model_loader/gguf_adapters/__init__.py
vllm_omni/diffusion/model_loader/gguf_adapters/base.py
vllm_omni/diffusion/model_loader/gguf_adapters/flux2_klein.py
vllm_omni/diffusion/model_loader/gguf_adapters/qwen_image.py
vllm_omni/diffusion/model_loader/gguf_adapters/z_image.py
vllm_omni/diffusion/models/__init__.py
vllm_omni/diffusion/models/interface.py
vllm_omni/diffusion/models/progress_bar.py
vllm_omni/diffusion/models/utils.py
vllm_omni/diffusion/models/bagel/__init__.py
vllm_omni/diffusion/models/bagel/autoencoder.py
vllm_omni/diffusion/models/bagel/bagel_transformer.py
vllm_omni/diffusion/models/bagel/pipeline_bagel.py
vllm_omni/diffusion/models/cosyvoice3_audio/__init__.py
vllm_omni/diffusion/models/cosyvoice3_audio/cosyvoice3_dit.py
vllm_omni/diffusion/models/diffusers_adapter/__init__.py
vllm_omni/diffusion/models/diffusers_adapter/pipeline_diffusers_adapter.py
vllm_omni/diffusion/models/dmd2/__init__.py
vllm_omni/diffusion/models/dmd2/mixin.py
vllm_omni/diffusion/models/dreamid_omni/__init__.py
vllm_omni/diffusion/models/dreamid_omni/fusion.py
vllm_omni/diffusion/models/dreamid_omni/pipeline_dreamid_omni.py
vllm_omni/diffusion/models/dreamid_omni/wan2_2.py
vllm_omni/diffusion/models/flux/__init__.py
vllm_omni/diffusion/models/flux/flux_pipeline_mixin.py
vllm_omni/diffusion/models/flux/flux_transformer.py
vllm_omni/diffusion/models/flux/pipeline_flux.py
vllm_omni/diffusion/models/flux/pipeline_flux_kontext.py
vllm_omni/diffusion/models/flux2/__init__.py
vllm_omni/diffusion/models/flux2/flux2_transformer.py
vllm_omni/diffusion/models/flux2/pipeline_flux2.py
vllm_omni/diffusion/models/flux2_klein/__init__.py
vllm_omni/diffusion/models/flux2_klein/flux2_klein_transformer.py
vllm_omni/diffusion/models/flux2_klein/pipeline_flux2_klein.py
vllm_omni/diffusion/models/glm_image/__init__.py
vllm_omni/diffusion/models/glm_image/glm_image_transformer.py
vllm_omni/diffusion/models/glm_image/pipeline_glm_image.py
vllm_omni/diffusion/models/helios/__init__.py
vllm_omni/diffusion/models/helios/helios_transformer.py
vllm_omni/diffusion/models/helios/pipeline_helios.py
vllm_omni/diffusion/models/helios/scheduling_helios.py
vllm_omni/diffusion/models/hunyuan_image3/__init__.py
vllm_omni/diffusion/models/hunyuan_image3/autoencoder.py
vllm_omni/diffusion/models/hunyuan_image3/hunyuan_fused_moe.py
vllm_omni/diffusion/models/hunyuan_image3/hunyuan_image3_tokenizer.py
vllm_omni/diffusion/models/hunyuan_image3/hunyuan_image3_transformer.py
vllm_omni/diffusion/models/hunyuan_image3/pipeline_hunyuan_image3.py
vllm_omni/diffusion/models/hunyuan_image3/system_prompt.py
vllm_omni/diffusion/models/hunyuan_video/__init__.py
vllm_omni/diffusion/models/hunyuan_video/hunyuan_video_15_transformer.py
vllm_omni/diffusion/models/hunyuan_video/pipeline_hunyuan_video_1_5.py
vllm_omni/diffusion/models/hunyuan_video/pipeline_hunyuan_video_1_5_i2v.py
vllm_omni/diffusion/models/internvla_a1/__init__.py
vllm_omni/diffusion/models/internvla_a1/adapter_qwen3_vl.py
vllm_omni/diffusion/models/internvla_a1/config.py
vllm_omni/diffusion/models/internvla_a1/cosmos_ci_torch.py
vllm_omni/diffusion/models/internvla_a1/model_cosmos.py
vllm_omni/diffusion/models/internvla_a1/model_internvla_a1.py
vllm_omni/diffusion/models/internvla_a1/pipeline_internvla_a1.py
vllm_omni/diffusion/models/longcat_image/__init__.py
vllm_omni/diffusion/models/longcat_image/longcat_image_transformer.py
vllm_omni/diffusion/models/longcat_image/pipeline_longcat_image.py
vllm_omni/diffusion/models/longcat_image/pipeline_longcat_image_edit.py
vllm_omni/diffusion/models/ltx2/__init__.py
vllm_omni/diffusion/models/ltx2/ltx2_transformer.py
vllm_omni/diffusion/models/ltx2/pipeline_ltx2.py
vllm_omni/diffusion/models/ltx2/pipeline_ltx2_3.py
vllm_omni/diffusion/models/ltx2/pipeline_ltx2_3_image2video.py
vllm_omni/diffusion/models/ltx2/pipeline_ltx2_image2video.py
vllm_omni/diffusion/models/ltx2/pipeline_ltx2_latent_upsample.py
vllm_omni/diffusion/models/magi_human/__init__.py
vllm_omni/diffusion/models/magi_human/magi_human_dit.py
vllm_omni/diffusion/models/magi_human/pipeline_magi_human.py
vllm_omni/diffusion/models/mammoth_moda2/mammothmoda2_dit_model.py
vllm_omni/diffusion/models/mammoth_moda2/pipeline_mammothmoda2_dit.py
vllm_omni/diffusion/models/mammoth_moda2/rope_real.py
vllm_omni/diffusion/models/mammoth_moda2/schedulers.py
vllm_omni/diffusion/models/nextstep_1_1/__init__.py
vllm_omni/diffusion/models/nextstep_1_1/modeling_flux_vae.py
vllm_omni/diffusion/models/nextstep_1_1/modeling_nextstep.py
vllm_omni/diffusion/models/nextstep_1_1/modeling_nextstep_heads.py
vllm_omni/diffusion/models/nextstep_1_1/modeling_nextstep_llama.py
vllm_omni/diffusion/models/nextstep_1_1/pipeline_nextstep_1_1.py
vllm_omni/diffusion/models/omnigen2/__init__.py
vllm_omni/diffusion/models/omnigen2/omnigen2_transformer.py
vllm_omni/diffusion/models/omnigen2/pipeline_omnigen2.py
vllm_omni/diffusion/models/omnivoice/__init__.py
vllm_omni/diffusion/models/omnivoice/pipeline_omnivoice.py
vllm_omni/diffusion/models/ovis_image/__init__.py
vllm_omni/diffusion/models/ovis_image/ovis_image_transformer.py
vllm_omni/diffusion/models/ovis_image/pipeline_ovis_image.py
vllm_omni/diffusion/models/qwen_image/__init__.py
vllm_omni/diffusion/models/qwen_image/autoencoder_kl_qwenimage.py
vllm_omni/diffusion/models/qwen_image/cfg_parallel.py
vllm_omni/diffusion/models/qwen_image/pipeline_qwen_image.py
vllm_omni/diffusion/models/qwen_image/pipeline_qwen_image_edit.py
vllm_omni/diffusion/models/qwen_image/pipeline_qwen_image_edit_plus.py
vllm_omni/diffusion/models/qwen_image/pipeline_qwen_image_layered.py
vllm_omni/diffusion/models/qwen_image/qwen_image_transformer.py
vllm_omni/diffusion/models/schedulers/__init__.py
vllm_omni/diffusion/models/schedulers/base.py
vllm_omni/diffusion/models/schedulers/scheduling_dmd2_euler.py
vllm_omni/diffusion/models/schedulers/scheduling_flow_unipc_multistep.py
vllm_omni/diffusion/models/sd3/__init__.py
vllm_omni/diffusion/models/sd3/pipeline_sd3.py
vllm_omni/diffusion/models/sd3/sd3_transformer.py
vllm_omni/diffusion/models/stable_audio/__init__.py
vllm_omni/diffusion/models/stable_audio/pipeline_stable_audio.py
vllm_omni/diffusion/models/stable_audio/stable_audio_transformer.py
vllm_omni/diffusion/models/t5_encoder/__init__.py
vllm_omni/diffusion/models/t5_encoder/t5_encoder.py
vllm_omni/diffusion/models/t5_encoder/t5_gemma_encoder.py
vllm_omni/diffusion/models/wan2_2/__init__.py
vllm_omni/diffusion/models/wan2_2/patch_diffusers.py
vllm_omni/diffusion/models/wan2_2/pipeline_wan2_2.py
vllm_omni/diffusion/models/wan2_2/pipeline_wan2_2_i2v.py
vllm_omni/diffusion/models/wan2_2/pipeline_wan2_2_ti2v.py
vllm_omni/diffusion/models/wan2_2/pipeline_wan2_2_vace.py
vllm_omni/diffusion/models/wan2_2/scheduling_wan_euler.py
vllm_omni/diffusion/models/wan2_2/wan2_2_transformer.py
vllm_omni/diffusion/models/wan2_2/wan2_2_vace_transformer.py
vllm_omni/diffusion/models/z_image/__init__.py
vllm_omni/diffusion/models/z_image/pipeline_z_image.py
vllm_omni/diffusion/models/z_image/z_image_transformer.py
vllm_omni/diffusion/offloader/__init__.py
vllm_omni/diffusion/offloader/base.py
vllm_omni/diffusion/offloader/layerwise_backend.py
vllm_omni/diffusion/offloader/module_collector.py
vllm_omni/diffusion/offloader/sequential_backend.py
vllm_omni/diffusion/postprocess/__init__.py
vllm_omni/diffusion/postprocess/rife_interpolator.py
vllm_omni/diffusion/profiler/__init__.py
vllm_omni/diffusion/profiler/diffusion_pipeline_profiler.py
vllm_omni/diffusion/sched/__init__.py
vllm_omni/diffusion/sched/base_scheduler.py
vllm_omni/diffusion/sched/interface.py
vllm_omni/diffusion/sched/request_scheduler.py
vllm_omni/diffusion/sched/step_scheduler.py
vllm_omni/diffusion/utils/__init__.py
vllm_omni/diffusion/utils/hf_utils.py
vllm_omni/diffusion/utils/media_utils.py
vllm_omni/diffusion/utils/network_utils.py
vllm_omni/diffusion/utils/prompt_utils.py
vllm_omni/diffusion/utils/size_utils.py
vllm_omni/diffusion/utils/tf_utils.py
vllm_omni/diffusion/worker/__init__.py
vllm_omni/diffusion/worker/diffusion_model_runner.py
vllm_omni/diffusion/worker/diffusion_worker.py
vllm_omni/diffusion/worker/utils.py
vllm_omni/distributed/__init__.py
vllm_omni/distributed/kv_transfer/__init__.py
vllm_omni/distributed/kv_transfer/monkey_patch.py
vllm_omni/distributed/omni_connectors/__init__.py
vllm_omni/distributed/omni_connectors/adapter.py
vllm_omni/distributed/omni_connectors/factory.py
vllm_omni/distributed/omni_connectors/kv_transfer_manager.py
vllm_omni/distributed/omni_connectors/connectors/__init__.py
vllm_omni/distributed/omni_connectors/connectors/base.py
vllm_omni/distributed/omni_connectors/connectors/mooncake_store_connector.py
vllm_omni/distributed/omni_connectors/connectors/mooncake_transfer_engine_connector.py
vllm_omni/distributed/omni_connectors/connectors/shm_connector.py
vllm_omni/distributed/omni_connectors/connectors/yuanrong_connector.py
vllm_omni/distributed/omni_connectors/transfer_adapter/__init__.py
vllm_omni/distributed/omni_connectors/transfer_adapter/base.py
vllm_omni/distributed/omni_connectors/transfer_adapter/chunk_transfer_adapter.py
vllm_omni/distributed/omni_connectors/utils/__init__.py
vllm_omni/distributed/omni_connectors/utils/config.py
vllm_omni/distributed/omni_connectors/utils/initialization.py
vllm_omni/distributed/omni_connectors/utils/kv_utils.py
vllm_omni/distributed/omni_connectors/utils/logging.py
vllm_omni/distributed/omni_connectors/utils/serialization.py
vllm_omni/distributed/omni_coordinator/__init__.py
vllm_omni/distributed/omni_coordinator/load_balancer.py
vllm_omni/distributed/omni_coordinator/messages.py
vllm_omni/distributed/omni_coordinator/omni_coord_client_for_hub.py
vllm_omni/distributed/omni_coordinator/omni_coord_client_for_stage.py
vllm_omni/distributed/omni_coordinator/omni_coordinator.py
vllm_omni/distributed/ray_utils/__init__.py
vllm_omni/distributed/ray_utils/utils.py
vllm_omni/engine/__init__.py
vllm_omni/engine/arg_utils.py
vllm_omni/engine/async_omni_engine.py
vllm_omni/engine/cfg_companion_tracker.py
vllm_omni/engine/mm_outputs.py
vllm_omni/engine/orchestrator.py
vllm_omni/engine/output_modality.py
vllm_omni/engine/output_processor.py
vllm_omni/engine/serialization.py
vllm_omni/engine/stage_engine_core_client.py
vllm_omni/engine/stage_engine_core_proc.py
vllm_omni/engine/stage_engine_startup.py
vllm_omni/engine/stage_init_utils.py
vllm_omni/entrypoints/__init__.py
vllm_omni/entrypoints/async_omni.py
vllm_omni/entrypoints/chat_utils.py
vllm_omni/entrypoints/client_request_state.py
vllm_omni/entrypoints/omni.py
vllm_omni/entrypoints/omni_base.py
vllm_omni/entrypoints/pd_utils.py
vllm_omni/entrypoints/stage_utils.py
vllm_omni/entrypoints/utils.py
vllm_omni/entrypoints/cli/__init__.py
vllm_omni/entrypoints/cli/logo.py
vllm_omni/entrypoints/cli/main.py
vllm_omni/entrypoints/cli/serve.py
vllm_omni/entrypoints/cli/benchmark/__init__.py
vllm_omni/entrypoints/cli/benchmark/base.py
vllm_omni/entrypoints/cli/benchmark/main.py
vllm_omni/entrypoints/cli/benchmark/serve.py
vllm_omni/entrypoints/openai/__init__.py
vllm_omni/entrypoints/openai/api_server.py
vllm_omni/entrypoints/openai/audio_utils_mixin.py
vllm_omni/entrypoints/openai/errors.py
vllm_omni/entrypoints/openai/image_api_utils.py
vllm_omni/entrypoints/openai/realtime_connection.py
vllm_omni/entrypoints/openai/serving_audio_generate.py
vllm_omni/entrypoints/openai/serving_chat.py
vllm_omni/entrypoints/openai/serving_speech.py
vllm_omni/entrypoints/openai/serving_speech_stream.py
vllm_omni/entrypoints/openai/serving_video.py
vllm_omni/entrypoints/openai/serving_video_stream.py
vllm_omni/entrypoints/openai/stage_params.py
vllm_omni/entrypoints/openai/storage.py
vllm_omni/entrypoints/openai/stores.py
vllm_omni/entrypoints/openai/text_splitter.py
vllm_omni/entrypoints/openai/utils.py
vllm_omni/entrypoints/openai/video_api_utils.py
vllm_omni/entrypoints/openai/video_frame_filter.py
vllm_omni/entrypoints/openai/video_stream_context.py
vllm_omni/entrypoints/openai/video_stream_envs.py
vllm_omni/entrypoints/openai/video_stream_session.py
vllm_omni/entrypoints/openai/protocol/__init__.py
vllm_omni/entrypoints/openai/protocol/audio.py
vllm_omni/entrypoints/openai/protocol/chat_completion.py
vllm_omni/entrypoints/openai/protocol/images.py
vllm_omni/entrypoints/openai/protocol/videos.py
vllm_omni/inputs/__init__.py
vllm_omni/inputs/data.py
vllm_omni/inputs/preprocess.py
vllm_omni/lora/__init__.py
vllm_omni/lora/request.py
vllm_omni/lora/utils.py
vllm_omni/metrics/__init__.py
vllm_omni/metrics/stats.py
vllm_omni/metrics/utils.py
vllm_omni/model_executor/__init__.py
vllm_omni/model_executor/custom_process_mixin.py
vllm_omni/model_executor/layers/__init__.py
vllm_omni/model_executor/layers/rotary_embedding/__init__.py
vllm_omni/model_executor/layers/rotary_embedding/mrope.py
vllm_omni/model_executor/model_loader/__init__.py
vllm_omni/model_executor/model_loader/weight_utils.py
vllm_omni/model_executor/models/__init__.py
vllm_omni/model_executor/models/output_templates.py
vllm_omni/model_executor/models/registry.py
vllm_omni/model_executor/models/utils.py
vllm_omni/model_executor/models/whisper_utils.py
vllm_omni/model_executor/models/bagel/__init__.py
vllm_omni/model_executor/models/bagel/bagel.py
vllm_omni/model_executor/models/bagel/pipeline.py
vllm_omni/model_executor/models/common/__init__.py
vllm_omni/model_executor/models/common/qwen3_code_predictor.py
vllm_omni/model_executor/models/cosyvoice3/__init__.py
vllm_omni/model_executor/models/cosyvoice3/config.py
vllm_omni/model_executor/models/cosyvoice3/cosyvoice3.py
vllm_omni/model_executor/models/cosyvoice3/cosyvoice3_code2wav.py
vllm_omni/model_executor/models/cosyvoice3/cosyvoice3_talker.py
vllm_omni/model_executor/models/cosyvoice3/pipeline.py
vllm_omni/model_executor/models/cosyvoice3/tokenizer.py
vllm_omni/model_executor/models/cosyvoice3/utils.py
vllm_omni/model_executor/models/cosyvoice3/code2wav_core/__init__.py
vllm_omni/model_executor/models/cosyvoice3/code2wav_core/cfm.py
vllm_omni/model_executor/models/cosyvoice3/code2wav_core/hifigan.py
vllm_omni/model_executor/models/cosyvoice3/code2wav_core/layers.py
vllm_omni/model_executor/models/dynin_omni/__init__.py
vllm_omni/model_executor/models/dynin_omni/dynin_omni.py
vllm_omni/model_executor/models/dynin_omni/dynin_omni_common.py
vllm_omni/model_executor/models/dynin_omni/dynin_omni_token2audio.py
vllm_omni/model_executor/models/dynin_omni/dynin_omni_token2image.py
vllm_omni/model_executor/models/dynin_omni/dynin_omni_token2text.py
vllm_omni/model_executor/models/fish_speech/__init__.py
vllm_omni/model_executor/models/fish_speech/configuration_fish_speech.py
vllm_omni/model_executor/models/fish_speech/dac_encoder.py
vllm_omni/model_executor/models/fish_speech/dac_utils.py
vllm_omni/model_executor/models/fish_speech/fish_speech_dac_decoder.py
vllm_omni/model_executor/models/fish_speech/fish_speech_fast_ar.py
vllm_omni/model_executor/models/fish_speech/fish_speech_slow_ar.py
vllm_omni/model_executor/models/fish_speech/pipeline.py
vllm_omni/model_executor/models/fish_speech/prompt_utils.py
vllm_omni/model_executor/models/glm_image/__init__.py
vllm_omni/model_executor/models/glm_image/glm_image_ar.py
vllm_omni/model_executor/models/glm_image/pipeline.py
vllm_omni/model_executor/models/hunyuan_image3/__init__.py
vllm_omni/model_executor/models/hunyuan_image3/autoencoder_kl_3d.py
vllm_omni/model_executor/models/hunyuan_image3/hunyuan_image3.py
vllm_omni/model_executor/models/hunyuan_image3/siglip2.py
vllm_omni/model_executor/models/mammoth_moda2/__init__.py
vllm_omni/model_executor/models/mammoth_moda2/mammoth_moda2.py
vllm_omni/model_executor/models/mammoth_moda2/pipeline_mammothmoda2_dit.py
vllm_omni/model_executor/models/mimo_audio/__init__.py
vllm_omni/model_executor/models/mimo_audio/config_mimo_audio.py
vllm_omni/model_executor/models/mimo_audio/mimo_audio.py
vllm_omni/model_executor/models/mimo_audio/mimo_audio_code2wav.py
vllm_omni/model_executor/models/mimo_audio/mimo_audio_llm.py
vllm_omni/model_executor/models/mimo_audio/modeling_audio_tokenizer.py
vllm_omni/model_executor/models/mimo_audio/modeling_rope_utils.py
vllm_omni/model_executor/models/mimo_audio/pipeline.py
vllm_omni/model_executor/models/mimo_audio/quantization.py
vllm_omni/model_executor/models/ming_flash_omni/__init__.py
vllm_omni/model_executor/models/ming_flash_omni/audio_encoder.py
vllm_omni/model_executor/models/ming_flash_omni/audio_vae.py
vllm_omni/model_executor/models/ming_flash_omni/ming_flash_omni.py
vllm_omni/model_executor/models/ming_flash_omni/ming_flash_omni_talker.py
vllm_omni/model_executor/models/ming_flash_omni/ming_flash_omni_thinker.py
vllm_omni/model_executor/models/ming_flash_omni/modeling_bailing_moe_v2.py
vllm_omni/model_executor/models/ming_flash_omni/projectors.py
vllm_omni/model_executor/models/ming_flash_omni/prompt_utils.py
vllm_omni/model_executor/models/ming_flash_omni/spk_embedding.py
vllm_omni/model_executor/models/ming_flash_omni/talker_module.py
vllm_omni/model_executor/models/ming_flash_omni/text_processing.py
vllm_omni/model_executor/models/ming_flash_omni/vision_encoder.py
vllm_omni/model_executor/models/ming_flash_omni/voice_presets.py
vllm_omni/model_executor/models/moss_tts_nano/__init__.py
vllm_omni/model_executor/models/moss_tts_nano/configuration_moss_tts_nano.py
vllm_omni/model_executor/models/moss_tts_nano/modeling_moss_tts_nano.py
vllm_omni/model_executor/models/moss_tts_nano/pipeline.py
vllm_omni/model_executor/models/omnivoice/__init__.py
vllm_omni/model_executor/models/omnivoice/config.py
vllm_omni/model_executor/models/omnivoice/duration.py
vllm_omni/model_executor/models/omnivoice/omnivoice.py
vllm_omni/model_executor/models/omnivoice/omnivoice_decoder.py
vllm_omni/model_executor/models/omnivoice/omnivoice_generator.py
vllm_omni/model_executor/models/qwen2_5_omni/__init__.py
vllm_omni/model_executor/models/qwen2_5_omni/audio_length.py
vllm_omni/model_executor/models/qwen2_5_omni/pipeline.py
vllm_omni/model_executor/models/qwen2_5_omni/qwen2_5_omni.py
vllm_omni/model_executor/models/qwen2_5_omni/qwen2_5_omni_talker.py
vllm_omni/model_executor/models/qwen2_5_omni/qwen2_5_omni_thinker.py
vllm_omni/model_executor/models/qwen2_5_omni/qwen2_5_omni_token2wav.py
vllm_omni/model_executor/models/qwen2_5_omni/qwen2_old.py
vllm_omni/model_executor/models/qwen3_omni/__init__.py
vllm_omni/model_executor/models/qwen3_omni/pipeline.py
vllm_omni/model_executor/models/qwen3_omni/qwen3_moe.py
vllm_omni/model_executor/models/qwen3_omni/qwen3_omni.py
vllm_omni/model_executor/models/qwen3_omni/qwen3_omni_code2wav.py
vllm_omni/model_executor/models/qwen3_omni/qwen3_omni_moe_code_predictor_mtp.py
vllm_omni/model_executor/models/qwen3_omni/qwen3_omni_moe_talker.py
vllm_omni/model_executor/models/qwen3_omni/qwen3_omni_moe_thinker.py
vllm_omni/model_executor/models/qwen3_tts/__init__.py
vllm_omni/model_executor/models/qwen3_tts/configuration_qwen3_tts.py
vllm_omni/model_executor/models/qwen3_tts/cuda_graph_decoder_wrapper.py
vllm_omni/model_executor/models/qwen3_tts/pipeline.py
vllm_omni/model_executor/models/qwen3_tts/qwen3_tts_code2wav.py
vllm_omni/model_executor/models/qwen3_tts/qwen3_tts_code_predictor_vllm.py
vllm_omni/model_executor/models/qwen3_tts/qwen3_tts_talker.py
vllm_omni/model_executor/models/qwen3_tts/qwen3_tts_tokenizer.py
vllm_omni/model_executor/models/qwen3_tts/tokenizer_12hz/__init__.py
vllm_omni/model_executor/models/qwen3_tts/tokenizer_12hz/configuration_qwen3_tts_tokenizer_v2.py
vllm_omni/model_executor/models/qwen3_tts/tokenizer_12hz/modeling_qwen3_tts_tokenizer_v2.py
vllm_omni/model_executor/models/qwen3_tts/tokenizer_25hz/__init__.py
vllm_omni/model_executor/models/qwen3_tts/tokenizer_25hz/configuration_qwen3_tts_tokenizer_v1.py
vllm_omni/model_executor/models/qwen3_tts/tokenizer_25hz/modeling_qwen3_tts_tokenizer_v1.py
vllm_omni/model_executor/models/qwen3_tts/tokenizer_25hz/vq/__init__.py
vllm_omni/model_executor/models/qwen3_tts/tokenizer_25hz/vq/core_vq.py
vllm_omni/model_executor/models/qwen3_tts/tokenizer_25hz/vq/speech_vq.py
vllm_omni/model_executor/models/qwen3_tts/tokenizer_25hz/vq/whisper_encoder.py
vllm_omni/model_executor/models/voxcpm/__init__.py
vllm_omni/model_executor/models/voxcpm/configuration_voxcpm.py
vllm_omni/model_executor/models/voxcpm/voxcpm.py
vllm_omni/model_executor/models/voxcpm/voxcpm_loader.py
vllm_omni/model_executor/models/voxcpm/voxcpm_runtime_utils.py
vllm_omni/model_executor/models/voxcpm/voxcpm_stage_wrappers.py
vllm_omni/model_executor/models/voxcpm2/__init__.py
vllm_omni/model_executor/models/voxcpm2/minicpm4_hf_compat.py
vllm_omni/model_executor/models/voxcpm2/minicpm4_paged.py
vllm_omni/model_executor/models/voxcpm2/pipeline.py
vllm_omni/model_executor/models/voxcpm2/voxcpm2_import_utils.py
vllm_omni/model_executor/models/voxcpm2/voxcpm2_talker.py
vllm_omni/model_executor/models/voxtral_tts/__init__.py
vllm_omni/model_executor/models/voxtral_tts/cuda_graph_acoustic_transformer_wrapper.py
vllm_omni/model_executor/models/voxtral_tts/pipeline.py
vllm_omni/model_executor/models/voxtral_tts/voxtral_tts.py
vllm_omni/model_executor/models/voxtral_tts/voxtral_tts_audio_generation.py
vllm_omni/model_executor/models/voxtral_tts/voxtral_tts_audio_tokenizer.py
vllm_omni/model_executor/stage_configs/__init__.py
vllm_omni/model_executor/stage_configs/bailingmm_moe_v2_lite.yaml
vllm_omni/model_executor/stage_configs/dynin_omni.yaml
vllm_omni/model_executor/stage_configs/dynin_omni_multiconnector.yaml
vllm_omni/model_executor/stage_configs/hunyuan_image3_i2t.yaml
vllm_omni/model_executor/stage_configs/hunyuan_image3_it2i.yaml
vllm_omni/model_executor/stage_configs/hunyuan_image3_moe.yaml
vllm_omni/model_executor/stage_configs/hunyuan_image3_moe_dit_2gpu_fp8.yaml
vllm_omni/model_executor/stage_configs/hunyuan_image3_t2i.yaml
vllm_omni/model_executor/stage_configs/hunyuan_image3_t2i_2gpu.yaml
vllm_omni/model_executor/stage_configs/hunyuan_image3_t2t.yaml
vllm_omni/model_executor/stage_configs/mammoth_moda2.yaml
vllm_omni/model_executor/stage_configs/mammoth_moda2_ar.yaml
vllm_omni/model_executor/stage_configs/ming_flash_omni.yaml
vllm_omni/model_executor/stage_configs/ming_flash_omni_tts.yaml
vllm_omni/model_executor/stage_configs/omnivoice.yaml
vllm_omni/model_executor/stage_configs/qwen3_tts_uniproc.yaml
vllm_omni/model_executor/stage_configs/voxcpm.yaml
vllm_omni/model_executor/stage_configs/voxcpm_async_chunk.yaml
vllm_omni/model_executor/stage_input_processors/__init__.py
vllm_omni/model_executor/stage_input_processors/bagel.py
vllm_omni/model_executor/stage_input_processors/chunk_size_utils.py
vllm_omni/model_executor/stage_input_processors/cosyvoice3.py
vllm_omni/model_executor/stage_input_processors/dynin_omni.py
vllm_omni/model_executor/stage_input_processors/fish_speech.py
vllm_omni/model_executor/stage_input_processors/glm_image.py
vllm_omni/model_executor/stage_input_processors/hunyuan_image3.py
vllm_omni/model_executor/stage_input_processors/mammoth_moda2.py
vllm_omni/model_executor/stage_input_processors/mimo_audio.py
vllm_omni/model_executor/stage_input_processors/ming_flash_omni.py
vllm_omni/model_executor/stage_input_processors/omnivoice.py
vllm_omni/model_executor/stage_input_processors/qwen2_5_omni.py
vllm_omni/model_executor/stage_input_processors/qwen3_omni.py
vllm_omni/model_executor/stage_input_processors/qwen3_tts.py
vllm_omni/model_executor/stage_input_processors/tts_utils.py
vllm_omni/model_executor/stage_input_processors/voxcpm.py
vllm_omni/model_executor/stage_input_processors/voxtral_tts.py
vllm_omni/platforms/__init__.py
vllm_omni/platforms/interface.py
vllm_omni/platforms/cuda/__init__.py
vllm_omni/platforms/cuda/platform.py
vllm_omni/platforms/musa/__init__.py
vllm_omni/platforms/musa/platform.py
vllm_omni/platforms/npu/__init__.py
vllm_omni/platforms/npu/platform.py
vllm_omni/platforms/npu/profiler.py
vllm_omni/platforms/npu/models/__init__.py
vllm_omni/platforms/npu/models/hunyuan_fused_moe.py
vllm_omni/platforms/npu/stage_configs/hunyuan_image3_t2i.yaml
vllm_omni/platforms/npu/stage_configs/voxcpm.yaml
vllm_omni/platforms/npu/stage_configs/voxcpm_async_chunk.yaml
vllm_omni/platforms/npu/worker/__init__.py
vllm_omni/platforms/npu/worker/base.py
vllm_omni/platforms/npu/worker/npu_ar_model_runner.py
vllm_omni/platforms/npu/worker/npu_ar_worker.py
vllm_omni/platforms/npu/worker/npu_generation_model_runner.py
vllm_omni/platforms/npu/worker/npu_generation_worker.py
vllm_omni/platforms/npu/worker/npu_model_runner.py
vllm_omni/platforms/rocm/__init__.py
vllm_omni/platforms/rocm/platform.py
vllm_omni/platforms/xpu/__init__.py
vllm_omni/platforms/xpu/platform.py
vllm_omni/platforms/xpu/utils.py
vllm_omni/platforms/xpu/stage_configs/hunyuan_image3_t2i.yaml
vllm_omni/platforms/xpu/stage_configs/voxtral_tts.yaml
vllm_omni/platforms/xpu/worker/__init__.py
vllm_omni/platforms/xpu/worker/xpu_ar_model_runner.py
vllm_omni/platforms/xpu/worker/xpu_ar_worker.py
vllm_omni/platforms/xpu/worker/xpu_generation_model_runner.py
vllm_omni/platforms/xpu/worker/xpu_generation_worker.py
vllm_omni/plugins/__init__.py
vllm_omni/profiler/__init__.py
vllm_omni/profiler/omni_torch_profiler.py
vllm_omni/quantization/__init__.py
vllm_omni/quantization/component_config.py
vllm_omni/quantization/factory.py
vllm_omni/quantization/gguf_config.py
vllm_omni/quantization/inc_config.py
vllm_omni/quantization/int8_config.py
vllm_omni/sample/__init__.py
vllm_omni/tokenizers/__init__.py
vllm_omni/tokenizers/mammoth_moda2_tokenizer.py
vllm_omni/transformers_utils/__init__.py
vllm_omni/transformers_utils/configs/__init__.py
vllm_omni/transformers_utils/configs/fish_speech.py
vllm_omni/transformers_utils/configs/mammoth_moda2.py
vllm_omni/transformers_utils/configs/ming_flash_omni.py
vllm_omni/transformers_utils/configs/voxcpm.py
vllm_omni/transformers_utils/configs/voxcpm2.py
vllm_omni/transformers_utils/configs/voxtral_tts.py
vllm_omni/transformers_utils/parsers/__init__.py
vllm_omni/transformers_utils/parsers/voxtral_tts.py
vllm_omni/transformers_utils/processors/__init__.py
vllm_omni/transformers_utils/processors/ming.py
vllm_omni/utils/__init__.py
vllm_omni/utils/audio.py
vllm_omni/utils/mm_outputs.py
vllm_omni/utils/voice_cache.py
vllm_omni/worker/__init__.py
vllm_omni/worker/base.py
vllm_omni/worker/gpu_ar_model_runner.py
vllm_omni/worker/gpu_ar_worker.py
vllm_omni/worker/gpu_generation_model_runner.py
vllm_omni/worker/gpu_generation_worker.py
vllm_omni/worker/gpu_memory_utils.py
vllm_omni/worker/gpu_model_runner.py
vllm_omni/worker/mixins.py
vllm_omni/worker/omni_connector_model_runner_mixin.py
vllm_omni/worker/payload_span.py