CMakeLists.txt
LICENSE
MANIFEST.in
README.md
pyproject.toml
setup.py
cmake/cpu_extension.cmake
cmake/utils.cmake
cmake/external_projects/flashmla.cmake
cmake/external_projects/vllm_flash_attn.cmake
csrc/activation_kernels.cu
csrc/cache.h
csrc/cache_kernels.cu
csrc/cuda_compat.h
csrc/cuda_utils.h
csrc/cuda_utils_kernels.cu
csrc/cuda_view.cu
csrc/cumem_allocator.cpp
csrc/custom_all_reduce.cu
csrc/custom_all_reduce.cuh
csrc/custom_all_reduce_test.cu
csrc/custom_quickreduce.cu
csrc/dispatch_utils.h
csrc/layernorm_kernels.cu
csrc/layernorm_quant_kernels.cu
csrc/ops.h
csrc/permute_cols.cu
csrc/pos_encoding_kernels.cu
csrc/sampler.cu
csrc/torch_bindings.cpp
csrc/type_convert.cuh
csrc/attention/attention_dtypes.h
csrc/attention/attention_generic.cuh
csrc/attention/attention_kernels.cuh
csrc/attention/attention_utils.cuh
csrc/attention/dtype_bfloat16.cuh
csrc/attention/dtype_float16.cuh
csrc/attention/dtype_float32.cuh
csrc/attention/dtype_fp8.cuh
csrc/attention/merge_attn_states.cu
csrc/attention/paged_attention_v1.cu
csrc/attention/paged_attention_v2.cu
csrc/attention/vertical_slash_index.cu
csrc/attention/mla/cutlass_mla_entry.cu
csrc/attention/mla/cutlass_mla_kernels.cu
csrc/attention/mla/sm100_cutlass_mla_kernel.cu
csrc/core/registration.h
csrc/cpu/activation.cpp
csrc/cpu/attention.cpp
csrc/cpu/cache.cpp
csrc/cpu/dnnl_helper.cpp
csrc/cpu/dnnl_helper.h
csrc/cpu/dnnl_kernels.cpp
csrc/cpu/layernorm.cpp
csrc/cpu/mla_decode.cpp
csrc/cpu/pos_encoding.cpp
csrc/cpu/shm.cpp
csrc/cpu/torch_bindings.cpp
csrc/cpu/utils.cpp
csrc/cpu/sgl-kernels/common.h
csrc/cpu/sgl-kernels/gemm.cpp
csrc/cpu/sgl-kernels/gemm.h
csrc/cpu/sgl-kernels/gemm_fp8.cpp
csrc/cpu/sgl-kernels/gemm_int8.cpp
csrc/cpu/sgl-kernels/moe.cpp
csrc/cpu/sgl-kernels/moe_fp8.cpp
csrc/cpu/sgl-kernels/moe_int8.cpp
csrc/cpu/sgl-kernels/vec.h
csrc/cutlass_extensions/common.cpp
csrc/cutlass_extensions/cute_utils.cuh
csrc/cutlass_extensions/vllm_collective_builder.cuh
csrc/cutlass_extensions/vllm_custom_types.cuh
csrc/cutlass_extensions/vllm_numeric_conversion.cuh
csrc/cutlass_extensions/vllm_type_utils.cuh
csrc/mamba/mamba_ssm/selective_scan.h
csrc/mamba/mamba_ssm/selective_scan_fwd.cu
csrc/mamba/mamba_ssm/static_switch.h
csrc/moe/grouped_topk_kernels.cu
csrc/moe/moe_align_sum_kernels.cu
csrc/moe/moe_ops.h
csrc/moe/moe_permute_unpermute_op.cu
csrc/moe/moe_wna16.cu
csrc/moe/moe_wna16_utils.h
csrc/moe/topk_softmax_kernels.cu
csrc/moe/torch_bindings.cpp
csrc/moe/marlin_moe_wna16/kernel.h
csrc/moe/marlin_moe_wna16/marlin_template.h
csrc/moe/marlin_moe_wna16/ops.cu
csrc/moe/permute_unpermute_kernels/dispatch.h
csrc/moe/permute_unpermute_kernels/moe_permute_unpermute_kernel.cu
csrc/moe/permute_unpermute_kernels/moe_permute_unpermute_kernel.h
csrc/quantization/activation_kernels.cu
csrc/quantization/per_token_group_quant_8bit.h
csrc/quantization/utils.cuh
csrc/quantization/vectorization.cuh
csrc/quantization/vectorization_utils.cuh
csrc/quantization/awq/dequantize.cuh
csrc/quantization/awq/gemm_kernels.cu
csrc/quantization/compressed_tensors/int8_quant_kernels.cu
csrc/quantization/cutlass_w4a8/w4a8_mm_entry.cu
csrc/quantization/cutlass_w8a8/scaled_mm_c2x.cu
csrc/quantization/cutlass_w8a8/scaled_mm_c2x.cuh
csrc/quantization/cutlass_w8a8/scaled_mm_c2x_sm75_dispatch.cuh
csrc/quantization/cutlass_w8a8/scaled_mm_c2x_sm80_dispatch.cuh
csrc/quantization/cutlass_w8a8/scaled_mm_c2x_sm89_fp8_dispatch.cuh
csrc/quantization/cutlass_w8a8/scaled_mm_c2x_sm89_int8_dispatch.cuh
csrc/quantization/cutlass_w8a8/scaled_mm_c3x_sm100.cu
csrc/quantization/cutlass_w8a8/scaled_mm_c3x_sm120.cu
csrc/quantization/cutlass_w8a8/scaled_mm_c3x_sm90.cu
csrc/quantization/cutlass_w8a8/scaled_mm_entry.cu
csrc/quantization/cutlass_w8a8/c3x/cutlass_gemm_caller.cuh
csrc/quantization/cutlass_w8a8/c3x/scaled_mm.cuh
csrc/quantization/cutlass_w8a8/c3x/scaled_mm_azp_sm90_int8.cu
csrc/quantization/cutlass_w8a8/c3x/scaled_mm_blockwise_sm100_fp8.cu
csrc/quantization/cutlass_w8a8/c3x/scaled_mm_blockwise_sm100_fp8_dispatch.cuh
csrc/quantization/cutlass_w8a8/c3x/scaled_mm_blockwise_sm120_fp8.cu
csrc/quantization/cutlass_w8a8/c3x/scaled_mm_blockwise_sm120_fp8_dispatch.cuh
csrc/quantization/cutlass_w8a8/c3x/scaled_mm_blockwise_sm90_fp8.cu
csrc/quantization/cutlass_w8a8/c3x/scaled_mm_blockwise_sm90_fp8_dispatch.cuh
csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm100_fp8.cu
csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm100_fp8_dispatch.cuh
csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm120_fp8.cu
csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm120_fp8_dispatch.cuh
csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm90_fp8.cu
csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm90_fp8_dispatch.cuh
csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm90_int8.cu
csrc/quantization/cutlass_w8a8/c3x/scaled_mm_sm90_int8_dispatch.cuh
csrc/quantization/cutlass_w8a8/moe/blockwise_scaled_group_mm_sm100.cu
csrc/quantization/cutlass_w8a8/moe/get_group_starts.cuh
csrc/quantization/cutlass_w8a8/moe/grouped_mm_c3x.cuh
csrc/quantization/cutlass_w8a8/moe/grouped_mm_c3x_sm100.cu
csrc/quantization/cutlass_w8a8/moe/grouped_mm_c3x_sm90.cu
csrc/quantization/cutlass_w8a8/moe/moe_data.cu
csrc/quantization/fp4/activation_nvfp4_quant_fusion_kernels.cu
csrc/quantization/fp4/nvfp4_blockwise_moe_kernel.cu
csrc/quantization/fp4/nvfp4_experts_quant.cu
csrc/quantization/fp4/nvfp4_quant_entry.cu
csrc/quantization/fp4/nvfp4_quant_kernels.cu
csrc/quantization/fp4/nvfp4_scaled_mm_entry.cu
csrc/quantization/fp4/nvfp4_scaled_mm_kernels.cu
csrc/quantization/fp4/nvfp4_scaled_mm_sm120_kernels.cu
csrc/quantization/fp8/common.cu
csrc/quantization/fp8/common.cuh
csrc/quantization/fp8/per_token_group_quant.cu
csrc/quantization/fp8/amd/quant_utils.cuh
csrc/quantization/fp8/nvidia/quant_utils.cuh
csrc/quantization/fused_kernels/fused_layernorm_dynamic_per_token_quant.cu
csrc/quantization/fused_kernels/layernorm_utils.cuh
csrc/quantization/fused_kernels/quant_conversions.cuh
csrc/quantization/gguf/dequantize.cuh
csrc/quantization/gguf/ggml-common.h
csrc/quantization/gguf/gguf_kernel.cu
csrc/quantization/gguf/mmq.cuh
csrc/quantization/gguf/mmvq.cuh
csrc/quantization/gguf/moe.cuh
csrc/quantization/gguf/moe_vec.cuh
csrc/quantization/gguf/vecdotq.cuh
csrc/quantization/gptq/compat.cuh
csrc/quantization/gptq/matrix_view.cuh
csrc/quantization/gptq/q_gemm.cu
csrc/quantization/gptq/qdq_2.cuh
csrc/quantization/gptq/qdq_3.cuh
csrc/quantization/gptq/qdq_4.cuh
csrc/quantization/gptq/qdq_8.cuh
csrc/quantization/gptq/qdq_util.cuh
csrc/quantization/gptq_allspark/allspark_qgemm_w8a16.cu
csrc/quantization/gptq_allspark/allspark_repack.cu
csrc/quantization/gptq_allspark/allspark_utils.cuh
csrc/quantization/gptq_marlin/awq_marlin_repack.cu
csrc/quantization/gptq_marlin/dequant.h
csrc/quantization/gptq_marlin/gptq_marlin.cu
csrc/quantization/gptq_marlin/gptq_marlin_repack.cu
csrc/quantization/gptq_marlin/kernel.h
csrc/quantization/gptq_marlin/marlin.cuh
csrc/quantization/gptq_marlin/marlin_dtypes.cuh
csrc/quantization/gptq_marlin/marlin_template.h
csrc/quantization/machete/machete_collective_builder.cuh
csrc/quantization/machete/machete_interleaving_utils.cuh
csrc/quantization/machete/machete_mainloop.cuh
csrc/quantization/machete/machete_mm_kernel.cuh
csrc/quantization/machete/machete_mm_launcher.cuh
csrc/quantization/machete/machete_prepack_kernel.cuh
csrc/quantization/machete/machete_prepack_launcher.cuh
csrc/quantization/machete/machete_prepacked_layout.cuh
csrc/quantization/machete/machete_pytorch.cu
csrc/quantization/marlin/sparse/marlin_24_cuda_kernel.cu
csrc/quantization/marlin/sparse/common/base.h
csrc/quantization/marlin/sparse/common/mem.h
csrc/quantization/marlin/sparse/common/mma.h
csrc/quickreduce/base.h
csrc/quickreduce/quick_reduce.h
csrc/quickreduce/quick_reduce_impl.cuh
csrc/rocm/attention.cu
csrc/rocm/ops.h
csrc/rocm/skinny_gemms.cu
csrc/rocm/torch_bindings.cpp
csrc/sparse/cutlass/sparse_compressor_c3x.cuh
csrc/sparse/cutlass/sparse_scaled_mm_c3x.cu
csrc/sparse/cutlass/sparse_scaled_mm_c3x.cuh
csrc/sparse/cutlass/sparse_scaled_mm_entry.cu
examples/template_alpaca.jinja
examples/template_baichuan.jinja
examples/template_chatglm.jinja
examples/template_chatglm2.jinja
examples/template_chatml.jinja
examples/template_dse_qwen2_vl.jinja
examples/template_falcon.jinja
examples/template_falcon_180b.jinja
examples/template_inkbot.jinja
examples/template_teleflm.jinja
examples/template_vlm2vec.jinja
examples/tool_chat_template_deepseekr1.jinja
examples/tool_chat_template_deepseekv3.jinja
examples/tool_chat_template_deepseekv31.jinja
examples/tool_chat_template_gemma3_pythonic.jinja
examples/tool_chat_template_granite.jinja
examples/tool_chat_template_granite_20b_fc.jinja
examples/tool_chat_template_hermes.jinja
examples/tool_chat_template_hunyuan_a13b.jinja
examples/tool_chat_template_internlm2_tool.jinja
examples/tool_chat_template_llama3.1_json.jinja
examples/tool_chat_template_llama3.2_json.jinja
examples/tool_chat_template_llama3.2_pythonic.jinja
examples/tool_chat_template_llama4_json.jinja
examples/tool_chat_template_llama4_pythonic.jinja
examples/tool_chat_template_minimax_m1.jinja
examples/tool_chat_template_mistral.jinja
examples/tool_chat_template_mistral3.jinja
examples/tool_chat_template_mistral_parallel.jinja
examples/tool_chat_template_phi4_mini.jinja
examples/tool_chat_template_qwen3coder.jinja
examples/tool_chat_template_toolace.jinja
examples/tool_chat_template_xlam_llama.jinja
examples/tool_chat_template_xlam_qwen.jinja
requirements/build.txt
requirements/common.txt
requirements/cpu-build.txt
requirements/cpu.txt
requirements/cuda.txt
requirements/dev.txt
requirements/docs.txt
requirements/kv_connectors.txt
requirements/lint.txt
requirements/neuron.txt
requirements/nightly_torch_test.txt
requirements/rocm-build.txt
requirements/rocm-test.txt
requirements/rocm.txt
requirements/test.txt
requirements/tpu.txt
requirements/xpu.txt
vllm/__init__.py
vllm/_custom_ops.py
vllm/_ipex_ops.py
vllm/beam_search.py
vllm/collect_env.py
vllm/connections.py
vllm/env_override.py
vllm/envs.py
vllm/forward_context.py
vllm/logger.py
vllm/logits_process.py
vllm/logprobs.py
vllm/outputs.py
vllm/pooling_params.py
vllm/py.typed
vllm/sampling_params.py
vllm/scalar_type.py
vllm/scripts.py
vllm/sequence.py
vllm/tasks.py
vllm/test_utils.py
vllm/tracing.py
vllm/version.py
vllm/adapter_commons/__init__.py
vllm/adapter_commons/layers.py
vllm/adapter_commons/models.py
vllm/adapter_commons/request.py
vllm/adapter_commons/utils.py
vllm/adapter_commons/worker_manager.py
vllm/assets/__init__.py
vllm/assets/audio.py
vllm/assets/base.py
vllm/assets/image.py
vllm/assets/video.py
vllm/attention/__init__.py
vllm/attention/layer.py
vllm/attention/selector.py
vllm/attention/backends/__init__.py
vllm/attention/backends/abstract.py
vllm/attention/backends/differential_flash_attn.py
vllm/attention/backends/dual_chunk_flash_attn.py
vllm/attention/backends/flash_attn.py
vllm/attention/backends/flashmla.py
vllm/attention/backends/placeholder_attn.py
vllm/attention/backends/rocm_aiter_mla.py
vllm/attention/backends/rocm_flash_attn.py
vllm/attention/backends/triton_mla.py
vllm/attention/backends/utils.py
vllm/attention/backends/xformers.py
vllm/attention/backends/mla/__init__.py
vllm/attention/backends/mla/common.py
vllm/attention/layers/__init__.py
vllm/attention/layers/chunked_local_attention.py
vllm/attention/layers/encoder_only_attention.py
vllm/attention/ops/__init__.py
vllm/attention/ops/chunked_prefill_paged_decode.py
vllm/attention/ops/flashmla.py
vllm/attention/ops/merge_attn_states.py
vllm/attention/ops/nki_flash_attn.py
vllm/attention/ops/paged_attn.py
vllm/attention/ops/pallas_kv_cache_update.py
vllm/attention/ops/prefix_prefill.py
vllm/attention/ops/rocm_aiter_mla.py
vllm/attention/ops/rocm_aiter_paged_attn.py
vllm/attention/ops/triton_decode_attention.py
vllm/attention/ops/triton_flash_attention.py
vllm/attention/ops/triton_merge_attn_states.py
vllm/attention/ops/triton_unified_attention.py
vllm/attention/utils/__init__.py
vllm/attention/utils/fa_utils.py
vllm/attention/utils/kv_sharing_utils.py
vllm/benchmarks/__init__.py
vllm/benchmarks/datasets.py
vllm/benchmarks/latency.py
vllm/benchmarks/serve.py
vllm/benchmarks/throughput.py
vllm/compilation/__init__.py
vllm/compilation/activation_quant_fusion.py
vllm/compilation/backends.py
vllm/compilation/base_static_graph.py
vllm/compilation/collective_fusion.py
vllm/compilation/compiler_interface.py
vllm/compilation/counter.py
vllm/compilation/cuda_graph.py
vllm/compilation/cuda_piecewise_backend.py
vllm/compilation/decorators.py
vllm/compilation/fix_functionalization.py
vllm/compilation/fusion.py
vllm/compilation/fusion_attn.py
vllm/compilation/fx_utils.py
vllm/compilation/inductor_pass.py
vllm/compilation/monitor.py
vllm/compilation/multi_output_match.py
vllm/compilation/noop_elimination.py
vllm/compilation/pass_manager.py
vllm/compilation/sequence_parallelism.py
vllm/compilation/torch25_custom_graph_pass.py
vllm/compilation/vllm_inductor_pass.py
vllm/compilation/wrapper.py
vllm/config/__init__.py
vllm/config/cache.py
vllm/config/compilation.py
vllm/config/parallel.py
vllm/config/scheduler.py
vllm/config/utils.py
vllm/core/__init__.py
vllm/core/block_manager.py
vllm/core/evictor.py
vllm/core/interfaces.py
vllm/core/placeholder_block_space_manager.py
vllm/core/scheduler.py
vllm/core/block/__init__.py
vllm/core/block/block_table.py
vllm/core/block/common.py
vllm/core/block/cpu_gpu_block_allocator.py
vllm/core/block/interfaces.py
vllm/core/block/naive_block.py
vllm/core/block/prefix_caching_block.py
vllm/core/block/utils.py
vllm/device_allocator/__init__.py
vllm/device_allocator/cumem.py
vllm/distributed/__init__.py
vllm/distributed/communication_op.py
vllm/distributed/kv_events.py
vllm/distributed/parallel_state.py
vllm/distributed/tpu_distributed_utils.py
vllm/distributed/utils.py
vllm/distributed/device_communicators/__init__.py
vllm/distributed/device_communicators/all2all.py
vllm/distributed/device_communicators/all_reduce_utils.py
vllm/distributed/device_communicators/base_device_communicator.py
vllm/distributed/device_communicators/cpu_communicator.py
vllm/distributed/device_communicators/cuda_communicator.py
vllm/distributed/device_communicators/cuda_wrapper.py
vllm/distributed/device_communicators/custom_all_reduce.py
vllm/distributed/device_communicators/neuron_communicator.py
vllm/distributed/device_communicators/pynccl.py
vllm/distributed/device_communicators/pynccl_wrapper.py
vllm/distributed/device_communicators/quick_all_reduce.py
vllm/distributed/device_communicators/ray_communicator.py
vllm/distributed/device_communicators/shm_broadcast.py
vllm/distributed/device_communicators/symm_mem.py
vllm/distributed/device_communicators/tpu_communicator.py
vllm/distributed/device_communicators/xpu_communicator.py
vllm/distributed/eplb/__init__.py
vllm/distributed/eplb/eplb_state.py
vllm/distributed/eplb/rebalance_algo.py
vllm/distributed/eplb/rebalance_execute.py
vllm/distributed/kv_transfer/__init__.py
vllm/distributed/kv_transfer/kv_transfer_state.py
vllm/distributed/kv_transfer/kv_connector/__init__.py
vllm/distributed/kv_transfer/kv_connector/base.py
vllm/distributed/kv_transfer/kv_connector/factory.py
vllm/distributed/kv_transfer/kv_connector/utils.py
vllm/distributed/kv_transfer/kv_connector/v1/__init__.py
vllm/distributed/kv_transfer/kv_connector/v1/base.py
vllm/distributed/kv_transfer/kv_connector/v1/lmcache_connector.py
vllm/distributed/kv_transfer/kv_connector/v1/multi_connector.py
vllm/distributed/kv_transfer/kv_connector/v1/nixl_connector.py
vllm/distributed/kv_transfer/kv_connector/v1/shared_storage_connector.py
vllm/distributed/kv_transfer/kv_connector/v1/p2p/__init__.py
vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_connector.py
vllm/distributed/kv_transfer/kv_connector/v1/p2p/p2p_nccl_engine.py
vllm/distributed/kv_transfer/kv_connector/v1/p2p/tensor_memory_pool.py
vllm/distributed/kv_transfer/kv_lookup_buffer/__init__.py
vllm/distributed/kv_transfer/kv_lookup_buffer/base.py
vllm/distributed/kv_transfer/kv_lookup_buffer/mooncake_store.py
vllm/distributed/kv_transfer/kv_lookup_buffer/simple_buffer.py
vllm/distributed/kv_transfer/kv_pipe/__init__.py
vllm/distributed/kv_transfer/kv_pipe/base.py
vllm/distributed/kv_transfer/kv_pipe/mooncake_pipe.py
vllm/distributed/kv_transfer/kv_pipe/pynccl_pipe.py
vllm/engine/__init__.py
vllm/engine/arg_utils.py
vllm/engine/async_llm_engine.py
vllm/engine/async_timeout.py
vllm/engine/llm_engine.py
vllm/engine/metrics.py
vllm/engine/metrics_types.py
vllm/engine/protocol.py
vllm/engine/multiprocessing/__init__.py
vllm/engine/multiprocessing/client.py
vllm/engine/multiprocessing/engine.py
vllm/engine/output_processor/__init__.py
vllm/engine/output_processor/interfaces.py
vllm/engine/output_processor/single_step.py
vllm/engine/output_processor/stop_checker.py
vllm/engine/output_processor/util.py
vllm/entrypoints/__init__.py
vllm/entrypoints/api_server.py
vllm/entrypoints/chat_utils.py
vllm/entrypoints/constants.py
vllm/entrypoints/context.py
vllm/entrypoints/harmony_utils.py
vllm/entrypoints/launcher.py
vllm/entrypoints/llm.py
vllm/entrypoints/logger.py
vllm/entrypoints/score_utils.py
vllm/entrypoints/ssl.py
vllm/entrypoints/tool.py
vllm/entrypoints/tool_server.py
vllm/entrypoints/utils.py
vllm/entrypoints/cli/__init__.py
vllm/entrypoints/cli/collect_env.py
vllm/entrypoints/cli/main.py
vllm/entrypoints/cli/openai.py
vllm/entrypoints/cli/run_batch.py
vllm/entrypoints/cli/serve.py
vllm/entrypoints/cli/types.py
vllm/entrypoints/cli/benchmark/__init__.py
vllm/entrypoints/cli/benchmark/base.py
vllm/entrypoints/cli/benchmark/latency.py
vllm/entrypoints/cli/benchmark/main.py
vllm/entrypoints/cli/benchmark/serve.py
vllm/entrypoints/cli/benchmark/throughput.py
vllm/entrypoints/openai/__init__.py
vllm/entrypoints/openai/api_server.py
vllm/entrypoints/openai/cli_args.py
vllm/entrypoints/openai/logits_processors.py
vllm/entrypoints/openai/protocol.py
vllm/entrypoints/openai/run_batch.py
vllm/entrypoints/openai/serving_chat.py
vllm/entrypoints/openai/serving_classification.py
vllm/entrypoints/openai/serving_completion.py
vllm/entrypoints/openai/serving_embedding.py
vllm/entrypoints/openai/serving_engine.py
vllm/entrypoints/openai/serving_models.py
vllm/entrypoints/openai/serving_pooling.py
vllm/entrypoints/openai/serving_responses.py
vllm/entrypoints/openai/serving_score.py
vllm/entrypoints/openai/serving_tokenization.py
vllm/entrypoints/openai/serving_transcription.py
vllm/entrypoints/openai/speech_to_text.py
vllm/entrypoints/openai/tool_parsers/__init__.py
vllm/entrypoints/openai/tool_parsers/abstract_tool_parser.py
vllm/entrypoints/openai/tool_parsers/deepseekv31_tool_parser.py
vllm/entrypoints/openai/tool_parsers/deepseekv3_tool_parser.py
vllm/entrypoints/openai/tool_parsers/glm4_moe_tool_parser.py
vllm/entrypoints/openai/tool_parsers/granite_20b_fc_tool_parser.py
vllm/entrypoints/openai/tool_parsers/granite_tool_parser.py
vllm/entrypoints/openai/tool_parsers/hermes_tool_parser.py
vllm/entrypoints/openai/tool_parsers/hunyuan_a13b_tool_parser.py
vllm/entrypoints/openai/tool_parsers/internlm2_tool_parser.py
vllm/entrypoints/openai/tool_parsers/jamba_tool_parser.py
vllm/entrypoints/openai/tool_parsers/kimi_k2_tool_parser.py
vllm/entrypoints/openai/tool_parsers/llama4_pythonic_tool_parser.py
vllm/entrypoints/openai/tool_parsers/llama_tool_parser.py
vllm/entrypoints/openai/tool_parsers/minimax_tool_parser.py
vllm/entrypoints/openai/tool_parsers/mistral_tool_parser.py
vllm/entrypoints/openai/tool_parsers/phi4mini_tool_parser.py
vllm/entrypoints/openai/tool_parsers/pythonic_tool_parser.py
vllm/entrypoints/openai/tool_parsers/qwen3coder_tool_parser.py
vllm/entrypoints/openai/tool_parsers/seed_oss_tool_parser.py
vllm/entrypoints/openai/tool_parsers/step3_tool_parser.py
vllm/entrypoints/openai/tool_parsers/utils.py
vllm/entrypoints/openai/tool_parsers/xlam_tool_parser.py
vllm/executor/__init__.py
vllm/executor/executor_base.py
vllm/executor/mp_distributed_executor.py
vllm/executor/msgspec_utils.py
vllm/executor/multiproc_worker_utils.py
vllm/executor/ray_distributed_executor.py
vllm/executor/ray_utils.py
vllm/executor/uniproc_executor.py
vllm/inputs/__init__.py
vllm/inputs/data.py
vllm/inputs/parse.py
vllm/inputs/preprocess.py
vllm/inputs/registry.py
vllm/logging_utils/__init__.py
vllm/logging_utils/dump_input.py
vllm/logging_utils/formatter.py
vllm/lora/__init__.py
vllm/lora/fully_sharded_layers.py
vllm/lora/layers.py
vllm/lora/lora.py
vllm/lora/models.py
vllm/lora/peft_helper.py
vllm/lora/request.py
vllm/lora/resolver.py
vllm/lora/utils.py
vllm/lora/worker_manager.py
vllm/lora/ops/__init__.py
vllm/lora/ops/ipex_ops/__init__.py
vllm/lora/ops/ipex_ops/lora_ops.py
vllm/lora/ops/torch_ops/__init__.py
vllm/lora/ops/torch_ops/lora_ops.py
vllm/lora/ops/triton_ops/__init__.py
vllm/lora/ops/triton_ops/kernel_utils.py
vllm/lora/ops/triton_ops/lora_expand_op.py
vllm/lora/ops/triton_ops/lora_kernel_metadata.py
vllm/lora/ops/triton_ops/lora_shrink_op.py
vllm/lora/ops/triton_ops/utils.py
vllm/lora/ops/xla_ops/__init__.py
vllm/lora/ops/xla_ops/lora_ops.py
vllm/lora/punica_wrapper/__init__.py
vllm/lora/punica_wrapper/punica_base.py
vllm/lora/punica_wrapper/punica_cpu.py
vllm/lora/punica_wrapper/punica_gpu.py
vllm/lora/punica_wrapper/punica_selector.py
vllm/lora/punica_wrapper/punica_tpu.py
vllm/lora/punica_wrapper/punica_xpu.py
vllm/lora/punica_wrapper/utils.py
vllm/model_executor/__init__.py
vllm/model_executor/custom_op.py
vllm/model_executor/parameter.py
vllm/model_executor/sampling_metadata.py
vllm/model_executor/utils.py
vllm/model_executor/layers/__init__.py
vllm/model_executor/layers/activation.py
vllm/model_executor/layers/attention_layer_base.py
vllm/model_executor/layers/layernorm.py
vllm/model_executor/layers/lightning_attn.py
vllm/model_executor/layers/linear.py
vllm/model_executor/layers/logits_processor.py
vllm/model_executor/layers/pooler.py
vllm/model_executor/layers/resampler.py
vllm/model_executor/layers/sampler.py
vllm/model_executor/layers/utils.py
vllm/model_executor/layers/vocab_parallel_embedding.py
vllm/model_executor/layers/fused_moe/__init__.py
vllm/model_executor/layers/fused_moe/batched_deep_gemm_moe.py
vllm/model_executor/layers/fused_moe/batched_triton_or_deep_gemm_moe.py
vllm/model_executor/layers/fused_moe/config.py
vllm/model_executor/layers/fused_moe/cpu_fused_moe.py
vllm/model_executor/layers/fused_moe/cutlass_moe.py
vllm/model_executor/layers/fused_moe/deep_gemm_moe.py
vllm/model_executor/layers/fused_moe/deep_gemm_utils.py
vllm/model_executor/layers/fused_moe/deepep_ht_prepare_finalize.py
vllm/model_executor/layers/fused_moe/deepep_ll_prepare_finalize.py
vllm/model_executor/layers/fused_moe/flashinfer_cutlass_moe.py
vllm/model_executor/layers/fused_moe/flashinfer_cutlass_prepare_finalize.py
vllm/model_executor/layers/fused_moe/fused_batched_moe.py
vllm/model_executor/layers/fused_moe/fused_marlin_moe.py
vllm/model_executor/layers/fused_moe/fused_moe.py
vllm/model_executor/layers/fused_moe/gpt_oss_triton_kernels_moe.py
vllm/model_executor/layers/fused_moe/layer.py
vllm/model_executor/layers/fused_moe/modular_kernel.py
vllm/model_executor/layers/fused_moe/moe_align_block_size.py
vllm/model_executor/layers/fused_moe/moe_pallas.py
vllm/model_executor/layers/fused_moe/moe_permute_unpermute.py
vllm/model_executor/layers/fused_moe/moe_torch_iterative.py
vllm/model_executor/layers/fused_moe/pplx_prepare_finalize.py
vllm/model_executor/layers/fused_moe/prepare_finalize.py
vllm/model_executor/layers/fused_moe/rocm_aiter_fused_moe.py
vllm/model_executor/layers/fused_moe/routing_simulator.py
vllm/model_executor/layers/fused_moe/topk_weight_and_reduce.py
vllm/model_executor/layers/fused_moe/triton_deep_gemm_moe.py
vllm/model_executor/layers/fused_moe/trtllm_moe.py
vllm/model_executor/layers/fused_moe/utils.py
vllm/model_executor/layers/mamba/__init__.py
vllm/model_executor/layers/mamba/abstract.py
vllm/model_executor/layers/mamba/linear_attn.py
vllm/model_executor/layers/mamba/mamba2_metadata.py
vllm/model_executor/layers/mamba/mamba_mixer.py
vllm/model_executor/layers/mamba/mamba_mixer2.py
vllm/model_executor/layers/mamba/mamba_utils.py
vllm/model_executor/layers/mamba/short_conv.py
vllm/model_executor/layers/mamba/ops/__init__.py
vllm/model_executor/layers/mamba/ops/causal_conv1d.py
vllm/model_executor/layers/mamba/ops/layernorm_gated.py
vllm/model_executor/layers/mamba/ops/mamba_ssm.py
vllm/model_executor/layers/mamba/ops/ssd_bmm.py
vllm/model_executor/layers/mamba/ops/ssd_chunk_scan.py
vllm/model_executor/layers/mamba/ops/ssd_chunk_state.py
vllm/model_executor/layers/mamba/ops/ssd_combined.py
vllm/model_executor/layers/mamba/ops/ssd_state_passing.py
vllm/model_executor/layers/quantization/__init__.py
vllm/model_executor/layers/quantization/auto_round.py
vllm/model_executor/layers/quantization/awq.py
vllm/model_executor/layers/quantization/awq_marlin.py
vllm/model_executor/layers/quantization/awq_triton.py
vllm/model_executor/layers/quantization/base_config.py
vllm/model_executor/layers/quantization/bitblas.py
vllm/model_executor/layers/quantization/bitsandbytes.py
vllm/model_executor/layers/quantization/deepgemm.py
vllm/model_executor/layers/quantization/deepspeedfp.py
vllm/model_executor/layers/quantization/experts_int8.py
vllm/model_executor/layers/quantization/fbgemm_fp8.py
vllm/model_executor/layers/quantization/fp8.py
vllm/model_executor/layers/quantization/gguf.py
vllm/model_executor/layers/quantization/gptq.py
vllm/model_executor/layers/quantization/gptq_bitblas.py
vllm/model_executor/layers/quantization/gptq_marlin.py
vllm/model_executor/layers/quantization/gptq_marlin_24.py
vllm/model_executor/layers/quantization/hqq_marlin.py
vllm/model_executor/layers/quantization/inc.py
vllm/model_executor/layers/quantization/input_quant_fp8.py
vllm/model_executor/layers/quantization/ipex_quant.py
vllm/model_executor/layers/quantization/kv_cache.py
vllm/model_executor/layers/quantization/modelopt.py
vllm/model_executor/layers/quantization/moe_wna16.py
vllm/model_executor/layers/quantization/mxfp4.py
vllm/model_executor/layers/quantization/neuron_quant.py
vllm/model_executor/layers/quantization/petit.py
vllm/model_executor/layers/quantization/ptpc_fp8.py
vllm/model_executor/layers/quantization/rtn.py
vllm/model_executor/layers/quantization/schema.py
vllm/model_executor/layers/quantization/torchao.py
vllm/model_executor/layers/quantization/tpu_int8.py
vllm/model_executor/layers/quantization/compressed_tensors/__init__.py
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors.py
vllm/model_executor/layers/quantization/compressed_tensors/compressed_tensors_moe.py
vllm/model_executor/layers/quantization/compressed_tensors/triton_scaled_mm.py
vllm/model_executor/layers/quantization/compressed_tensors/utils.py
vllm/model_executor/layers/quantization/compressed_tensors/schemes/__init__.py
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_24.py
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_scheme.py
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_24.py
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a16_nvfp4.py
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a4_nvfp4.py
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a8_fp8.py
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w4a8_int.py
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a16_fp8.py
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_fp8.py
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_w8a8_int8.py
vllm/model_executor/layers/quantization/compressed_tensors/schemes/compressed_tensors_wNa16.py
vllm/model_executor/layers/quantization/compressed_tensors/transform/linear.py
vllm/model_executor/layers/quantization/compressed_tensors/transform/module.py
vllm/model_executor/layers/quantization/compressed_tensors/transform/utils.py
vllm/model_executor/layers/quantization/compressed_tensors/transform/schemes/linear_qutlass_nvfp4.py
vllm/model_executor/layers/quantization/kernels/__init__.py
vllm/model_executor/layers/quantization/kernels/mixed_precision/MPLinearKernel.py
vllm/model_executor/layers/quantization/kernels/mixed_precision/__init__.py
vllm/model_executor/layers/quantization/kernels/mixed_precision/allspark.py
vllm/model_executor/layers/quantization/kernels/mixed_precision/bitblas.py
vllm/model_executor/layers/quantization/kernels/mixed_precision/conch.py
vllm/model_executor/layers/quantization/kernels/mixed_precision/cutlass.py
vllm/model_executor/layers/quantization/kernels/mixed_precision/dynamic_4bit.py
vllm/model_executor/layers/quantization/kernels/mixed_precision/exllama.py
vllm/model_executor/layers/quantization/kernels/mixed_precision/machete.py
vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py
vllm/model_executor/layers/quantization/kernels/scaled_mm/ScaledMMLinearKernel.py
vllm/model_executor/layers/quantization/kernels/scaled_mm/__init__.py
vllm/model_executor/layers/quantization/kernels/scaled_mm/aiter.py
vllm/model_executor/layers/quantization/kernels/scaled_mm/cpu.py
vllm/model_executor/layers/quantization/kernels/scaled_mm/cutlass.py
vllm/model_executor/layers/quantization/kernels/scaled_mm/triton.py
vllm/model_executor/layers/quantization/kernels/scaled_mm/xla.py
vllm/model_executor/layers/quantization/quark/__init__.py
vllm/model_executor/layers/quantization/quark/quark.py
vllm/model_executor/layers/quantization/quark/quark_moe.py
vllm/model_executor/layers/quantization/quark/utils.py
vllm/model_executor/layers/quantization/quark/schemes/__init__.py
vllm/model_executor/layers/quantization/quark/schemes/quark_scheme.py
vllm/model_executor/layers/quantization/quark/schemes/quark_w4a4_mxfp4.py
vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_fp8.py
vllm/model_executor/layers/quantization/quark/schemes/quark_w8a8_int8.py
vllm/model_executor/layers/quantization/utils/__init__.py
vllm/model_executor/layers/quantization/utils/allspark_utils.py
vllm/model_executor/layers/quantization/utils/bitblas_utils.py
vllm/model_executor/layers/quantization/utils/flashinfer_fp4_moe.py
vllm/model_executor/layers/quantization/utils/flashinfer_utils.py
vllm/model_executor/layers/quantization/utils/fp8_utils.py
vllm/model_executor/layers/quantization/utils/gptq_utils.py
vllm/model_executor/layers/quantization/utils/int8_utils.py
vllm/model_executor/layers/quantization/utils/layer_utils.py
vllm/model_executor/layers/quantization/utils/machete_utils.py
vllm/model_executor/layers/quantization/utils/marlin_utils.py
vllm/model_executor/layers/quantization/utils/marlin_utils_fp4.py
vllm/model_executor/layers/quantization/utils/marlin_utils_fp8.py
vllm/model_executor/layers/quantization/utils/marlin_utils_test.py
vllm/model_executor/layers/quantization/utils/marlin_utils_test_24.py
vllm/model_executor/layers/quantization/utils/mxfp4_utils.py
vllm/model_executor/layers/quantization/utils/mxfp8_utils.py
vllm/model_executor/layers/quantization/utils/nvfp4_emulation_utils.py
vllm/model_executor/layers/quantization/utils/nvfp4_moe_support.py
vllm/model_executor/layers/quantization/utils/petit_utils.py
vllm/model_executor/layers/quantization/utils/quant_utils.py
vllm/model_executor/layers/quantization/utils/w8a8_utils.py
vllm/model_executor/layers/rotary_embedding/__init__.py
vllm/model_executor/layers/rotary_embedding/base.py
vllm/model_executor/layers/rotary_embedding/common.py
vllm/model_executor/layers/rotary_embedding/deepseek_scaling_rope.py
vllm/model_executor/layers/rotary_embedding/dual_chunk_rope.py
vllm/model_executor/layers/rotary_embedding/dynamic_ntk_alpha_rope.py
vllm/model_executor/layers/rotary_embedding/dynamic_ntk_scaling_rope.py
vllm/model_executor/layers/rotary_embedding/ernie45_vl_rope.py
vllm/model_executor/layers/rotary_embedding/linear_scaling_rope.py
vllm/model_executor/layers/rotary_embedding/llama3_rope.py
vllm/model_executor/layers/rotary_embedding/llama4_vision_rope.py
vllm/model_executor/layers/rotary_embedding/mrope.py
vllm/model_executor/layers/rotary_embedding/ntk_scaling_rope.py
vllm/model_executor/layers/rotary_embedding/phi3_long_rope_scaled_rope.py
vllm/model_executor/layers/rotary_embedding/yarn_scaling_rope.py
vllm/model_executor/model_loader/__init__.py
vllm/model_executor/model_loader/base_loader.py
vllm/model_executor/model_loader/bitsandbytes_loader.py
vllm/model_executor/model_loader/default_loader.py
vllm/model_executor/model_loader/dummy_loader.py
vllm/model_executor/model_loader/gguf_loader.py
vllm/model_executor/model_loader/neuron.py
vllm/model_executor/model_loader/neuronx_distributed.py
vllm/model_executor/model_loader/runai_streamer_loader.py
vllm/model_executor/model_loader/sharded_state_loader.py
vllm/model_executor/model_loader/tensorizer.py
vllm/model_executor/model_loader/tensorizer_loader.py
vllm/model_executor/model_loader/tpu.py
vllm/model_executor/model_loader/utils.py
vllm/model_executor/model_loader/weight_utils.py
vllm/model_executor/models/__init__.py
vllm/model_executor/models/adapters.py
vllm/model_executor/models/aimv2.py
vllm/model_executor/models/apertus.py
vllm/model_executor/models/arcee.py
vllm/model_executor/models/arctic.py
vllm/model_executor/models/aria.py
vllm/model_executor/models/aya_vision.py
vllm/model_executor/models/baichuan.py
vllm/model_executor/models/bailing_moe.py
vllm/model_executor/models/bamba.py
vllm/model_executor/models/bart.py
vllm/model_executor/models/bert.py
vllm/model_executor/models/bert_with_rope.py
vllm/model_executor/models/blip.py
vllm/model_executor/models/blip2.py
vllm/model_executor/models/bloom.py
vllm/model_executor/models/chameleon.py
vllm/model_executor/models/chatglm.py
vllm/model_executor/models/clip.py
vllm/model_executor/models/cohere2_vision.py
vllm/model_executor/models/commandr.py
vllm/model_executor/models/config.py
vllm/model_executor/models/constant_size_cache.py
vllm/model_executor/models/dbrx.py
vllm/model_executor/models/deepseek.py
vllm/model_executor/models/deepseek_eagle.py
vllm/model_executor/models/deepseek_mtp.py
vllm/model_executor/models/deepseek_v2.py
vllm/model_executor/models/deepseek_vl2.py
vllm/model_executor/models/donut.py
vllm/model_executor/models/dots1.py
vllm/model_executor/models/ernie45.py
vllm/model_executor/models/ernie45_moe.py
vllm/model_executor/models/ernie45_vl.py
vllm/model_executor/models/ernie45_vl_moe.py
vllm/model_executor/models/ernie_mtp.py
vllm/model_executor/models/exaone.py
vllm/model_executor/models/exaone4.py
vllm/model_executor/models/fairseq2_llama.py
vllm/model_executor/models/falcon.py
vllm/model_executor/models/falcon_h1.py
vllm/model_executor/models/florence2.py
vllm/model_executor/models/fuyu.py
vllm/model_executor/models/gemma.py
vllm/model_executor/models/gemma2.py
vllm/model_executor/models/gemma3.py
vllm/model_executor/models/gemma3_mm.py
vllm/model_executor/models/gemma3n.py
vllm/model_executor/models/gemma3n_mm.py
vllm/model_executor/models/glm.py
vllm/model_executor/models/glm4.py
vllm/model_executor/models/glm4_1v.py
vllm/model_executor/models/glm4_moe.py
vllm/model_executor/models/glm4_moe_mtp.py
vllm/model_executor/models/glm4v.py
vllm/model_executor/models/gpt2.py
vllm/model_executor/models/gpt_bigcode.py
vllm/model_executor/models/gpt_j.py
vllm/model_executor/models/gpt_neox.py
vllm/model_executor/models/gpt_oss.py
vllm/model_executor/models/granite.py
vllm/model_executor/models/granite_speech.py
vllm/model_executor/models/granitemoe.py
vllm/model_executor/models/granitemoehybrid.py
vllm/model_executor/models/granitemoeshared.py
vllm/model_executor/models/gritlm.py
vllm/model_executor/models/grok1.py
vllm/model_executor/models/h2ovl.py
vllm/model_executor/models/hunyuan_v1.py
vllm/model_executor/models/hyperclovax_vision.py
vllm/model_executor/models/idefics2_vision_model.py
vllm/model_executor/models/idefics3.py
vllm/model_executor/models/interfaces.py
vllm/model_executor/models/interfaces_base.py
vllm/model_executor/models/intern_vit.py
vllm/model_executor/models/internlm2.py
vllm/model_executor/models/internlm2_ve.py
vllm/model_executor/models/interns1.py
vllm/model_executor/models/interns1_vit.py
vllm/model_executor/models/internvl.py
vllm/model_executor/models/jais.py
vllm/model_executor/models/jamba.py
vllm/model_executor/models/jina_vl.py
vllm/model_executor/models/keye.py
vllm/model_executor/models/keye_vl1_5.py
vllm/model_executor/models/kimi_vl.py
vllm/model_executor/models/lfm2.py
vllm/model_executor/models/llama.py
vllm/model_executor/models/llama4.py
vllm/model_executor/models/llama4_eagle.py
vllm/model_executor/models/llama_eagle.py
vllm/model_executor/models/llama_eagle3.py
vllm/model_executor/models/llava.py
vllm/model_executor/models/llava_next.py
vllm/model_executor/models/llava_next_video.py
vllm/model_executor/models/llava_onevision.py
vllm/model_executor/models/mamba.py
vllm/model_executor/models/mamba2.py
vllm/model_executor/models/mamba_cache.py
vllm/model_executor/models/medusa.py
vllm/model_executor/models/mimo.py
vllm/model_executor/models/mimo_mtp.py
vllm/model_executor/models/minicpm.py
vllm/model_executor/models/minicpm3.py
vllm/model_executor/models/minicpm_eagle.py
vllm/model_executor/models/minicpmo.py
vllm/model_executor/models/minicpmv.py
vllm/model_executor/models/minimax_cache.py
vllm/model_executor/models/minimax_text_01.py
vllm/model_executor/models/minimax_vl_01.py
vllm/model_executor/models/mistral3.py
vllm/model_executor/models/mixtral.py
vllm/model_executor/models/mixtral_quant.py
vllm/model_executor/models/mllama.py
vllm/model_executor/models/mllama4.py
vllm/model_executor/models/mlp_speculator.py
vllm/model_executor/models/modernbert.py
vllm/model_executor/models/module_mapping.py
vllm/model_executor/models/molmo.py
vllm/model_executor/models/moonvit.py
vllm/model_executor/models/mpt.py
vllm/model_executor/models/nemotron.py
vllm/model_executor/models/nemotron_h.py
vllm/model_executor/models/nemotron_nas.py
vllm/model_executor/models/nemotron_vl.py
vllm/model_executor/models/nvlm_d.py
vllm/model_executor/models/olmo.py
vllm/model_executor/models/olmo2.py
vllm/model_executor/models/olmoe.py
vllm/model_executor/models/omega.py
vllm/model_executor/models/omega17_exp.py
vllm/model_executor/models/opt.py
vllm/model_executor/models/orion.py
vllm/model_executor/models/ovis.py
vllm/model_executor/models/ovis2_5.py
vllm/model_executor/models/paligemma.py
vllm/model_executor/models/persimmon.py
vllm/model_executor/models/phi.py
vllm/model_executor/models/phi3.py
vllm/model_executor/models/phi3v.py
vllm/model_executor/models/phi4_multimodal.py
vllm/model_executor/models/phi4flash.py
vllm/model_executor/models/phi4mm.py
vllm/model_executor/models/phi4mm_audio.py
vllm/model_executor/models/phi4mm_utils.py
vllm/model_executor/models/phimoe.py
vllm/model_executor/models/pixtral.py
vllm/model_executor/models/plamo2.py
vllm/model_executor/models/prithvi_geospatial_mae.py
vllm/model_executor/models/qwen.py
vllm/model_executor/models/qwen2.py
vllm/model_executor/models/qwen2_5_omni_thinker.py
vllm/model_executor/models/qwen2_5_vl.py
vllm/model_executor/models/qwen2_audio.py
vllm/model_executor/models/qwen2_moe.py
vllm/model_executor/models/qwen2_rm.py
vllm/model_executor/models/qwen2_vl.py
vllm/model_executor/models/qwen3.py
vllm/model_executor/models/qwen3_moe.py
vllm/model_executor/models/qwen_vl.py
vllm/model_executor/models/registry.py
vllm/model_executor/models/roberta.py
vllm/model_executor/models/rvl.py
vllm/model_executor/models/seed_oss.py
vllm/model_executor/models/siglip.py
vllm/model_executor/models/siglip2navit.py
vllm/model_executor/models/skyworkr1v.py
vllm/model_executor/models/smolvlm.py
vllm/model_executor/models/solar.py
vllm/model_executor/models/stablelm.py
vllm/model_executor/models/starcoder2.py
vllm/model_executor/models/step3_text.py
vllm/model_executor/models/step3_vl.py
vllm/model_executor/models/swin.py
vllm/model_executor/models/tarsier.py
vllm/model_executor/models/telechat2.py
vllm/model_executor/models/teleflm.py
vllm/model_executor/models/transformers.py
vllm/model_executor/models/ultravox.py
vllm/model_executor/models/utils.py
vllm/model_executor/models/vision.py
vllm/model_executor/models/voxtral.py
vllm/model_executor/models/whisper.py
vllm/model_executor/models/zamba2.py
vllm/model_executor/utils/modeling_rope_utils.py
vllm/model_executor/warmup/__init__.py
vllm/model_executor/warmup/deep_gemm_warmup.py
vllm/model_executor/warmup/kernel_warmup.py
vllm/multimodal/__init__.py
vllm/multimodal/audio.py
vllm/multimodal/base.py
vllm/multimodal/cache.py
vllm/multimodal/hasher.py
vllm/multimodal/image.py
vllm/multimodal/inputs.py
vllm/multimodal/parse.py
vllm/multimodal/processing.py
vllm/multimodal/profiling.py
vllm/multimodal/registry.py
vllm/multimodal/utils.py
vllm/multimodal/video.py
vllm/platforms/__init__.py
vllm/platforms/cpu.py
vllm/platforms/cuda.py
vllm/platforms/interface.py
vllm/platforms/neuron.py
vllm/platforms/rocm.py
vllm/platforms/tpu.py
vllm/platforms/xpu.py
vllm/plugins/__init__.py
vllm/plugins/io_processors/__init__.py
vllm/plugins/io_processors/interface.py
vllm/plugins/lora_resolvers/__init__.py
vllm/plugins/lora_resolvers/filesystem_resolver.py
vllm/profiler/__init__.py
vllm/profiler/layerwise_profile.py
vllm/profiler/utils.py
vllm/ray/__init__.py
vllm/ray/lazy_utils.py
vllm/ray/ray_env.py
vllm/reasoning/__init__.py
vllm/reasoning/abs_reasoning_parsers.py
vllm/reasoning/deepseek_r1_reasoning_parser.py
vllm/reasoning/glm4_moe_reasoning_parser.py
vllm/reasoning/gptoss_reasoning_parser.py
vllm/reasoning/granite_reasoning_parser.py
vllm/reasoning/hunyuan_a13b_reasoning_parser.py
vllm/reasoning/mistral_reasoning_parser.py
vllm/reasoning/qwen3_reasoning_parser.py
vllm/reasoning/step3_reasoning_parser.py
vllm/third_party/__init__.py
vllm/third_party/pynvml.py
vllm/transformers_utils/__init__.py
vllm/transformers_utils/config.py
vllm/transformers_utils/detokenizer.py
vllm/transformers_utils/detokenizer_utils.py
vllm/transformers_utils/dynamic_module.py
vllm/transformers_utils/processor.py
vllm/transformers_utils/s3_utils.py
vllm/transformers_utils/tokenizer.py
vllm/transformers_utils/tokenizer_base.py
vllm/transformers_utils/tokenizer_group.py
vllm/transformers_utils/utils.py
vllm/transformers_utils/chat_templates/__init__.py
vllm/transformers_utils/chat_templates/registry.py
vllm/transformers_utils/configs/__init__.py
vllm/transformers_utils/configs/arctic.py
vllm/transformers_utils/configs/chatglm.py
vllm/transformers_utils/configs/deepseek_vl2.py
vllm/transformers_utils/configs/eagle.py
vllm/transformers_utils/configs/falcon.py
vllm/transformers_utils/configs/jais.py
vllm/transformers_utils/configs/kimi_vl.py
vllm/transformers_utils/configs/medusa.py
vllm/transformers_utils/configs/mistral.py
vllm/transformers_utils/configs/mlp_speculator.py
vllm/transformers_utils/configs/moonvit.py
vllm/transformers_utils/configs/nemotron.py
vllm/transformers_utils/configs/nemotron_h.py
vllm/transformers_utils/configs/nemotron_vl.py
vllm/transformers_utils/configs/ovis.py
vllm/transformers_utils/configs/step3_vl.py
vllm/transformers_utils/configs/ultravox.py
vllm/transformers_utils/configs/speculators/__init__.py
vllm/transformers_utils/configs/speculators/algos.py
vllm/transformers_utils/configs/speculators/base.py
vllm/transformers_utils/processors/__init__.py
vllm/transformers_utils/processors/deepseek_vl2.py
vllm/transformers_utils/processors/ovis.py
vllm/transformers_utils/processors/ovis2_5.py
vllm/transformers_utils/tokenizers/__init__.py
vllm/transformers_utils/tokenizers/mistral.py
vllm/triton_utils/__init__.py
vllm/triton_utils/importing.py
vllm/usage/__init__.py
vllm/usage/usage_lib.py
vllm/utils/__init__.py
vllm/utils/deep_gemm.py
vllm/utils/flashinfer.py
vllm/utils/jsontree.py
vllm/utils/tensor_schema.py
vllm/v1/__init__.py
vllm/v1/cudagraph_dispatcher.py
vllm/v1/kv_cache_interface.py
vllm/v1/outputs.py
vllm/v1/request.py
vllm/v1/serial_utils.py
vllm/v1/utils.py
vllm/v1/attention/__init__.py
vllm/v1/attention/backends/__init__.py
vllm/v1/attention/backends/cpu_attn.py
vllm/v1/attention/backends/flash_attn.py
vllm/v1/attention/backends/flashinfer.py
vllm/v1/attention/backends/flex_attention.py
vllm/v1/attention/backends/linear_attn.py
vllm/v1/attention/backends/mamba1_attn.py
vllm/v1/attention/backends/mamba2_attn.py
vllm/v1/attention/backends/mamba_attn.py
vllm/v1/attention/backends/pallas.py
vllm/v1/attention/backends/rocm_aiter_fa.py
vllm/v1/attention/backends/short_conv_attn.py
vllm/v1/attention/backends/tree_attn.py
vllm/v1/attention/backends/triton_attn.py
vllm/v1/attention/backends/utils.py
vllm/v1/attention/backends/xformers.py
vllm/v1/attention/backends/mla/__init__.py
vllm/v1/attention/backends/mla/common.py
vllm/v1/attention/backends/mla/cutlass_mla.py
vllm/v1/attention/backends/mla/flashmla.py
vllm/v1/attention/backends/mla/rocm_aiter_mla.py
vllm/v1/attention/backends/mla/triton_mla.py
vllm/v1/core/__init__.py
vllm/v1/core/block_pool.py
vllm/v1/core/encoder_cache_manager.py
vllm/v1/core/kv_cache_coordinator.py
vllm/v1/core/kv_cache_manager.py
vllm/v1/core/kv_cache_utils.py
vllm/v1/core/single_type_kv_cache_manager.py
vllm/v1/core/sched/__init__.py
vllm/v1/core/sched/async_scheduler.py
vllm/v1/core/sched/interface.py
vllm/v1/core/sched/output.py
vllm/v1/core/sched/request_queue.py
vllm/v1/core/sched/scheduler.py
vllm/v1/core/sched/utils.py
vllm/v1/engine/__init__.py
vllm/v1/engine/async_llm.py
vllm/v1/engine/coordinator.py
vllm/v1/engine/core.py
vllm/v1/engine/core_client.py
vllm/v1/engine/detokenizer.py
vllm/v1/engine/exceptions.py
vllm/v1/engine/llm_engine.py
vllm/v1/engine/logprobs.py
vllm/v1/engine/output_processor.py
vllm/v1/engine/parallel_sampling.py
vllm/v1/engine/processor.py
vllm/v1/engine/utils.py
vllm/v1/executor/__init__.py
vllm/v1/executor/abstract.py
vllm/v1/executor/multiproc_executor.py
vllm/v1/executor/ray_distributed_executor.py
vllm/v1/metrics/__init__.py
vllm/v1/metrics/loggers.py
vllm/v1/metrics/prometheus.py
vllm/v1/metrics/ray_wrappers.py
vllm/v1/metrics/reader.py
vllm/v1/metrics/stats.py
vllm/v1/pool/__init__.py
vllm/v1/pool/metadata.py
vllm/v1/sample/__init__.py
vllm/v1/sample/metadata.py
vllm/v1/sample/rejection_sampler.py
vllm/v1/sample/sampler.py
vllm/v1/sample/logits_processor/__init__.py
vllm/v1/sample/logits_processor/builtin.py
vllm/v1/sample/logits_processor/interface.py
vllm/v1/sample/logits_processor/state.py
vllm/v1/sample/ops/__init__.py
vllm/v1/sample/ops/bad_words.py
vllm/v1/sample/ops/logprobs.py
vllm/v1/sample/ops/penalties.py
vllm/v1/sample/ops/topk_topp_sampler.py
vllm/v1/sample/tpu/__init__.py
vllm/v1/sample/tpu/metadata.py
vllm/v1/sample/tpu/sampler.py
vllm/v1/spec_decode/__init__.py
vllm/v1/spec_decode/eagle.py
vllm/v1/spec_decode/medusa.py
vllm/v1/spec_decode/metadata.py
vllm/v1/spec_decode/metrics.py
vllm/v1/spec_decode/ngram_proposer.py
vllm/v1/spec_decode/utils.py
vllm/v1/structured_output/__init__.py
vllm/v1/structured_output/backend_guidance.py
vllm/v1/structured_output/backend_lm_format_enforcer.py
vllm/v1/structured_output/backend_outlines.py
vllm/v1/structured_output/backend_types.py
vllm/v1/structured_output/backend_xgrammar.py
vllm/v1/structured_output/request.py
vllm/v1/structured_output/utils.py
vllm/v1/worker/__init__.py
vllm/v1/worker/block_table.py
vllm/v1/worker/cpu_model_runner.py
vllm/v1/worker/cpu_worker.py
vllm/v1/worker/gpu_input_batch.py
vllm/v1/worker/gpu_model_runner.py
vllm/v1/worker/gpu_worker.py
vllm/v1/worker/kv_connector_model_runner_mixin.py
vllm/v1/worker/lora_model_runner_mixin.py
vllm/v1/worker/tpu_input_batch.py
vllm/v1/worker/tpu_model_runner.py
vllm/v1/worker/tpu_worker.py
vllm/v1/worker/utils.py
vllm/v1/worker/worker_base.py
vllm/v1/worker/xpu_model_runner.py
vllm/v1/worker/xpu_worker.py
vllm/worker/__init__.py
vllm/worker/cache_engine.py
vllm/worker/enc_dec_model_runner.py
vllm/worker/model_runner.py
vllm/worker/model_runner_base.py
vllm/worker/neuron_model_runner.py
vllm/worker/neuron_worker.py
vllm/worker/neuronx_distributed_model_runner.py
vllm/worker/utils.py
vllm/worker/worker.py
vllm/worker/worker_base.py
vllm_usf.egg-info/PKG-INFO
vllm_usf.egg-info/SOURCES.txt
vllm_usf.egg-info/dependency_links.txt
vllm_usf.egg-info/requires.txt
vllm_usf.egg-info/top_level.txt