LICENSE
README.md
pyproject.toml
tests/test_anthropic_adapter.py
tests/test_anthropic_models.py
tests/test_api_models.py
tests/test_api_utils.py
tests/test_async_markers.py
tests/test_audio.py
tests/test_audio_limits.py
tests/test_batched_engine.py
tests/test_batched_engine_mllm_config.py
tests/test_batching.py
tests/test_batching_deterministic.py
tests/test_bench_serve.py
tests/test_chat_template_kwargs.py
tests/test_constrained_decoding.py
tests/test_continuous_batching.py
tests/test_download.py
tests/test_embeddings.py
tests/test_endpoint_model_policies.py
tests/test_engine_core_stream_safety.py
tests/test_engine_core_thread_streams.py
tests/test_gemma4_openai_format.py
tests/test_gemma4_streaming_edge.py
tests/test_gemma4_tool_parser.py
tests/test_gradio_app.py
tests/test_gradio_text_app.py
tests/test_harmony_parsers.py
tests/test_json_schema_processor_hardening.py
tests/test_kv_cache_quantization.py
tests/test_lifecycle_cli.py
tests/test_lifecycle_manager.py
tests/test_lifecycle_server.py
tests/test_llm.py
tests/test_max_kv_size.py
tests/test_mcp_security.py
tests/test_memory_cache.py
tests/test_memory_cache_mlx.py
tests/test_memory_stability.py
tests/test_metrics.py
tests/test_minimax_tool_calling.py
tests/test_mllm.py
tests/test_mllm_cache.py
tests/test_mllm_continuous_batching.py
tests/test_mllm_mtp_routing.py
tests/test_model_registry.py
tests/test_model_workflow.py
tests/test_native_tool_format.py
tests/test_normalize_messages.py
tests/test_optimizations.py
tests/test_paged_cache.py
tests/test_paged_cache_benefits.py
tests/test_paged_cache_real_inference.py
tests/test_paged_cache_real_model.py
tests/test_platform.py
tests/test_prefix_cache.py
tests/test_prompt_warmup.py
tests/test_qwen35_mllm_patch.py
tests/test_qwen35_mtp_patch.py
tests/test_qwen3_xml_parser.py
tests/test_qwen3_xml_registration.py
tests/test_qwen3_xml_streaming_chunks.py
tests/test_reasoning_parser.py
tests/test_request.py
tests/test_rerank.py
tests/test_responses_api.py
tests/test_server.py
tests/test_server_cache_controls.py
tests/test_simple_engine.py
tests/test_simple_engine_cancel_serialization.py
tests/test_specprefill_rotating_cache.py
tests/test_ssd_cache.py
tests/test_streaming_detokenizer.py
tests/test_streaming_fence_stripper.py
tests/test_streaming_json_encoder.py
tests/test_streaming_latency.py
tests/test_streaming_pipeline_integration.py
tests/test_streaming_think_router.py
tests/test_streaming_tool_filter.py
tests/test_structured_output.py
tests/test_text_model_from_vlm.py
tests/test_thinking_aware_processor.py
tests/test_thinking_processor.py
tests/test_tool_call_promotion.py
tests/test_tool_choice_forced.py
tests/test_tool_choice_none.py
tests/test_tool_parsers.py
tests/test_video.py
vllm_mlx/__init__.py
vllm_mlx/attention.py
vllm_mlx/audio_limits.py
vllm_mlx/bench_serve.py
vllm_mlx/benchmark.py
vllm_mlx/cli.py
vllm_mlx/cli_arg_types.py
vllm_mlx/embedding.py
vllm_mlx/endpoint_model_policies.py
vllm_mlx/engine_core.py
vllm_mlx/gradio_app.py
vllm_mlx/gradio_text_app.py
vllm_mlx/lifecycle.py
vllm_mlx/memory_cache.py
vllm_mlx/metrics.py
vllm_mlx/mllm_batch_generator.py
vllm_mlx/mllm_cache.py
vllm_mlx/mllm_scheduler.py
vllm_mlx/mlx_streams.py
vllm_mlx/model_registry.py
vllm_mlx/model_runner.py
vllm_mlx/model_workflow.py
vllm_mlx/multimodal_processor.py
vllm_mlx/optimizations.py
vllm_mlx/output_collector.py
vllm_mlx/paged_cache.py
vllm_mlx/plugin.py
vllm_mlx/prefix_cache.py
vllm_mlx/prompt_warmup.py
vllm_mlx/request.py
vllm_mlx/rerank.py
vllm_mlx/rerank_forward.py
vllm_mlx/scheduler.py
vllm_mlx/server.py
vllm_mlx/specprefill.py
vllm_mlx/ssd_cache.py
vllm_mlx/text_model_from_vlm.py
vllm_mlx/vision_embedding_cache.py
vllm_mlx/vllm_platform.py
vllm_mlx/worker.py
vllm_mlx.egg-info/PKG-INFO
vllm_mlx.egg-info/SOURCES.txt
vllm_mlx.egg-info/dependency_links.txt
vllm_mlx.egg-info/entry_points.txt
vllm_mlx.egg-info/requires.txt
vllm_mlx.egg-info/top_level.txt
vllm_mlx/api/__init__.py
vllm_mlx/api/anthropic_adapter.py
vllm_mlx/api/anthropic_models.py
vllm_mlx/api/harmony_tools.py
vllm_mlx/api/models.py
vllm_mlx/api/responses_models.py
vllm_mlx/api/streaming.py
vllm_mlx/api/tool_calling.py
vllm_mlx/api/utils.py
vllm_mlx/audio/__init__.py
vllm_mlx/audio/processor.py
vllm_mlx/audio/stt.py
vllm_mlx/audio/tts.py
vllm_mlx/bench_serve_prompts/long.json
vllm_mlx/bench_serve_prompts/medium.json
vllm_mlx/bench_serve_prompts/short.json
vllm_mlx/bench_serve_prompts/thinking.json
vllm_mlx/bench_serve_prompts/warm_prompts_example.json
vllm_mlx/constrained/__init__.py
vllm_mlx/constrained/cache.py
vllm_mlx/constrained/json_schema_processor.py
vllm_mlx/constrained/thinking_processor.py
vllm_mlx/engine/__init__.py
vllm_mlx/engine/base.py
vllm_mlx/engine/batched.py
vllm_mlx/engine/simple.py
vllm_mlx/mcp/__init__.py
vllm_mlx/mcp/client.py
vllm_mlx/mcp/config.py
vllm_mlx/mcp/executor.py
vllm_mlx/mcp/manager.py
vllm_mlx/mcp/security.py
vllm_mlx/mcp/tools.py
vllm_mlx/mcp/types.py
vllm_mlx/models/__init__.py
vllm_mlx/models/llm.py
vllm_mlx/models/mllm.py
vllm_mlx/patches/__init__.py
vllm_mlx/patches/gemma4_mllm.py
vllm_mlx/patches/glm4v_moe_mllm.py
vllm_mlx/patches/qwen3_5_mllm.py
vllm_mlx/patches/qwen3_5_mtp.py
vllm_mlx/patches/qwen3_next_mtp.py
vllm_mlx/reasoning/__init__.py
vllm_mlx/reasoning/base.py
vllm_mlx/reasoning/deepseek_r1_parser.py
vllm_mlx/reasoning/gemma4_parser.py
vllm_mlx/reasoning/glm4_parser.py
vllm_mlx/reasoning/gpt_oss_parser.py
vllm_mlx/reasoning/harmony_parser.py
vllm_mlx/reasoning/qwen3_parser.py
vllm_mlx/reasoning/think_parser.py
vllm_mlx/tool_parsers/__init__.py
vllm_mlx/tool_parsers/abstract_tool_parser.py
vllm_mlx/tool_parsers/auto_tool_parser.py
vllm_mlx/tool_parsers/deepseek_tool_parser.py
vllm_mlx/tool_parsers/functionary_tool_parser.py
vllm_mlx/tool_parsers/gemma4_tool_parser.py
vllm_mlx/tool_parsers/glm47_tool_parser.py
vllm_mlx/tool_parsers/granite_tool_parser.py
vllm_mlx/tool_parsers/harmony_tool_parser.py
vllm_mlx/tool_parsers/hermes_tool_parser.py
vllm_mlx/tool_parsers/kimi_tool_parser.py
vllm_mlx/tool_parsers/llama_tool_parser.py
vllm_mlx/tool_parsers/minimax_tool_parser.py
vllm_mlx/tool_parsers/mistral_tool_parser.py
vllm_mlx/tool_parsers/nemotron_tool_parser.py
vllm_mlx/tool_parsers/qwen3_xml_tool_parser.py
vllm_mlx/tool_parsers/qwen_tool_parser.py
vllm_mlx/tool_parsers/xlam_tool_parser.py
vllm_mlx/utils/__init__.py
vllm_mlx/utils/chat_templates.py
vllm_mlx/utils/download.py
vllm_mlx/utils/mamba_cache.py
vllm_mlx/utils/tokenizer.py