LICENSE
README.md
pyproject.toml
sgl_jax/__init__.py
sgl_jax/__main__.py
sgl_jax/_version.py
sgl_jax/bench_offline_throughput.py
sgl_jax/bench_one_batch.py
sgl_jax/bench_one_batch_server.py
sgl_jax/bench_serving.py
sgl_jax/check_env.py
sgl_jax/global_config.py
sgl_jax/launch_server.py
sgl_jax/profiler.py
sgl_jax/utils.py
sgl_jax/version.py
sgl_jax/srt/__init__.py
sgl_jax/srt/conversation.py
sgl_jax/srt/hf_transformers_utils.py
sgl_jax/srt/jinja_template_utils.py
sgl_jax/srt/memory_profiler.py
sgl_jax/srt/precision_tracer.py
sgl_jax/srt/reasoning_parser.py
sgl_jax/srt/server_args.py
sgl_jax/srt/configs/__init__.py
sgl_jax/srt/configs/bailing_hybrid.py
sgl_jax/srt/configs/kimi_linear.py
sgl_jax/srt/configs/load_config.py
sgl_jax/srt/configs/model_config.py
sgl_jax/srt/configs/quantization_config.py
sgl_jax/srt/constrained/__init__.py
sgl_jax/srt/constrained/base_grammar_backend.py
sgl_jax/srt/constrained/bitmask_ops.py
sgl_jax/srt/constrained/llguidance_backend.py
sgl_jax/srt/entrypoints/EngineBase.py
sgl_jax/srt/entrypoints/engine.py
sgl_jax/srt/entrypoints/http_server.py
sgl_jax/srt/entrypoints/openai/__init__.py
sgl_jax/srt/entrypoints/openai/protocol.py
sgl_jax/srt/entrypoints/openai/serving_base.py
sgl_jax/srt/entrypoints/openai/serving_chat.py
sgl_jax/srt/entrypoints/openai/serving_completions.py
sgl_jax/srt/entrypoints/openai/serving_embedding.py
sgl_jax/srt/entrypoints/openai/serving_rerank.py
sgl_jax/srt/entrypoints/openai/serving_score.py
sgl_jax/srt/entrypoints/openai/usage_processor.py
sgl_jax/srt/entrypoints/openai/utils.py
sgl_jax/srt/eplb/__init__.py
sgl_jax/srt/eplb/expert_location.py
sgl_jax/srt/eplb/eplb_algorithms/__init__.py
sgl_jax/srt/eplb/eplb_algorithms/deepseek.py
sgl_jax/srt/function_call/base_format_detector.py
sgl_jax/srt/function_call/core_types.py
sgl_jax/srt/function_call/ebnf_composer.py
sgl_jax/srt/function_call/function_call_parser.py
sgl_jax/srt/function_call/glm47_moe_detector.py
sgl_jax/srt/function_call/glm4_moe_detector.py
sgl_jax/srt/function_call/mimo_detector.py
sgl_jax/srt/function_call/qwen25_detector.py
sgl_jax/srt/function_call/qwen3_coder_detector.py
sgl_jax/srt/function_call/utils.py
sgl_jax/srt/kernels/fused_moe/__init__.py
sgl_jax/srt/kernels/fused_moe/v1/__init__.py
sgl_jax/srt/kernels/fused_moe/v1/kernel.py
sgl_jax/srt/kernels/fused_moe/v1/tuned_block_configs.py
sgl_jax/srt/kernels/gdn/__init__.py
sgl_jax/srt/kernels/gdn/gated_delta.py
sgl_jax/srt/kernels/gmm/megablox_gmm_backend.py
sgl_jax/srt/kernels/gmm/megablox_gmm_kernel/common.py
sgl_jax/srt/kernels/gmm/megablox_gmm_kernel/gmm.py
sgl_jax/srt/kernels/gmm/megablox_gmm_kernel/gmm_v2.py
sgl_jax/srt/kernels/gmm/megablox_gmm_kernel/tuned_block_sizes.py
sgl_jax/srt/kernels/kda/__init__.py
sgl_jax/srt/kernels/kda/kda.py
sgl_jax/srt/kernels/kda/naive.py
sgl_jax/srt/kernels/mla/__init__.py
sgl_jax/srt/kernels/mla/v1/__init__.py
sgl_jax/srt/kernels/mla/v1/ref.py
sgl_jax/srt/kernels/mla/v2/__init__.py
sgl_jax/srt/kernels/mla/v2/kernel.py
sgl_jax/srt/kernels/paged_attention/paged_attention.py
sgl_jax/srt/kernels/quantized_matmul/blockwise_utils.py
sgl_jax/srt/kernels/quantized_matmul/kernel.py
sgl_jax/srt/kernels/quantized_matmul/quantized_matmul_kernels/__init__.py
sgl_jax/srt/kernels/quantized_matmul/quantized_matmul_kernels/blockwise_kernel.py
sgl_jax/srt/kernels/quantized_matmul/quantized_matmul_kernels/kernel.py
sgl_jax/srt/kernels/quantized_matmul/quantized_matmul_kernels/tuned_block_sizes.py
sgl_jax/srt/kernels/quantized_matmul/quantized_matmul_kernels/util.py
sgl_jax/srt/kernels/ragged_paged_attention/ragged_paged_attention.py
sgl_jax/srt/kernels/ragged_paged_attention/ragged_paged_attention_v3.py
sgl_jax/srt/kernels/ragged_paged_attention/tuned_block_sizes.py
sgl_jax/srt/kernels/ragged_paged_attention/util.py
sgl_jax/srt/kernels/simple_gla/__init__.py
sgl_jax/srt/kernels/simple_gla/native.py
sgl_jax/srt/kernels/simple_gla/simple_gla.py
sgl_jax/srt/kernels/speculative/build_eagle_tree_structure_kernel.py
sgl_jax/srt/kernels/speculative/kernel.py
sgl_jax/srt/kernels/speculative/tree_speculative_sampling_target_only_kernel.py
sgl_jax/srt/kernels/speculative/verify_tree_greedy_kernel.py
sgl_jax/srt/kernels/update_kv_cache/tuned_block_sizes.py
sgl_jax/srt/kernels/update_kv_cache/update_kv_cache.py
sgl_jax/srt/kernels/utils/perf.py
sgl_jax/srt/layers/activation.py
sgl_jax/srt/layers/binary_search.py
sgl_jax/srt/layers/embeddings.py
sgl_jax/srt/layers/fused_moe.py
sgl_jax/srt/layers/gate.py
sgl_jax/srt/layers/layernorm.py
sgl_jax/srt/layers/linear.py
sgl_jax/srt/layers/logits_processor.py
sgl_jax/srt/layers/moe.py
sgl_jax/srt/layers/radix_attention.py
sgl_jax/srt/layers/radix_lightning_attention.py
sgl_jax/srt/layers/radix_linear_attention.py
sgl_jax/srt/layers/routed_experts_capturer.py
sgl_jax/srt/layers/sampler.py
sgl_jax/srt/layers/attention/base_attn_backend.py
sgl_jax/srt/layers/attention/flashattention_backend.py
sgl_jax/srt/layers/attention/hybrid_linear_attn_backend.py
sgl_jax/srt/layers/attention/mla_backend.py
sgl_jax/srt/layers/attention/native_backend.py
sgl_jax/srt/layers/attention/utils.py
sgl_jax/srt/layers/attention/fla/gated_rmsnorm.py
sgl_jax/srt/layers/attention/fla/group_rmsnorm.py
sgl_jax/srt/layers/attention/linear/__init__.py
sgl_jax/srt/layers/attention/linear/gdn_backend.py
sgl_jax/srt/layers/attention/linear/kda_backend.py
sgl_jax/srt/layers/attention/linear/lightning_backend.py
sgl_jax/srt/layers/attention/linear/qwen3_5_gated_delta_net.py
sgl_jax/srt/layers/attention/linear/short_convolution.py
sgl_jax/srt/lora/context_manager.py
sgl_jax/srt/lora/layers.py
sgl_jax/srt/lora/lora.py
sgl_jax/srt/lora/lora_config.py
sgl_jax/srt/lora/lora_manager.py
sgl_jax/srt/lora/lora_memory_pool.py
sgl_jax/srt/lora/lora_registry.py
sgl_jax/srt/lora/utils.py
sgl_jax/srt/lora/backend/base_backend.py
sgl_jax/srt/lora/backend/bgmv_backend.py
sgl_jax/srt/managers/__init__.py
sgl_jax/srt/managers/communication.py
sgl_jax/srt/managers/detokenizer_manager.py
sgl_jax/srt/managers/io_struct.py
sgl_jax/srt/managers/schedule_batch.py
sgl_jax/srt/managers/schedule_policy.py
sgl_jax/srt/managers/scheduler.py
sgl_jax/srt/managers/scheduler_metrics_mixin.py
sgl_jax/srt/managers/scheduler_output_processor_mixin.py
sgl_jax/srt/managers/scheduler_profiler_mixing.py
sgl_jax/srt/managers/template_manager.py
sgl_jax/srt/managers/tiktoken_tokenizer.py
sgl_jax/srt/managers/tokenizer_manager.py
sgl_jax/srt/managers/tp_worker.py
sgl_jax/srt/managers/tp_worker_overlap_thread.py
sgl_jax/srt/managers/utils.py
sgl_jax/srt/mem_cache/__init__.py
sgl_jax/srt/mem_cache/allocator.py
sgl_jax/srt/mem_cache/base_prefix_cache.py
sgl_jax/srt/mem_cache/chunk_cache.py
sgl_jax/srt/mem_cache/common.py
sgl_jax/srt/mem_cache/memory_pool.py
sgl_jax/srt/mem_cache/radix_cache.py
sgl_jax/srt/mem_cache/recurrent_state_pool.py
sgl_jax/srt/mem_cache/swa_radix_cache.py
sgl_jax/srt/model_executor/base_model_runner.py
sgl_jax/srt/model_executor/compilation_manager.py
sgl_jax/srt/model_executor/forward_batch_info.py
sgl_jax/srt/model_executor/model_runner.py
sgl_jax/srt/model_executor/model_runner_kv_cache_mixin.py
sgl_jax/srt/model_loader/__init__.py
sgl_jax/srt/model_loader/arch.py
sgl_jax/srt/model_loader/loader.py
sgl_jax/srt/models/bailing_moe.py
sgl_jax/srt/models/bailing_moe_linear.py
sgl_jax/srt/models/deepseek_v3.py
sgl_jax/srt/models/gemma2.py
sgl_jax/srt/models/glm4_moe.py
sgl_jax/srt/models/glm5_moe.py
sgl_jax/srt/models/grok.py
sgl_jax/srt/models/kimi_linear.py
sgl_jax/srt/models/llama.py
sgl_jax/srt/models/llama_eagle3.py
sgl_jax/srt/models/mimo.py
sgl_jax/srt/models/mimo_mtp.py
sgl_jax/srt/models/mimo_v2_flash.py
sgl_jax/srt/models/mimo_v2_nextn.py
sgl_jax/srt/models/mimo_v2_pro.py
sgl_jax/srt/models/qwen.py
sgl_jax/srt/models/qwen2.py
sgl_jax/srt/models/qwen2_moe.py
sgl_jax/srt/models/qwen3.py
sgl_jax/srt/models/qwen3_moe.py
sgl_jax/srt/models/registry.py
sgl_jax/srt/models/umt5.py
sgl_jax/srt/multimodal/tokenizer_utils.py
sgl_jax/srt/multimodal/common/ServerArgs.py
sgl_jax/srt/multimodal/common/modality_enum.py
sgl_jax/srt/multimodal/configs/config_registry.py
sgl_jax/srt/multimodal/configs/multimodal_base_config.py
sgl_jax/srt/multimodal/configs/dits/wan_model_config.py
sgl_jax/srt/multimodal/configs/mimo_audio/__init__.py
sgl_jax/srt/multimodal/configs/mimo_audio/mimo_audio_backbone_config.py
sgl_jax/srt/multimodal/configs/mimo_audio/mimo_audio_config.py
sgl_jax/srt/multimodal/configs/qwen_vl/qwen_2_5_vl_config.py
sgl_jax/srt/multimodal/configs/vaes/vae_base_config.py
sgl_jax/srt/multimodal/configs/vaes/wan_vae_config.py
sgl_jax/srt/multimodal/entrypoint/http_server.py
sgl_jax/srt/multimodal/kernels/flash_attention.py
sgl_jax/srt/multimodal/kernels/get_block_spec_config.py
sgl_jax/srt/multimodal/kernels/tuned_block_sizes.py
sgl_jax/srt/multimodal/layers/image_hash.py
sgl_jax/srt/multimodal/layers/layernorm.py
sgl_jax/srt/multimodal/layers/mlp.py
sgl_jax/srt/multimodal/layers/rotary_embedding.py
sgl_jax/srt/multimodal/layers/visual_embedding.py
sgl_jax/srt/multimodal/layers/attention/flash_attention_backend.py
sgl_jax/srt/multimodal/layers/attention/layer.py
sgl_jax/srt/multimodal/manager/device_manager.py
sgl_jax/srt/multimodal/manager/global_scheduler.py
sgl_jax/srt/multimodal/manager/io_struct.py
sgl_jax/srt/multimodal/manager/mrope_utils.py
sgl_jax/srt/multimodal/manager/multimodal_detokenizer.py
sgl_jax/srt/multimodal/manager/multimodal_tokenizer.py
sgl_jax/srt/multimodal/manager/prompt_builder.py
sgl_jax/srt/multimodal/manager/schedule_batch.py
sgl_jax/srt/multimodal/manager/stage.py
sgl_jax/srt/multimodal/manager/utils.py
sgl_jax/srt/multimodal/manager/scheduler/audio_backbone_scheduler.py
sgl_jax/srt/multimodal/manager/scheduler/audio_scheduler.py
sgl_jax/srt/multimodal/manager/scheduler/diffusion_scheduler.py
sgl_jax/srt/multimodal/manager/scheduler/embed_scheduler.py
sgl_jax/srt/multimodal/manager/scheduler/vae_scheduler.py
sgl_jax/srt/multimodal/manager/scheduler/vit_scheduler.py
sgl_jax/srt/multimodal/model_executor/audio/__init__.py
sgl_jax/srt/multimodal/model_executor/audio/audio_backbone_model_runner.py
sgl_jax/srt/multimodal/model_executor/audio/audio_backbone_model_worker.py
sgl_jax/srt/multimodal/model_executor/audio/audio_model_runner.py
sgl_jax/srt/multimodal/model_executor/audio/audio_model_worker.py
sgl_jax/srt/multimodal/model_executor/diffusion/diffusion_model_runner.py
sgl_jax/srt/multimodal/model_executor/diffusion/diffusion_model_worker.py
sgl_jax/srt/multimodal/model_executor/embed/embed_model_runner.py
sgl_jax/srt/multimodal/model_executor/embed/embed_model_worker.py
sgl_jax/srt/multimodal/model_executor/vae/vae_model_runner.py
sgl_jax/srt/multimodal/model_executor/vae/vae_model_worker.py
sgl_jax/srt/multimodal/model_executor/vit/vit_model_runner.py
sgl_jax/srt/multimodal/model_executor/vit/vit_model_worker.py
sgl_jax/srt/multimodal/models/diffusion_solvers/flow_unipc_multistep_scheduler.py
sgl_jax/srt/multimodal/models/mimo_audio/__init__.py
sgl_jax/srt/multimodal/models/mimo_audio/mimo_audio_backbone.py
sgl_jax/srt/multimodal/models/mimo_audio/mimo_audio_backbone_weights_mapping.py
sgl_jax/srt/multimodal/models/mimo_audio/mimo_audio_tokenizer.py
sgl_jax/srt/multimodal/models/mimo_audio/mimo_audio_tokenizer_weights_mapping.py
sgl_jax/srt/multimodal/models/qwen2_5VL/qwen2_5_vit.py
sgl_jax/srt/multimodal/models/qwen2_5VL/qwen2_5_vl_generation.py
sgl_jax/srt/multimodal/models/qwen3_omni_moe/audio_encoder.py
sgl_jax/srt/multimodal/models/qwen3_omni_moe/qwen3_omni_thinker.py
sgl_jax/srt/multimodal/models/qwen3_omni_moe/qwen3_omni_thinker_embedding.py
sgl_jax/srt/multimodal/models/qwen3_omni_moe/vision_encoder.py
sgl_jax/srt/multimodal/models/static_configs/__init__.py
sgl_jax/srt/multimodal/models/static_configs/mimo_audio_stage_config.yaml
sgl_jax/srt/multimodal/models/static_configs/qwen2_5_vl_stage_config.yaml
sgl_jax/srt/multimodal/models/static_configs/qwen2_5_vl_stage_config_tp4.yaml
sgl_jax/srt/multimodal/models/static_configs/qwen3_omni_stage_config.yaml
sgl_jax/srt/multimodal/models/static_configs/wan2_1_stage_config.yaml
sgl_jax/srt/multimodal/models/static_configs/wan2_2_stage_config.yaml
sgl_jax/srt/multimodal/models/static_configs/yaml_registry.py
sgl_jax/srt/multimodal/models/wan/diffusion/wan_dit.py
sgl_jax/srt/multimodal/models/wan/diffusion/wan_dit_weights_mapping.py
sgl_jax/srt/multimodal/models/wan/vaes/commons.py
sgl_jax/srt/multimodal/models/wan/vaes/vae_weights_mappings.py
sgl_jax/srt/multimodal/models/wan/vaes/wanvae.py
sgl_jax/srt/sampling/__init__.py
sgl_jax/srt/sampling/sampling_batch_info.py
sgl_jax/srt/sampling/sampling_params.py
sgl_jax/srt/sampling/penaltylib/__init__.py
sgl_jax/srt/sampling/penaltylib/frequency_penalty.py
sgl_jax/srt/sampling/penaltylib/min_new_tokens.py
sgl_jax/srt/sampling/penaltylib/orchestrator.py
sgl_jax/srt/sampling/penaltylib/presence_penalty.py
sgl_jax/srt/speculative/base_worker.py
sgl_jax/srt/speculative/eagle_draft_worker.py
sgl_jax/srt/speculative/eagle_util.py
sgl_jax/srt/speculative/eagle_worker.py
sgl_jax/srt/speculative/spec_info.py
sgl_jax/srt/utils/__init__.py
sgl_jax/srt/utils/common_utils.py
sgl_jax/srt/utils/debug_utils.py
sgl_jax/srt/utils/jax_utils.py
sgl_jax/srt/utils/mesh_utils.py
sgl_jax/srt/utils/parallel_utils.py
sgl_jax/srt/utils/profiling_utils.py
sgl_jax/srt/utils/tunix_utils.py
sgl_jax/srt/utils/weight_utils.py
sgl_jax/srt/utils/quantization/debug_utils.py
sgl_jax/srt/utils/quantization/quantization_utils.py
sgl_jax/srt/utils/quantization/configs/fp8.yaml
sgl_jax/srt/utils/quantization/configs/fp8_bailing.yaml
sgl_jax/srt/utils/quantization/configs/fp8_block_128_dynamic.yaml
sgl_jax/srt/utils/quantization/configs/fp8_deepseek_v3.yaml
sgl_jax/srt/utils/quantization/configs/fp8_grok.yaml
sgl_jax/srt/utils/quantization/configs/fp8_qwen3_30b_a3b.yaml
sgl_jax/srt/utils/quantization/configs/fp8_w8a8.yaml
sgl_jax/srt/utils/quantization/configs/int8.yaml
sgl_jax/srt/utils/quantization/configs/int8_block_128_dynamic.yaml
sgl_jax/srt/utils/quantization/configs/int8_moe_block_128_linear_channel_dynamic.yaml
sgl_jax/srt/utils/quantization/configs/int8_w8a8.yaml
sgl_jax/test/__init__.py
sgl_jax/test/long_prompt.txt
sgl_jax/test/runners.py
sgl_jax/test/test_bailing_moe_linear.py
sgl_jax/test/test_compilation_manager.py
sgl_jax/test/test_flashattention.py
sgl_jax/test/test_flashattention_dp.py
sgl_jax/test/test_kda_attention.py
sgl_jax/test/test_kda_attention_dp.py
sgl_jax/test/test_kernel_utils.py
sgl_jax/test/test_linear_tp.py
sgl_jax/test/test_mesh.py
sgl_jax/test/test_mixed_chunk_dp.py
sgl_jax/test/test_mla_attention.py
sgl_jax/test/test_moe_topk.py
sgl_jax/test/test_pagedattention.py
sgl_jax/test/test_sampler.py
sgl_jax/test/test_short_conv.py
sgl_jax/test/test_utils.py
sgl_jax/test/test_weight_loader_qkv_split.py
sgl_jax/test/constrained/test_bitmask_ops.py
sgl_jax/test/kernels/fused_moe_v1_test.py
sgl_jax/test/kernels/gmm_test.py
sgl_jax/test/kernels/moe_block_quant_test.py
sgl_jax/test/kernels/quantized_linear_test.py
sgl_jax/test/kernels/gdn/test_gated_delta.py
sgl_jax/test/kernels/gdn/test_ragged_gated_delta_rule_ref.py
sgl_jax/test/layers/__init__.py
sgl_jax/test/layers/mock_recurrent_state_pool.py
sgl_jax/test/layers/test_gdn_backend.py
sgl_jax/test/layers/test_group_rmsnorm.py
sgl_jax/test/layers/test_lightning_backend.py
sgl_jax/test/layers/test_lightning_backend_dp.py
sgl_jax/test/layers/test_merged_column_parallel_linear.py
sgl_jax/test/layers/test_qwen3_5_gated_delta_net.py
sgl_jax/test/layers/test_sequence_parallel.py
sgl_jax/test/mem_cache/run_multi_process_radix_cache_test.sh
sgl_jax/test/mem_cache/test_hybrid_req_to_token_pool.py
sgl_jax/test/mem_cache/test_kv_cache.py
sgl_jax/test/mem_cache/test_paged_allocator_multi_dp.py
sgl_jax/test/mem_cache/test_radix_cache.py
sgl_jax/test/mem_cache/test_req_to_token_pool.py
sgl_jax/test/mem_cache/test_swa_allocator.py
sgl_jax/test/mem_cache/test_swa_radix_cache.py
sgl_jax/test/models/test_mimo_v2_nextn.py
sgl_jax/test/multimodal/__init__.py
sgl_jax/test/multimodal/test_diffusion_precision.py
sgl_jax/test/multimodal/test_diffusion_scheduler.py
sgl_jax/test/multimodal/test_flash_attention_kernel.py
sgl_jax/test/multimodal/test_mimo_audio_tokenizer.py
sgl_jax/test/multimodal/test_qwen3_omni_moe_encoder.py
sgl_jax/test/multimodal/test_qwen3_omni_vision_encoder.py
sgl_jax/test/multimodal/test_qwen_omni_thinker.py
sgl_jax/test/multimodal/test_vae.py
sgl_jax/test/multimodal/test_vae_scheduler.py
sgl_jax/test/multimodal/test_wan2_1_dit.py
sgl_jax/test/multimodal/test_wan2_1_dit_weight_loading.py
sgl_jax/test/multimodal/test_wan_vae_precision.py
sgl_jax/test/multimodal/data/wan_vae_diffusers_decode_output.npy
sgl_jax/test/multimodal/data/wan_vae_diffusers_encode_output.npy
sgl_jax/test/multimodal/data/qwen3_omni/qwen3_omni_moe_thinker_text_prefill.txt
sgl_jax/test/speculative/test_eagle_tree_build.py
sgl_jax/test/speculative/test_eagle_utils.py
sgl_jax/test/speculative/test_spec_info.py
sgl_jax/tools/moe_parity_repro.py
sgl_jax/tools/trace_diff.py
sglang_jax.egg-info/PKG-INFO
sglang_jax.egg-info/SOURCES.txt
sglang_jax.egg-info/dependency_links.txt
sglang_jax.egg-info/requires.txt
sglang_jax.egg-info/top_level.txt