.clang-format
.gitignore
.gitmodules
.markdownlint.yaml
.pre-commit-config.yaml
.readthedocs.yaml
AGENTS.md
CLAUDE.md
CMakeLists.txt
CODE_OF_CONDUCT.md
CONTRIBUTING.md
DCO
Dockerfile
Dockerfile.310p
Dockerfile.310p.openEuler
Dockerfile.a3
Dockerfile.a3.openEuler
Dockerfile.openEuler
LICENSE
README.md
README.zh.md
codecov.yml
collect_env.py
format.sh
mypy.ini
packages.txt
pyproject.toml
requirements-dev.txt
requirements-lint.txt
requirements.txt
setup.py
typos.toml
.agents/README.md
.agents/skills/main2main/SKILL.md
.agents/skills/main2main/reference/adapt-guide.md
.agents/skills/main2main/reference/diagnosis-guide.md
.agents/skills/main2main/reference/error-pattern-examples.md
.agents/skills/main2main/reference/final-summary.md
.agents/skills/main2main/scripts/check_and_commit.py
.agents/skills/main2main/scripts/detect_commits.py
.agents/skills/main2main/scripts/lint_adapt_guide.py
.agents/skills/main2main/scripts/plan_steps.py
.agents/skills/main2main/scripts/pre_ci_check.py
.agents/skills/main2main/scripts/run_main2main_ci.py
.agents/skills/main2main/scripts/update_commit_reference.py
.agents/skills/vllm-ascend-model-adapter/SKILL.md
.agents/skills/vllm-ascend-model-adapter/references/deliverables.md
.agents/skills/vllm-ascend-model-adapter/references/fp8-on-npu-lessons.md
.agents/skills/vllm-ascend-model-adapter/references/multimodal-ep-aclgraph-lessons.md
.agents/skills/vllm-ascend-model-adapter/references/troubleshooting.md
.agents/skills/vllm-ascend-model-adapter/references/workflow-checklist.md
.agents/skills/vllm-ascend-release/SKILL.md
.agents/skills/vllm-ascend-release/references/ref-past-release-notes-highlight.md
.agents/skills/vllm-ascend-release/references/version-files.yaml
.agents/skills/vllm-ascend-release/scripts/fetch_commits.py
.agents/skills/vllm-ascend-release/scripts/generate_announcement.py
.agents/skills/vllm-ascend-release/scripts/generate_checklist.py
.agents/skills/vllm-ascend-release/scripts/scan_nightly_status.py
.agents/skills/vllm-ascend-release/scripts/scan_release_bugs.py
.agents/skills/vllm-ascend-release/scripts/scan_test_coverage.py
.agents/skills/vllm-ascend-release/scripts/update_checklist_section.py
.agents/skills/vllm-ascend-release/scripts/update_version_references.py
.agents/skills/vllm-ascend-release/templates/feedback-issue-template.md
.agents/skills/vllm-ascend-release/templates/release-checklist-template.md
.claude/README.md
.gemini/config.yaml
.gemini/styleguide.md
.github/CODEOWNERS
.github/PULL_REQUEST_TEMPLATE.md
.github/actionlint.yaml
.github/dependabot.yml
.github/issue-labeler.yml
.github/labeler.yml
.github/ISSUE_TEMPLATE/100-documentation.yml
.github/ISSUE_TEMPLATE/110-user-story.yml
.github/ISSUE_TEMPLATE/200-installation.yml
.github/ISSUE_TEMPLATE/300-usage.yml
.github/ISSUE_TEMPLATE/400-bug-report.yml
.github/ISSUE_TEMPLATE/500-feature-request.yml
.github/ISSUE_TEMPLATE/600-new-model.yml
.github/ISSUE_TEMPLATE/700-performance-discussion.yml
.github/ISSUE_TEMPLATE/750-RFC.yml
.github/ISSUE_TEMPLATE/800-others.yml
.github/ISSUE_TEMPLATE/900-release-checklist.yml
.github/ISSUE_TEMPLATE/config.yml
.github/workflows/README.md
.github/workflows/_e2e_nightly_multi_node.yaml
.github/workflows/_e2e_nightly_single_node.yaml
.github/workflows/_e2e_nightly_single_node_models.yaml
.github/workflows/_e2e_test.yaml
.github/workflows/_nightly_image_build.yaml
.github/workflows/_optional_smart_e2e.yaml
.github/workflows/_parse_trigger.yaml
.github/workflows/_pre_commit.yml
.github/workflows/_schedule_image_build.yaml
.github/workflows/bot_issue_manage.yaml
.github/workflows/bot_merge_conflict.yaml
.github/workflows/bot_pr_create.yaml
.github/workflows/dispatch_main2main_bisect.yaml
.github/workflows/labeled_doctest.yaml
.github/workflows/labled_download_model_dataset.yaml
.github/workflows/nightly_image_build.yaml
.github/workflows/pr_close_cancel_job.yaml
.github/workflows/pr_test_full.yaml
.github/workflows/pr_test_light.yaml
.github/workflows/push_build_csrc_cache.yaml
.github/workflows/schedule_doc_linkcheck.yaml
.github/workflows/schedule_doc_translate.yaml
.github/workflows/schedule_image_build_and_push.yaml
.github/workflows/schedule_lint_image_build.yaml
.github/workflows/schedule_nightly_test_a2.yaml
.github/workflows/schedule_nightly_test_a3.yaml
.github/workflows/schedule_release_code_and_wheel.yml
.github/workflows/schedule_stale_manage.yaml
.github/workflows/schedule_test_vllm_main.yaml
.github/workflows/schedule_update_estimated_time.yaml
.github/workflows/schedule_vllm_e2e_test.yaml
.github/workflows/schedule_weekly_test_a3.yaml
.github/workflows/dockerfiles/Dockerfile.buildwheel.310p
.github/workflows/dockerfiles/Dockerfile.buildwheel.a2
.github/workflows/dockerfiles/Dockerfile.buildwheel.a3
.github/workflows/dockerfiles/Dockerfile.lint
.github/workflows/dockerfiles/Dockerfile.nightly.a2
.github/workflows/dockerfiles/Dockerfile.nightly.a3
.github/workflows/matchers/actionlint.json
.github/workflows/matchers/markdownlint.json
.github/workflows/matchers/mypy.json
.github/workflows/misc/model_dataset_list.json
.github/workflows/scripts/ci_log_summary.py
.github/workflows/scripts/ci_utils.py
.github/workflows/scripts/config.yaml
.github/workflows/scripts/determine_smart_e2e_scope.py
.github/workflows/scripts/po_translate.py
.github/workflows/scripts/run_suite.py
.github/workflows/scripts/runner_label.json
.github/workflows/scripts/smart_ut_README.md
.github/workflows/scripts/update_estimated_time.py
.github/workflows/scripts/upstream_config.yaml
.github/workflows/scripts/ut_blacklist.yaml
.github/workflows/scripts/ut_config.yaml
.github/workflows/scripts/wheel/auto_exclude.py
.github/workflows/scripts/wheel/config.json
.github/workflows/scripts/wheel/make_variant.py
.github/workflows/scripts/wheel/pyproject.toml
benchmarks/README.md
benchmarks/requirements-bench.txt
benchmarks/ops/ben_vocabparallelembedding.py
benchmarks/scripts/convert_json_to_markdown.py
benchmarks/scripts/perf_result_template.md
benchmarks/scripts/run-performance-benchmarks.sh
benchmarks/tests/latency-tests.json
benchmarks/tests/serving-tests.json
benchmarks/tests/throughput-tests.json
cmake/utils.cmake
csrc/CMakeLists.txt
csrc/build.sh
csrc/build_aclnn.sh
csrc/camem_allocator.cpp
csrc/ops.h
csrc/torch_binding.cpp
csrc/torch_binding_meta.cpp
csrc/utils.h
csrc/version.info
csrc/aclnn_torch_adapter/NPUBridge.cpp
csrc/aclnn_torch_adapter/NPUBridge.h
csrc/aclnn_torch_adapter/NPUStorageImpl.cpp
csrc/aclnn_torch_adapter/NPUStorageImpl.h
csrc/aclnn_torch_adapter/op_api_common.h
csrc/attention/CMakeLists.txt
csrc/attention/compressor/CMakeLists.txt
csrc/attention/compressor/README.md
csrc/attention/compressor/op_host/CMakeLists.txt
csrc/attention/compressor/op_host/compressor_def.cpp
csrc/attention/compressor/op_host/compressor_proto.cpp
csrc/attention/compressor/op_host/compressor_tiling.cpp
csrc/attention/compressor/op_host/compressor_tiling.h
csrc/attention/compressor/op_kernel/compressor.cpp
csrc/attention/compressor/op_kernel/compressor_comm.h
csrc/attention/compressor/op_kernel/compressor_kernel.h
csrc/attention/compressor/op_kernel/compressor_kernel_perf.h
csrc/attention/compressor/op_kernel/compressor_template_tiling_key.h
csrc/attention/compressor/op_kernel/compressor_tiling_data.h
csrc/attention/compressor/op_kernel/compressor_tools.h
csrc/attention/compressor/op_kernel/arch32/compressor_block_cube_perf.h
csrc/attention/compressor/op_kernel/arch32/compressor_block_vec_perf.h
csrc/attention/compressor/op_kernel/arch32/compressor_vector_comm.h
csrc/attention/compressor/op_kernel/arch32/rms_norm.h
csrc/attention/compressor/op_kernel/arch32/rope.h
csrc/attention/compressor/op_kernel/arch32/soft_max.h
csrc/attention/compressor/op_kernel/arch35/compressor_block_cube_perf.h
csrc/attention/compressor/op_kernel/arch35/compressor_block_vec_perf.h
csrc/attention/compressor/op_kernel/arch35/vf/vf_add.h
csrc/attention/compressor/op_kernel/arch35/vf/vf_mul.h
csrc/attention/compressor/op_kernel/arch35/vf/vf_rms_norm.h
csrc/attention/compressor/op_kernel/arch35/vf/vf_rope.h
csrc/attention/compressor/op_kernel/arch35/vf/vf_softmax.h
csrc/attention/indexer_compress_epilog/CMakeLists.txt
csrc/attention/indexer_compress_epilog/op_host/CMakeLists.txt
csrc/attention/indexer_compress_epilog/op_host/indexer_compress_epilog_def.cpp
csrc/attention/indexer_compress_epilog/op_host/indexer_compress_epilog_proto.cpp
csrc/attention/indexer_compress_epilog/op_host/indexer_compress_epilog_tiling.cpp
csrc/attention/indexer_compress_epilog/op_host/indexer_compress_epilog_tiling.h
csrc/attention/indexer_compress_epilog/op_kernel/indexer_compress_epilog.cpp
csrc/attention/indexer_compress_epilog/op_kernel/indexer_compress_epilog_base.h
csrc/attention/indexer_compress_epilog/op_kernel/indexer_compress_epilog_multi_row.h
csrc/attention/indexer_compress_epilog/op_kernel/indexer_compress_epilog_multi_row_mx_fp8.h
csrc/attention/indexer_compress_epilog/op_kernel/indexer_compress_epilog_single_row.h
csrc/attention/indexer_compress_epilog/op_kernel/indexer_compress_epilog_single_row_mx_fp8.h
csrc/attention/indexer_compress_epilog_v2/CMakeLists.txt
csrc/attention/indexer_compress_epilog_v2/op_host/CMakeLists.txt
csrc/attention/indexer_compress_epilog_v2/op_host/indexer_compress_epilog_v2_def.cpp
csrc/attention/indexer_compress_epilog_v2/op_host/indexer_compress_epilog_v2_proto.cpp
csrc/attention/indexer_compress_epilog_v2/op_host/indexer_compress_epilog_v2_tiling.cpp
csrc/attention/indexer_compress_epilog_v2/op_host/indexer_compress_epilog_v2_tiling.h
csrc/attention/indexer_compress_epilog_v2/op_kernel/indexer_compress_epilog_v2.cpp
csrc/attention/indexer_compress_epilog_v2/op_kernel/indexer_compress_epilog_v2_base.h
csrc/attention/indexer_compress_epilog_v2/op_kernel/indexer_compress_epilog_v2_multi_row.h
csrc/attention/indexer_compress_epilog_v2/op_kernel/indexer_compress_epilog_v2_single_row.h
csrc/attention/inplace_partial_rotary_mul/CMakeLists.txt
csrc/attention/inplace_partial_rotary_mul/op_host/CMakeLists.txt
csrc/attention/inplace_partial_rotary_mul/op_host/inplace_partial_rotary_mul_a3_tiling.cpp
csrc/attention/inplace_partial_rotary_mul/op_host/inplace_partial_rotary_mul_def.cpp
csrc/attention/inplace_partial_rotary_mul/op_host/inplace_partial_rotary_mul_proto.cpp
csrc/attention/inplace_partial_rotary_mul/op_host/inplace_partial_rotary_mul_tiling.cpp
csrc/attention/inplace_partial_rotary_mul/op_host/inplace_partial_rotary_mul_tiling.h
csrc/attention/inplace_partial_rotary_mul/op_host/rope_regbase_tiling_a_and_b.cpp
csrc/attention/inplace_partial_rotary_mul/op_host/rope_regbase_tiling_ab.cpp
csrc/attention/inplace_partial_rotary_mul/op_host/rope_regbase_tiling_aba_and_ba.cpp
csrc/attention/inplace_partial_rotary_mul/op_host/rope_regbase_tiling_bab.cpp
csrc/attention/inplace_partial_rotary_mul/op_host/rope_regbase_tiling_base.cpp
csrc/attention/inplace_partial_rotary_mul/op_kernel/apply_rotary_pos_emb_common.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/inplace_partial_rotary_mul.cpp
csrc/attention/inplace_partial_rotary_mul/op_kernel/inplace_partial_rotary_mul.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/inplace_partial_rotary_mul_common.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotary_position_embedding_reg_a_and_b.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotary_position_embedding_reg_a_and_b_mixed.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotary_position_embedding_reg_ab.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotary_position_embedding_reg_ab_mixed.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotary_position_embedding_reg_aba_and_ba.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotary_position_embedding_reg_aba_and_ba_mixed.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotary_position_embedding_reg_bab.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotary_position_embedding_reg_bab_mixed.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotate_half.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotate_half_base.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotate_half_bf16.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotate_interleaved_common.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotate_interleaved_split_bs.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotate_interleaved_split_bs_pad.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotate_interleaved_split_bsn.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotate_interleaved_split_bsn_pad.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotate_interleaved_split_s.h
csrc/attention/inplace_partial_rotary_mul/op_kernel/rotate_interleaved_split_s_pad.h
csrc/attention/kv_compress_epilog/CMakeLists.txt
csrc/attention/kv_compress_epilog/op_host/CMakeLists.txt
csrc/attention/kv_compress_epilog/op_host/kv_compress_epilog_def.cpp
csrc/attention/kv_compress_epilog/op_host/kv_compress_epilog_tiling_arch35.cpp
csrc/attention/kv_compress_epilog/op_host/kv_compress_epilog_tiling_arch35.h
csrc/attention/kv_compress_epilog/op_kernel/kv_compress_epilog.cpp
csrc/attention/kv_compress_epilog/op_kernel/kv_compress_epilog.h
csrc/attention/kv_compress_epilog/op_kernel/kv_compress_epilog_common.h
csrc/attention/kv_quant_sparse_attn_sharedkv/CMakeLists.txt
csrc/attention/kv_quant_sparse_attn_sharedkv/README.md
csrc/attention/kv_quant_sparse_attn_sharedkv/op_host/CMakeLists.txt
csrc/attention/kv_quant_sparse_attn_sharedkv/op_host/kv_quant_sparse_attn_sharedkv_check.cpp
csrc/attention/kv_quant_sparse_attn_sharedkv/op_host/kv_quant_sparse_attn_sharedkv_check.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_host/kv_quant_sparse_attn_sharedkv_check_consistancy.cpp
csrc/attention/kv_quant_sparse_attn_sharedkv/op_host/kv_quant_sparse_attn_sharedkv_check_existance.cpp
csrc/attention/kv_quant_sparse_attn_sharedkv/op_host/kv_quant_sparse_attn_sharedkv_check_feature.cpp
csrc/attention/kv_quant_sparse_attn_sharedkv/op_host/kv_quant_sparse_attn_sharedkv_check_single_para.cpp
csrc/attention/kv_quant_sparse_attn_sharedkv/op_host/kv_quant_sparse_attn_sharedkv_def.cpp
csrc/attention/kv_quant_sparse_attn_sharedkv/op_host/kv_quant_sparse_attn_sharedkv_proto.cpp
csrc/attention/kv_quant_sparse_attn_sharedkv/op_host/kv_quant_sparse_attn_sharedkv_tiling.cpp
csrc/attention/kv_quant_sparse_attn_sharedkv/op_host/kv_quant_sparse_attn_sharedkv_tiling.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/kv_quant_sparse_attn_sharedkv.cpp
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/kv_quant_sparse_attn_sharedkv_common.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/kv_quant_sparse_attn_sharedkv_metadata.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/kv_quant_sparse_attn_sharedkv_template_tiling_key.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/kv_quant_sparse_attn_sharedkv_common_arch35.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/kv_quant_sparse_attn_sharedkv_kvcache.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/kv_quant_sparse_attn_sharedkv_scfa_block_cube.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/kv_quant_sparse_attn_sharedkv_scfa_block_vector.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/kv_quant_sparse_attn_sharedkv_scfa_kernel.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/util_regbase.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/common/CopyInL1.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/common/FixpipeOut.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/common/buffer.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/common/buffer_manager.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/common/buffers_policy.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/common/matmul.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/common/offset_calculator.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/vf/vf_basic_block_aligned128_no_update_scfa.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/vf/vf_basic_block_aligned128_update_scfa.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/vf/vf_basic_block_unaligned128_no_update_scfa.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/vf/vf_basic_block_unaligned128_update_scfa.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/vf/vf_basic_block_unaligned64_no_update_scfa.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/vf/vf_basic_block_unaligned64_update_scfa.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/vf/vf_basic_block_utils.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/vf/vf_flashupdate_new_scfa.h
csrc/attention/kv_quant_sparse_attn_sharedkv/op_kernel/arch35/vf/vf_mul_sel_softmaxflashv2_cast_nz_scfa.h
csrc/attention/kv_quant_sparse_attn_sharedkv_metadata/CMakeLists.txt
csrc/attention/kv_quant_sparse_attn_sharedkv_metadata/README.md
csrc/attention/kv_quant_sparse_attn_sharedkv_metadata/op_graph/kv_quant_sparse_attn_sharedkv_metadata_proto.h
csrc/attention/kv_quant_sparse_attn_sharedkv_metadata/op_host/CMakeLists.txt
csrc/attention/kv_quant_sparse_attn_sharedkv_metadata/op_host/kv_quant_sparse_attn_sharedkv_metadata_infershape.cpp
csrc/attention/kv_quant_sparse_attn_sharedkv_metadata/op_host/op_api/aclnn_kv_quant_sparse_attn_sharedkv_metadata.cpp
csrc/attention/kv_quant_sparse_attn_sharedkv_metadata/op_host/op_api/aclnn_kv_quant_sparse_attn_sharedkv_metadata.h
csrc/attention/kv_quant_sparse_attn_sharedkv_metadata/op_host/op_api/l0_kv_quant_sparse_attn_sharedkv_metadata.cpp
csrc/attention/kv_quant_sparse_attn_sharedkv_metadata/op_host/op_api/l0_kv_quant_sparse_attn_sharedkv_metadata.h
csrc/attention/kv_quant_sparse_attn_sharedkv_metadata/op_kernel_aicpu/CMakeLists.txt
csrc/attention/kv_quant_sparse_attn_sharedkv_metadata/op_kernel_aicpu/kv_quant_sparse_attn_sharedkv_metadata_aicpu.cpp
csrc/attention/kv_quant_sparse_attn_sharedkv_metadata/op_kernel_aicpu/kv_quant_sparse_attn_sharedkv_metadata_aicpu.h
csrc/attention/kv_quant_sparse_attn_sharedkv_metadata/op_kernel_aicpu/kv_quant_sparse_attn_sharedkv_metadata_aicpu.json
csrc/attention/lightning_indexer_quant/CMakeLists.txt
csrc/attention/lightning_indexer_quant/lightning_indexer_quant_torch_adpt.h
csrc/attention/lightning_indexer_quant/op_host/CMakeLists.txt
csrc/attention/lightning_indexer_quant/op_host/lightning_indexer_quant_def.cpp
csrc/attention/lightning_indexer_quant/op_host/lightning_indexer_quant_proto.cpp
csrc/attention/lightning_indexer_quant/op_host/lightning_indexer_quant_tiling.cpp
csrc/attention/lightning_indexer_quant/op_host/lightning_indexer_quant_tiling.h
csrc/attention/lightning_indexer_quant/op_kernel/lightning_indexer_quant.cpp
csrc/attention/lightning_indexer_quant/op_kernel/lightning_indexer_quant_common.h
csrc/attention/lightning_indexer_quant/op_kernel/lightning_indexer_quant_kernel.h
csrc/attention/lightning_indexer_quant/op_kernel/lightning_indexer_quant_service_cube.h
csrc/attention/lightning_indexer_quant/op_kernel/lightning_indexer_quant_service_vector.h
csrc/attention/lightning_indexer_quant/op_kernel/lightning_indexer_quant_template_tiling_key.h
csrc/attention/lightning_indexer_quant/op_kernel/lightning_indexer_quant_vector.h
csrc/attention/lightning_indexer_vllm/CMakeLists.txt
csrc/attention/lightning_indexer_vllm/lightning_indexer_vllm_torch_adpt.h
csrc/attention/lightning_indexer_vllm/op_host/CMakeLists.txt
csrc/attention/lightning_indexer_vllm/op_host/lightning_indexer_vllm_def.cpp
csrc/attention/lightning_indexer_vllm/op_host/lightning_indexer_vllm_proto.cpp
csrc/attention/lightning_indexer_vllm/op_host/lightning_indexer_vllm_tiling.cpp
csrc/attention/lightning_indexer_vllm/op_host/lightning_indexer_vllm_tiling.h
csrc/attention/lightning_indexer_vllm/op_kernel/lightning_indexer_common.h
csrc/attention/lightning_indexer_vllm/op_kernel/lightning_indexer_kernel.h
csrc/attention/lightning_indexer_vllm/op_kernel/lightning_indexer_service_cube.h
csrc/attention/lightning_indexer_vllm/op_kernel/lightning_indexer_service_vector.h
csrc/attention/lightning_indexer_vllm/op_kernel/lightning_indexer_template_tiling_key.h
csrc/attention/lightning_indexer_vllm/op_kernel/lightning_indexer_vector.h
csrc/attention/lightning_indexer_vllm/op_kernel/lightning_indexer_vllm.cpp
csrc/attention/load_index_kv_cache/CMakeLists.txt
csrc/attention/load_index_kv_cache/op_host/CMakeLists.txt
csrc/attention/load_index_kv_cache/op_host/load_index_kv_cache_def.cpp
csrc/attention/load_index_kv_cache/op_host/load_index_kv_cache_proto.cpp
csrc/attention/load_index_kv_cache/op_host/load_index_kv_cache_tiling.cpp
csrc/attention/load_index_kv_cache/op_host/load_index_kv_cache_tiling.h
csrc/attention/load_index_kv_cache/op_kernel/load_index_kv_cache.cpp
csrc/attention/load_index_kv_cache/op_kernel/load_index_kv_cache_base.h
csrc/attention/load_index_kv_cache/op_kernel/load_index_kv_cache_perf.h
csrc/attention/ngram_spec_decode/CMakeLists.txt
csrc/attention/ngram_spec_decode/ngram_spec_decode_torch_adpt.h
csrc/attention/ngram_spec_decode/op_host/CMakeLists.txt
csrc/attention/ngram_spec_decode/op_host/ngram_spec_decode_def.cpp
csrc/attention/ngram_spec_decode/op_host/ngram_spec_decode_tiling.cpp
csrc/attention/ngram_spec_decode/op_host/ngram_spec_decode_tiling.h
csrc/attention/ngram_spec_decode/op_host/op_api/aclnn_ngram_spec_decode.cpp
csrc/attention/ngram_spec_decode/op_host/op_api/aclnn_ngram_spec_decode.h
csrc/attention/ngram_spec_decode/op_kernel/ngram_spec_decode.cpp
csrc/attention/ngram_spec_decode/op_kernel/ngram_spec_decode.h
csrc/attention/quant_lightning_indexer/CMakeLists.txt
csrc/attention/quant_lightning_indexer/README.md
csrc/attention/quant_lightning_indexer/op_host/CMakeLists.txt
csrc/attention/quant_lightning_indexer/op_host/quant_lightning_indexer_def.cpp
csrc/attention/quant_lightning_indexer/op_host/quant_lightning_indexer_infershape.cpp
csrc/attention/quant_lightning_indexer/op_host/quant_lightning_indexer_tiling.cpp
csrc/attention/quant_lightning_indexer/op_host/quant_lightning_indexer_tiling.h
csrc/attention/quant_lightning_indexer/op_kernel/quant_lightning_indexer.cpp
csrc/attention/quant_lightning_indexer/op_kernel/quant_lightning_indexer_metadata.h
csrc/attention/quant_lightning_indexer/op_kernel/quant_lightning_indexer_template_tiling_key.h
csrc/attention/quant_lightning_indexer/op_kernel/arch32/quant_lightning_indexer_common.h
csrc/attention/quant_lightning_indexer/op_kernel/arch32/quant_lightning_indexer_kernel.h
csrc/attention/quant_lightning_indexer/op_kernel/arch32/quant_lightning_indexer_service_cube.h
csrc/attention/quant_lightning_indexer/op_kernel/arch32/quant_lightning_indexer_service_vector.h
csrc/attention/quant_lightning_indexer/op_kernel/arch32/quant_lightning_indexer_vector.h
csrc/attention/quant_lightning_indexer/op_kernel/arch35/quant_lightning_indexer_common.h
csrc/attention/quant_lightning_indexer/op_kernel/arch35/quant_lightning_indexer_kernel.h
csrc/attention/quant_lightning_indexer/op_kernel/arch35/quant_lightning_indexer_service_cube.h
csrc/attention/quant_lightning_indexer/op_kernel/arch35/quant_lightning_indexer_service_vector.h
csrc/attention/quant_lightning_indexer/op_kernel/arch35/vf/quant_lightning_indexer_topk.h
csrc/attention/quant_lightning_indexer/op_kernel/arch35/vf/quant_lightning_indexer_vector1.h
csrc/attention/quant_lightning_indexer/op_kernel/arch35/vf/vf_topk.h
csrc/attention/quant_lightning_indexer/op_kernel/arch35/vf/vf_topk_16_gather.h
csrc/attention/quant_lightning_indexer_metadata/CMakeLists.txt
csrc/attention/quant_lightning_indexer_metadata/README.md
csrc/attention/quant_lightning_indexer_metadata/op_api/aclnn_quant_lightning_indexer_metadata.cpp
csrc/attention/quant_lightning_indexer_metadata/op_api/aclnn_quant_lightning_indexer_metadata.h
csrc/attention/quant_lightning_indexer_metadata/op_api/l0_quant_lightning_indexer_metadata.cpp
csrc/attention/quant_lightning_indexer_metadata/op_api/l0_quant_lightning_indexer_metadata.h
csrc/attention/quant_lightning_indexer_metadata/op_graph/quant_lightning_indexer_metadata_proto.h
csrc/attention/quant_lightning_indexer_metadata/op_host/quant_lightning_indexer_metadata_infershape.cpp
csrc/attention/quant_lightning_indexer_metadata/op_kernel_aicpu/quant_lightning_indexer_metadata_aicpu.cpp
csrc/attention/quant_lightning_indexer_metadata/op_kernel_aicpu/quant_lightning_indexer_metadata_aicpu.h
csrc/attention/quant_lightning_indexer_metadata/op_kernel_aicpu/quant_lightning_indexer_metadata_aicpu.json
csrc/attention/recurrent_gated_delta_rule/CMakeLists.txt
csrc/attention/recurrent_gated_delta_rule/recurrent_gated_delta_rule_torch_adpt.h
csrc/attention/recurrent_gated_delta_rule/op_host/CMakeLists.txt
csrc/attention/recurrent_gated_delta_rule/op_host/math_util.h
csrc/attention/recurrent_gated_delta_rule/op_host/recurrent_gated_delta_rule.h
csrc/attention/recurrent_gated_delta_rule/op_host/recurrent_gated_delta_rule_def.cpp
csrc/attention/recurrent_gated_delta_rule/op_host/recurrent_gated_delta_rule_infershape.cpp
csrc/attention/recurrent_gated_delta_rule/op_host/recurrent_gated_delta_rule_tiling.cpp
csrc/attention/recurrent_gated_delta_rule/op_host/recurrent_gated_delta_rule_tiling.h
csrc/attention/recurrent_gated_delta_rule/op_host/op_api/aclnn_recurrent_gated_delta_rule.cpp
csrc/attention/recurrent_gated_delta_rule/op_host/op_api/aclnn_recurrent_gated_delta_rule.h
csrc/attention/recurrent_gated_delta_rule/op_host/op_api/recurrent_gated_delta_rule.cpp
csrc/attention/recurrent_gated_delta_rule/op_kernel/recurrent_gated_delta_rule.cpp
csrc/attention/recurrent_gated_delta_rule/op_kernel/recurrent_gated_delta_rule.h
csrc/attention/recurrent_gated_delta_rule/op_kernel/recurrent_gated_delta_rule_tiling_data.h
csrc/attention/recurrent_gated_delta_rule_v310/CMakeLists.txt
csrc/attention/recurrent_gated_delta_rule_v310/recurrent_gated_delta_rule_310_torch_adpt.h
csrc/attention/recurrent_gated_delta_rule_v310/op_host/CMakeLists.txt
csrc/attention/recurrent_gated_delta_rule_v310/op_host/math_util.h
csrc/attention/recurrent_gated_delta_rule_v310/op_host/recurrent_gated_delta_rule_v310.h
csrc/attention/recurrent_gated_delta_rule_v310/op_host/recurrent_gated_delta_rule_v310_def.cpp
csrc/attention/recurrent_gated_delta_rule_v310/op_host/recurrent_gated_delta_rule_v310_infershape.cpp
csrc/attention/recurrent_gated_delta_rule_v310/op_host/recurrent_gated_delta_rule_v310_tiling.cpp
csrc/attention/recurrent_gated_delta_rule_v310/op_host/recurrent_gated_delta_rule_v310_tiling.h
csrc/attention/recurrent_gated_delta_rule_v310/op_host/op_api/aclnn_recurrent_gated_delta_rule_v310.cpp
csrc/attention/recurrent_gated_delta_rule_v310/op_host/op_api/aclnn_recurrent_gated_delta_rule_v310.h
csrc/attention/recurrent_gated_delta_rule_v310/op_host/op_api/recurrent_gated_delta_rule_v310.cpp
csrc/attention/recurrent_gated_delta_rule_v310/op_kernel/recurrent_gated_delta_rule_v310.cpp
csrc/attention/recurrent_gated_delta_rule_v310/op_kernel/recurrent_gated_delta_rule_v310.h
csrc/attention/recurrent_gated_delta_rule_v310/op_kernel/recurrent_gated_delta_rule_v310_tiling_data.h
csrc/attention/reshape_and_cache_bnsd/CMakeLists.txt
csrc/attention/reshape_and_cache_bnsd/op_host/CMakeLists.txt
csrc/attention/reshape_and_cache_bnsd/op_host/reshape_and_cache_bnsd.cpp
csrc/attention/reshape_and_cache_bnsd/op_host/reshape_and_cache_bnsd_proto.cpp
csrc/attention/reshape_and_cache_bnsd/op_host/reshape_and_cache_bnsd_tiling.cpp
csrc/attention/reshape_and_cache_bnsd/op_host/reshape_and_cache_bnsd_tiling.h
csrc/attention/reshape_and_cache_bnsd/op_kernel/kernel_utils.h
csrc/attention/reshape_and_cache_bnsd/op_kernel/reshape_and_cache_bnsd.cpp
csrc/attention/rms_norm_dynamic_quant/CMakeLists.txt
csrc/attention/rms_norm_dynamic_quant/op_host/CMakeLists.txt
csrc/attention/rms_norm_dynamic_quant/op_host/rms_norm_dynamic_quant_def.cpp
csrc/attention/rms_norm_dynamic_quant/op_host/rms_norm_dynamic_quant_proto.cpp
csrc/attention/rms_norm_dynamic_quant/op_host/rms_norm_dynamic_quant_tiling.cpp
csrc/attention/rms_norm_dynamic_quant/op_host/rms_norm_dynamic_quant_tiling.h
csrc/attention/rms_norm_dynamic_quant/op_kernel/reduce_common.h
csrc/attention/rms_norm_dynamic_quant/op_kernel/rms_norm_dynamic_quant.cpp
csrc/attention/rms_norm_dynamic_quant/op_kernel/rms_norm_dynamic_quant_base.h
csrc/attention/rms_norm_dynamic_quant/op_kernel/rms_norm_dynamic_quant_cut_d_kernel.h
csrc/attention/rms_norm_dynamic_quant/op_kernel/rms_norm_dynamic_quant_helper.h
csrc/attention/rms_norm_dynamic_quant/op_kernel/rms_norm_dynamic_quant_normal_kernel.h
csrc/attention/rms_norm_dynamic_quant/op_kernel/rms_norm_dynamic_quant_single_row_kernel.h
csrc/attention/sparse_attn_sharedkv/CMakeLists.txt
csrc/attention/sparse_attn_sharedkv/README.md
csrc/attention/sparse_attn_sharedkv/op_host/CMakeLists.txt
csrc/attention/sparse_attn_sharedkv/op_host/sparse_attn_sharedkv_def.cpp
csrc/attention/sparse_attn_sharedkv/op_host/sparse_attn_sharedkv_proto.cpp
csrc/attention/sparse_attn_sharedkv/op_host/sparse_attn_sharedkv_tiling.cpp
csrc/attention/sparse_attn_sharedkv/op_host/sparse_attn_sharedkv_tiling.h
csrc/attention/sparse_attn_sharedkv/op_kernel/sparse_attn_sharedkv.cpp
csrc/attention/sparse_attn_sharedkv/op_kernel/sparse_attn_sharedkv_common.h
csrc/attention/sparse_attn_sharedkv/op_kernel/sparse_attn_sharedkv_metadata.h
csrc/attention/sparse_attn_sharedkv/op_kernel/sparse_attn_sharedkv_template_tiling_key.h
csrc/attention/sparse_attn_sharedkv/op_kernel/arch32/sparse_attn_sharedkv_scfa_block_cube.h
csrc/attention/sparse_attn_sharedkv/op_kernel/arch32/sparse_attn_sharedkv_scfa_block_vector.h
csrc/attention/sparse_attn_sharedkv/op_kernel/arch32/sparse_attn_sharedkv_scfa_kernel.h
csrc/attention/sparse_attn_sharedkv/op_kernel/arch32/sparse_attn_sharedkv_swa_block_cube.h
csrc/attention/sparse_attn_sharedkv/op_kernel/arch32/sparse_attn_sharedkv_swa_block_vector.h
csrc/attention/sparse_attn_sharedkv/op_kernel/arch32/sparse_attn_sharedkv_swa_kernel.h
csrc/attention/sparse_attn_sharedkv_metadata/CMakeLists.txt
csrc/attention/sparse_attn_sharedkv_metadata/README.md
csrc/attention/sparse_attn_sharedkv_metadata/op_api/aclnn_sparse_attn_sharedkv_metadata.cpp
csrc/attention/sparse_attn_sharedkv_metadata/op_api/aclnn_sparse_attn_sharedkv_metadata.h
csrc/attention/sparse_attn_sharedkv_metadata/op_api/l0_sparse_attn_sharedkv_metadata.cpp
csrc/attention/sparse_attn_sharedkv_metadata/op_api/l0_sparse_attn_sharedkv_metadata.h
csrc/attention/sparse_attn_sharedkv_metadata/op_graph/sparse_attn_sharedkv_metadata_proto.h
csrc/attention/sparse_attn_sharedkv_metadata/op_host/sparse_attn_sharedkv_metadata_infershape.cpp
csrc/attention/sparse_attn_sharedkv_metadata/op_kernel_aicpu/sparse_attn_sharedkv_metadata_aicpu.cpp
csrc/attention/sparse_attn_sharedkv_metadata/op_kernel_aicpu/sparse_attn_sharedkv_metadata_aicpu.h
csrc/attention/sparse_attn_sharedkv_metadata/op_kernel_aicpu/sparse_attn_sharedkv_metadata_aicpu.json
csrc/attention/sparse_flash_attention/CMakeLists.txt
csrc/attention/sparse_flash_attention/sparse_flash_attention_torch_adpt.h
csrc/attention/sparse_flash_attention/op_host/CMakeLists.txt
csrc/attention/sparse_flash_attention/op_host/sparse_flash_attention_def.cpp
csrc/attention/sparse_flash_attention/op_host/sparse_flash_attention_proto.cpp
csrc/attention/sparse_flash_attention/op_host/sparse_flash_attention_tiling.cpp
csrc/attention/sparse_flash_attention/op_host/sparse_flash_attention_tiling.h
csrc/attention/sparse_flash_attention/op_kernel/sparse_flash_attention.cpp
csrc/attention/sparse_flash_attention/op_kernel/sparse_flash_attention_common.h
csrc/attention/sparse_flash_attention/op_kernel/sparse_flash_attention_kernel_mla.h
csrc/attention/sparse_flash_attention/op_kernel/sparse_flash_attention_service_cube_mla.h
csrc/attention/sparse_flash_attention/op_kernel/sparse_flash_attention_service_vector_mla.h
csrc/attention/sparse_flash_attention/op_kernel/sparse_flash_attention_template_tiling_key.h
csrc/batch_matmul_transpose/batch_matmul_transpose_torch_adpt.h
csrc/batch_matmul_transpose/op_host/batch_matmul_transpose.h
csrc/batch_matmul_transpose/op_host/common.h
csrc/batch_matmul_transpose/op_host/common_tiling.h
csrc/batch_matmul_transpose/op_host/tiling/tiling_data.cpp
csrc/batch_matmul_transpose/op_host/tiling/tiling_data.h
csrc/batch_matmul_transpose/op_kernel/batch_matmul_transpose_kernel.cpp
csrc/cmake/Third_Party_Open_Source_Software_List.yaml
csrc/cmake/aclnn_ops_transformer.h.in
csrc/cmake/build_empty_package.cmake
csrc/cmake/config.cmake
csrc/cmake/config_utest.cmake
csrc/cmake/custom_build.cmake
csrc/cmake/dependencies.cmake
csrc/cmake/func.cmake
csrc/cmake/func_examples.cmake
csrc/cmake/func_utest.cmake
csrc/cmake/gen_ops_info.cmake
csrc/cmake/intf.cmake
csrc/cmake/intf_pub.cmake
csrc/cmake/intf_pub_examples.cmake
csrc/cmake/intf_pub_linux.cmake
csrc/cmake/intf_pub_llt_gccnative.cmake
csrc/cmake/intf_pub_utest.cmake
csrc/cmake/makeself_built_in.cmake
csrc/cmake/makeself_custom.cmake
csrc/cmake/obj_func.cmake
csrc/cmake/opbuild.cmake
csrc/cmake/package.cmake
csrc/cmake/rty_obj_func.cmake
csrc/cmake/runtimeKB.cmake
csrc/cmake/static.cmake
csrc/cmake/symbol.cmake
csrc/cmake/ut.cmake
csrc/cmake/variables.cmake
csrc/cmake/modules/FindOPBASE.cmake
csrc/cmake/modules/FindPython.cmake
csrc/cmake/modules/Findaicpu.cmake
csrc/cmake/modules/Findalog.cmake
csrc/cmake/modules/Finddlog.cmake
csrc/cmake/modules/Findjson.cmake
csrc/cmake/modules/Findmetadef.cmake
csrc/cmake/modules/Findnnopbase.cmake
csrc/cmake/modules/Findoptiling.cmake
csrc/cmake/modules/Findplatform.cmake
csrc/cmake/modules/Findruntime.cmake
csrc/cmake/modules/Findsecurec.cmake
csrc/cmake/modules/Findtilingapi.cmake
csrc/cmake/modules/Findunified_dlog.cmake
csrc/cmake/scripts/check_version_compatible.py
csrc/cmake/scripts/convert_yaml.py
csrc/cmake/scripts/fix_format.sh
csrc/cmake/scripts/parse_changed_files.py
csrc/cmake/scripts/prepare.sh
csrc/cmake/scripts/custom/help.info
csrc/cmake/scripts/custom/install.sh
csrc/cmake/scripts/custom/upgrade.sh
csrc/cmake/scripts/examples/get_opapi_abs_path.py
csrc/cmake/scripts/examples/get_soc_info.py
csrc/cmake/scripts/utest/gen_coverage.py
csrc/cmake/scripts/utest/gen_tiling_data_stub.py
csrc/cmake/scripts/util/ascendc_bin_param_build.py
csrc/cmake/scripts/util/ascendc_gen_options.py
csrc/cmake/scripts/util/ascendc_impl_build.py
csrc/cmake/scripts/util/ascendc_ops_config.py
csrc/cmake/scripts/util/const_var.py
csrc/cmake/scripts/util/gen_version_info.sh
csrc/cmake/scripts/util/opdesc_parser.py
csrc/cmake/scripts/util/parse_ini_to_json.py
csrc/cmake/third_party/.gitkeep
csrc/cmake/third_party/abseil-cpp.cmake
csrc/cmake/third_party/ascend_protobuf.cmake
csrc/cmake/third_party/gtest.cmake
csrc/cmake/third_party/json.cmake
csrc/cmake/third_party/makeself-fetch.cmake
csrc/cmake/third_party/protobuf.cmake
csrc/cmake/third_party/secure_c.cmake
csrc/cmake/third_party/build/modules/patch/protobuf-hide_absl_symbols.patch
csrc/cmake/third_party/build/modules/patch/protobuf_25.1_change_version.patch
csrc/common/CMakeLists.txt
csrc/common/aicpu/cpu_context_util.h
csrc/common/include/common/op_api_def.h
csrc/common/include/common/tensor_util.cpp
csrc/common/include/common/tensor_util.h
csrc/common/include/err/ops_err.h
csrc/common/include/external/aclnn_util.h
csrc/common/include/external/aclnn_kernels/cast.h
csrc/common/include/external/aclnn_kernels/contiguous.h
csrc/common/include/external/aclnn_kernels/pad.h
csrc/common/include/external/aclnn_kernels/reshape.h
csrc/common/include/external/aclnn_kernels/slice.h
csrc/common/include/external/aclnn_kernels/transdata.h
csrc/common/include/external/aclnn_kernels/transpose.h
csrc/common/include/external/aclnn_kernels/common/op_error_check.h
csrc/common/include/fallback/fallback.h
csrc/common/include/fallback/fallback_2stages.h
csrc/common/include/fallback/fallback_comm.h
csrc/common/include/fallback/fallback_comm_2stages.h
csrc/common/include/framework/onnx_common.h
csrc/common/include/kernel/common.h
csrc/common/include/kernel/common_func.h
csrc/common/include/kernel/dropmask.h
csrc/common/include/kernel/gm_to_l1_iterator.h
csrc/common/include/kernel/gm_to_ub_iterator.h
csrc/common/include/kernel/hardware.h
csrc/common/include/kernel/iterator.h
csrc/common/include/kernel/l0c_to_gm_iterator.h
csrc/common/include/kernel/l0c_to_l1_iterator.h
csrc/common/include/kernel/l0c_to_ub_iterator.h
csrc/common/include/kernel/l1_to_bt_iterator.h
csrc/common/include/kernel/l1_to_fb_iterator.h
csrc/common/include/kernel/l1_to_l0_iterator.h
csrc/common/include/kernel/l1_to_ub_iterator.h
csrc/common/include/kernel/layout.h
csrc/common/include/kernel/mem.h
csrc/common/include/kernel/mma.h
csrc/common/include/kernel/pse.h
csrc/common/include/kernel/simd.h
csrc/common/include/kernel/util.h
csrc/common/include/op_graph/op_transformer_proto_extend.h
csrc/common/include/static/op_resource.h
csrc/common/include/static/static_space.h
csrc/common/include/tiling_base/data_copy_transpose_tiling.h
csrc/common/include/tiling_base/data_copy_transpose_tiling_def.h
csrc/common/include/tiling_base/error_log.h
csrc/common/include/tiling_base/tiling_base.h
csrc/common/include/tiling_base/tiling_key.h
csrc/common/include/tiling_base/tiling_templates_registry.h
csrc/common/include/tiling_base/tiling_type.h
csrc/common/include/tiling_base/tiling_util.h
csrc/common/include/tiling_sink/device_op_impl_registry_impl.h
csrc/common/include/tiling_sink/tiling_aicpu_task.h
csrc/common/src/fallback_comm.cpp
csrc/common/src/fallback_comm_2stages.cpp
csrc/common/src/framework/CMakeLists.txt
csrc/common/src/framework/embedding_bag_onnx_plugin.cpp
csrc/common/src/framework/fillwindowcache_onnx_plugin.cpp
csrc/common/src/framework/multi_head_attention_onnx_plugin.cpp
csrc/common/src/framework/npu_fused_attention_score_fwd_onnx_plugin.cpp
csrc/common/src/framework/npu_fused_attention_score_onnx_plugin.cpp
csrc/common/src/framework/npu_masked_softmax_with_relposbias_onnx_plugin.cpp
csrc/common/src/framework/npu_scaled_masked_softmax_onnx_plugin.cpp
csrc/common/src/framework/tfidf_vectorizer_onnx_plugin.cpp
csrc/common/src/tiling_base/tiling_util.cpp
csrc/common/src/tiling_sink/CMakeLists.txt
csrc/common/src/tiling_sink/tiling_sink_registry.cpp
csrc/common/stub/CMakeLists.txt
csrc/common/stub/inc/framework/stub_ops.h
csrc/common/stub/op_api/CMakeLists.txt
csrc/common/stub/op_api/opapi_stub.cpp
csrc/common/stub/op_api/aclnn_kernels/cast.h
csrc/common/stub/op_api/aclnn_kernels/contiguous.h
csrc/common/stub/op_api/aclnn_kernels/pad.h
csrc/common/stub/op_api/aclnn_kernels/reshape.h
csrc/common/stub/op_api/aclnn_kernels/slice.h
csrc/common/stub/op_api/aclnn_kernels/transdata.h
csrc/common/stub/op_api/aclnn_kernels/transpose.h
csrc/common/stub/op_api/aclnn_kernels/common/op_error_check.h
csrc/common/stub/op_api/level0/add.h
csrc/common/stub/op_api/level0/arange.h
csrc/common/stub/op_api/level0/axpy.h
csrc/common/stub/op_api/level0/batch_norm_backward.h
csrc/common/stub/op_api/level0/broadcast_to.h
csrc/common/stub/op_api/level0/dilation.h
csrc/common/stub/op_api/level0/div.h
csrc/common/stub/op_api/level0/dot.h
csrc/common/stub/op_api/level0/expand.h
csrc/common/stub/op_api/level0/fault_injection.h
csrc/common/stub/op_api/level0/fill.h
csrc/common/stub/op_api/level0/gather_elements.h
csrc/common/stub/op_api/level0/gather_v2.h
csrc/common/stub/op_api/level0/gather_v3.h
csrc/common/stub/op_api/level0/inplace_index_add.h
csrc/common/stub/op_api/level0/masked_scatter.h
csrc/common/stub/op_api/level0/matmul_v2tov3.h
csrc/common/stub/op_api/level0/maximum.h
csrc/common/stub/op_api/level0/minimum.h
csrc/common/stub/op_api/level0/mul.h
csrc/common/stub/op_api/level0/muls.h
csrc/common/stub/op_api/level0/ones_like.h
csrc/common/stub/op_api/level0/padv3.h
csrc/common/stub/op_api/level0/reduce_mean.h
csrc/common/stub/op_api/level0/reduce_sum_op.h
csrc/common/stub/op_api/level0/shape_op.h
csrc/common/stub/op_api/level0/sort.h
csrc/common/stub/op_api/level0/squeeze.h
csrc/common/stub/op_api/level0/sub.h
csrc/common/stub/op_api/level0/tensor_move.h
csrc/common/stub/op_api/level0/unsqueeze.h
csrc/common/stub/op_api/level0/zero_op.h
csrc/common/stub/op_tiling/CMakeLists.txt
csrc/common/stub/op_tiling/op_cache_def_tiling.h
csrc/common/stub/op_tiling/op_cache_tiling.cpp
csrc/common/stub/op_tiling/op_cache_tiling.h
csrc/common/stub/op_tiling/runtime_kb_api.cpp
csrc/common/stub/op_tiling/runtime_kb_api.h
csrc/common/stub/op_tiling/tbe_tiling_api.cpp
csrc/common/stub/op_tiling/tbe_tiling_api.h
csrc/common/stub/op_tiling/register/tuning_bank_key_registry.h
csrc/common/stub/op_tiling/register/tuning_tiling_reflection_utils.h
csrc/common/stub/op_tiling/register/tuning_tiling_registry.h
csrc/gmm/CMakeLists.txt
csrc/gmm/grouped_matmul_swiglu_quant/CMakeLists.txt
csrc/gmm/grouped_matmul_swiglu_quant/README.md
csrc/gmm/grouped_matmul_swiglu_quant/op_host/CMakeLists.txt
csrc/gmm/grouped_matmul_swiglu_quant/op_host/grouped_matmul_swiglu_quant_def.cpp
csrc/gmm/grouped_matmul_swiglu_quant/op_host/grouped_matmul_swiglu_quant_infershape.cpp
csrc/gmm/grouped_matmul_swiglu_quant/op_host/grouped_matmul_swiglu_quant_tiling.cpp
csrc/gmm/grouped_matmul_swiglu_quant/op_host/grouped_matmul_swiglu_quant_tiling.h
csrc/gmm/grouped_matmul_swiglu_quant/op_host/op_api/aclnn_grouped_matmul_swiglu_quant.cpp
csrc/gmm/grouped_matmul_swiglu_quant/op_host/op_api/aclnn_grouped_matmul_swiglu_quant.h
csrc/gmm/grouped_matmul_swiglu_quant/op_host/op_api/aclnn_grouped_matmul_swiglu_quant_weight_nz.h
csrc/gmm/grouped_matmul_swiglu_quant/op_host/op_api/grouped_matmul_swiglu_quant.cpp
csrc/gmm/grouped_matmul_swiglu_quant/op_host/op_api/grouped_matmul_swiglu_quant.h
csrc/gmm/grouped_matmul_swiglu_quant/op_kernel/grouped_matmul_swiglu_pipeline.h
csrc/gmm/grouped_matmul_swiglu_quant/op_kernel/grouped_matmul_swiglu_quant.cpp
csrc/gmm/grouped_matmul_swiglu_quant/op_kernel/grouped_matmul_swiglu_quant.h
csrc/gmm/grouped_matmul_swiglu_quant/op_kernel/grouped_matmul_swiglu_quant_a8w4_msd_mid.h
csrc/gmm/grouped_matmul_swiglu_quant/op_kernel/grouped_matmul_swiglu_quant_a8w4_msd_post.h
csrc/gmm/grouped_matmul_swiglu_quant/op_kernel/grouped_matmul_swiglu_quant_a8w4_msd_pre.h
csrc/gmm/grouped_matmul_swiglu_quant/op_kernel/grouped_matmul_swiglu_quant_split_ws.h
csrc/gmm/grouped_matmul_swiglu_quant/op_kernel/grouped_matmul_swiglu_quant_utils.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/CMakeLists.txt
csrc/gmm/grouped_matmul_swiglu_quant_v2/grouped_matmul_swiglu_quant_v2_torch_adpt.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/CMakeLists.txt
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/grouped_matmul_swiglu_quant_v2_base_tiling.cpp
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/grouped_matmul_swiglu_quant_v2_base_tiling.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/grouped_matmul_swiglu_quant_v2_def.cpp
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/grouped_matmul_swiglu_quant_v2_fusion_tiling.cpp
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/grouped_matmul_swiglu_quant_v2_fusion_tiling.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/grouped_matmul_swiglu_quant_v2_host_utils.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/grouped_matmul_swiglu_quant_v2_infershape.cpp
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/grouped_matmul_swiglu_quant_v2_tiling.cpp
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/grouped_matmul_swiglu_quant_v2_tiling.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/op_api/aclnn_grouped_matmul_swiglu_quant_v2.cpp
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/op_api/aclnn_grouped_matmul_swiglu_quant_v2.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/op_api/aclnn_grouped_matmul_swiglu_quant_weight_nz_v2.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/op_api/gmm_dsq_base.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/op_api/grouped_matmul_swiglu_quant_utils.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/op_api/grouped_matmul_swiglu_quant_v2.cpp
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/op_api/grouped_matmul_swiglu_quant_v2.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_host/op_api/grouped_matmul_swiglu_quant_v2_utils.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_kernel/grouped_matmul_swiglu_quant_spilit_fusion.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_kernel/grouped_matmul_swiglu_quant_v2.cpp
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_kernel/grouped_matmul_swiglu_quant_v2_a4w4_mid.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_kernel/grouped_matmul_swiglu_quant_v2_a4w4_pipeline.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_kernel/grouped_matmul_swiglu_quant_v2_a4w4_post.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_kernel/grouped_matmul_swiglu_quant_v2_a8w4_msd_mid.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_kernel/grouped_matmul_swiglu_quant_v2_a8w4_msd_pipeline.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_kernel/grouped_matmul_swiglu_quant_v2_a8w4_msd_post.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_kernel/grouped_matmul_swiglu_quant_v2_a8w4_msd_pre.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_kernel/grouped_matmul_swiglu_quant_v2_apt.cpp
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_kernel/grouped_matmul_swiglu_quant_v2_utils.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_kernel/arch35/grouped_matmul_swiglu_quant_v2_mxquant.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_kernel/arch35/grouped_matmul_swiglu_quant_v2_pertoken_quant.h
csrc/gmm/grouped_matmul_swiglu_quant_v2/op_kernel/arch35/grouped_matmul_swiglu_quant_v2_tiling_key.h
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/CMakeLists.txt
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/grouped_matmul_swiglu_quant_torch_adpt.h
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/CMakeLists.txt
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_def.cpp
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_infershape.cpp
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_proto.cpp
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_tiling.cpp
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/grouped_matmul_swiglu_quant_weight_nz_tensor_list_tiling.h
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/op_api/aclnn_grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/op_api/aclnn_grouped_matmul_swiglu_quant_weight_nz_tensor_list.h
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_host/op_api/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.cpp
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list.h
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list_split_ws.h
csrc/gmm/grouped_matmul_swiglu_quant_weight_nz_tensor_list/op_kernel/grouped_matmul_swiglu_quant_weight_nz_tensor_list_utils.h
csrc/kernels/bgmv_expand.cpp
csrc/kernels/bgmv_shrink.cpp
csrc/kernels/get_masked_input_and_mask_kernel.cpp
csrc/kernels/math_utils.h
csrc/kernels/sgmv_expand.cpp
csrc/kernels/sgmv_shrink.cpp
csrc/kernels/types.h
csrc/kernels/utils.h
csrc/mc2/CMakeLists.txt
csrc/mc2/dispatch_ffn_combine/CMakeLists.txt
csrc/mc2/dispatch_ffn_combine/dispatch_ffn_combine_torch_adpt.h
csrc/mc2/dispatch_ffn_combine/op_host/CMakeLists.txt
csrc/mc2/dispatch_ffn_combine/op_host/dispatch_ffn_combine_def.cpp
csrc/mc2/dispatch_ffn_combine/op_host/dispatch_ffn_combine_proto.cpp
csrc/mc2/dispatch_ffn_combine/op_host/dispatch_ffn_combine_tiling.cpp
csrc/mc2/dispatch_ffn_combine/op_host/hcom_topo_info.h
csrc/mc2/dispatch_ffn_combine/op_host/tiling_args.h
csrc/mc2/dispatch_ffn_combine/op_host/op_api/aclnn_dispatch_ffn_combine.cpp
csrc/mc2/dispatch_ffn_combine/op_host/op_api/aclnn_dispatch_ffn_combine.h
csrc/mc2/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine.cpp
csrc/mc2/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine.h
csrc/mc2/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine_kernel.hpp
csrc/mc2/dispatch_ffn_combine/op_kernel/dispatch_ffn_combine_tiling.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2.cpp
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2_tiling.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_init_routing_v2_tiling.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_common.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_expert_token_out.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_dynamic_quant.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_quant.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_quant_base.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_dynamic_quant.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_out.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_quant.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_init_routing_fullload.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_mrgsort.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_mrgsort_out.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_base.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_multi_core.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_one_core.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_and_gather.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_op.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_with_capacity.h
csrc/mc2/dispatch_ffn_combine/op_kernel/moe_init_routing_quant_v2/tiling_base.h
csrc/mc2/dispatch_ffn_combine/op_kernel/unpermute/moe_token_unpermute.h
csrc/mc2/dispatch_ffn_combine/op_kernel/unpermute/moe_token_unpermute_tiling.h
csrc/mc2/dispatch_ffn_combine/op_kernel/utils/block_epilogue_pertoken_row.hpp
csrc/mc2/dispatch_ffn_combine/op_kernel/utils/block_epilogue_pertoken_swiglu.hpp
csrc/mc2/dispatch_ffn_combine/op_kernel/utils/block_epilogue_pertoken_v2.hpp
csrc/mc2/dispatch_ffn_combine/op_kernel/utils/block_mmad_preload_async_fixpipe_quant.hpp
csrc/mc2/dispatch_ffn_combine/op_kernel/utils/const_args.hpp
csrc/mc2/dispatch_ffn_combine/op_kernel/utils/copy_gm_to_l1_custom.hpp
csrc/mc2/dispatch_ffn_combine/op_kernel/utils/copy_l0c_to_gm_custom.hpp
csrc/mc2/dispatch_ffn_combine/op_kernel/utils/dispatch_policy_custom.hpp
csrc/mc2/dispatch_ffn_combine/op_kernel/utils/get_tensor_addr.hpp
csrc/mc2/dispatch_ffn_combine/op_kernel/utils/hccl_shmem.hpp
csrc/mc2/dispatch_ffn_combine/op_kernel/utils/layout3d.hpp
csrc/mc2/dispatch_ffn_combine/op_kernel/utils/moe_distribute_base.h
csrc/mc2/dispatch_ffn_combine/op_kernel/utils/select_helper.hpp
csrc/mc2/dispatch_ffn_combine_bf16/CMakeLists.txt
csrc/mc2/dispatch_ffn_combine_bf16/op_host/CMakeLists.txt
csrc/mc2/dispatch_ffn_combine_bf16/op_host/dispatch_ffn_combine_bf16_def.cpp
csrc/mc2/dispatch_ffn_combine_bf16/op_host/dispatch_ffn_combine_bf16_proto.cpp
csrc/mc2/dispatch_ffn_combine_bf16/op_host/dispatch_ffn_combine_bf16_tiling.cpp
csrc/mc2/dispatch_ffn_combine_bf16/op_host/hcom_topo_info.h
csrc/mc2/dispatch_ffn_combine_bf16/op_host/tiling_args.h
csrc/mc2/dispatch_ffn_combine_bf16/op_host/op_api/aclnn_dispatch_ffn_combine_bf16.cpp
csrc/mc2/dispatch_ffn_combine_bf16/op_host/op_api/aclnn_dispatch_ffn_combine_bf16.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/dispatch_ffn_combine_bf16.cpp
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/dispatch_ffn_combine_bf16.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/dispatch_ffn_combine_bf16_kernel.hpp
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/dispatch_ffn_combine_bf16_tiling.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_init_routing_v2.cpp
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_init_routing_v2_tiling.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_common.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_expert_token_out.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_gather_out.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_init_routing_fullload.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_mrgsort.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_mrgsort_out.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_sort_base.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_sort_multi_core.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_sort_one_core.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_src_to_dst_op.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_src_to_dst_op_simt.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/moe_v2_src_to_dst_with_capacity.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/moe_init_routing_v2/tiling_base.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/unpermute/moe_token_unpermute.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/unpermute/moe_token_unpermute_tiling.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/utils/block_epilogue_pertoken_row.hpp
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/utils/block_epilogue_pertoken_swiglu.hpp
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/utils/block_epilogue_pertoken_v2.hpp
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/utils/block_mmad_preload_async_fixpipe_quant.hpp
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/utils/const_args.hpp
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/utils/copy_gm_to_l1_custom.hpp
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/utils/copy_l0c_to_gm_custom.hpp
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/utils/dispatch_policy_custom.hpp
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/utils/get_tensor_addr.hpp
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/utils/hccl_shmem.hpp
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/utils/layout3d.hpp
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/utils/moe_distribute_base.h
csrc/mc2/dispatch_ffn_combine_bf16/op_kernel/utils/select_helper.hpp
csrc/mc2/dispatch_ffn_combine_w4_a8/CMakeLists.txt
csrc/mc2/dispatch_ffn_combine_w4_a8/op_host/CMakeLists.txt
csrc/mc2/dispatch_ffn_combine_w4_a8/op_host/dispatch_ffn_combine_w4_a8_def.cpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_host/dispatch_ffn_combine_w4_a8_proto.cpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_host/dispatch_ffn_combine_w4_a8_tiling.cpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_host/hcom_topo_info.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_host/tiling_args.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_host/op_api/aclnn_dispatch_ffn_combine_w4_a8.cpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_host/op_api/aclnn_dispatch_ffn_combine_w4_a8.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/dispatch_ffn_combine_w4_a8.cpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/dispatch_ffn_combine_w4_a8.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/dispatch_ffn_combine_w4_a8_kernel.hpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/dispatch_ffn_combine_w4_a8_tiling.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2.cpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_init_routing_quant_v2_tiling.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_init_routing_v2_tiling.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_common.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_expert_token_out.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_dynamic_quant.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_quant.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_fullload_quant_base.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_dynamic_quant.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_out.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_gather_quant.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_init_routing_fullload.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_mrgsort.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_mrgsort_out.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_base.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_multi_core.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_sort_one_core.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_and_gather.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_op.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/moe_v2_src_to_dst_with_capacity.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/moe_init_routing_quant_v2/tiling_base.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/unpermute/moe_token_unpermute.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/unpermute/moe_token_unpermute_tiling.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/utils/block_epilogue_w4a8post_pertoken_swiglu.hpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/utils/block_epilogue_w4a8post_pertoken_v2.hpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/utils/block_mmad_w4a4.hpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/utils/const_args.hpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/utils/copy_gm_to_l1_custom.hpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/utils/dispatch_policy_custom.hpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/utils/get_tensor_addr.hpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/utils/hccl_shmem.hpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/utils/layout3d.hpp
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/utils/moe_distribute_base.h
csrc/mc2/dispatch_ffn_combine_w4_a8/op_kernel/utils/select_helper.hpp
csrc/mc2/dispatch_gmm_combine_decode/CMakeLists.txt
csrc/mc2/dispatch_gmm_combine_decode/dispatch_gmm_combine_decode_torch_adpt.h
csrc/mc2/dispatch_gmm_combine_decode/op_host/CMakeLists.txt
csrc/mc2/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_def.cpp
csrc/mc2/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_proto.cpp
csrc/mc2/dispatch_gmm_combine_decode/op_host/dispatch_gmm_combine_decode_tiling.cpp
csrc/mc2/dispatch_gmm_combine_decode/op_host/op_api/aclnn_dispatch_gmm_combine_decode.cpp
csrc/mc2/dispatch_gmm_combine_decode/op_host/op_api/aclnn_dispatch_gmm_combine_decode.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.cpp
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_base.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_bf16_fp16.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode_tiling.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/moe_distribute_base.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/dispatch_policy.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_bf16_fp16.hpp
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_per_token_dequant.hpp
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_per_token_dequant_swiglu.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/block/block_epilogue_swiglu_bf16_fp16.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/tile/tile_stride_binary.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/epilogue/tile/tile_stride_muls.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/dispatch_policy.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/block/block_mmad.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/block/block_mmad_preload_async_with_callback_resident_a.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_multistage_workspace_bf16_fp16.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_multistage_workspace.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_per_token_dequant_swiglu_quant_multistage_workspace.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/gemm/kernel/grouped_matmul_slice_m_swiglu_multistage_workspace_bf16_fp16.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_combine.h
csrc/mc2/dispatch_gmm_combine_decode/op_kernel/dispatch_gmm_combine_decode/raw_distributed/cam_moe_distribute_dispatch.h
csrc/mc2/dispatch_layout/CMakeLists.txt
csrc/mc2/dispatch_layout/dispatch_layout_torch_adpt.h
csrc/mc2/dispatch_layout/op_host/CMakeLists.txt
csrc/mc2/dispatch_layout/op_host/dispatch_layout.cpp
csrc/mc2/dispatch_layout/op_host/dispatch_layout_tiling.cpp
csrc/mc2/dispatch_layout/op_host/op_api/aclnn_dispatch_layout.cpp
csrc/mc2/dispatch_layout/op_host/op_api/aclnn_dispatch_layout.h
csrc/mc2/dispatch_layout/op_kernel/dispatch_layout.cpp
csrc/mc2/dispatch_layout/op_kernel/dispatch_layout.h
csrc/mc2/dispatch_layout/op_kernel/dispatch_layout_tiling.h
csrc/mc2/dispatch_layout/op_kernel/kernel/comm_args.h
csrc/mc2/dispatch_layout/op_kernel/kernel/data_copy.h
csrc/mc2/dispatch_layout/op_kernel/kernel/dropmask.h
csrc/mc2/dispatch_layout/op_kernel/kernel/moe_distribute_base.h
csrc/mc2/dispatch_layout/op_kernel/kernel/pse.h
csrc/mc2/dispatch_layout/op_kernel/kernel/sync_collectives.h
csrc/mc2/dispatch_layout/op_kernel/kernel/util.h
csrc/mc2/matmul_allreduce_add_rmsnorm/CMakeLists.txt
csrc/mc2/matmul_allreduce_add_rmsnorm/matmul_allreduce_add_rmsnorm_torch_adpt.h
csrc/mc2/matmul_allreduce_add_rmsnorm/op_host/CMakeLists.txt
csrc/mc2/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_def.cpp
csrc/mc2/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_proto.cpp
csrc/mc2/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_tiling.cpp
csrc/mc2/matmul_allreduce_add_rmsnorm/op_host/matmul_allreduce_add_rmsnorm_workspace.h
csrc/mc2/matmul_allreduce_add_rmsnorm/op_host/op_api/aclnn_matmul_allreduce_add_rmsnorm.cpp
csrc/mc2/matmul_allreduce_add_rmsnorm/op_host/op_api/aclnn_matmul_allreduce_add_rmsnorm.h
csrc/mc2/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm.cpp
csrc/mc2/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_aic_kernel.h
csrc/mc2/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_aiv_kernel.h
csrc/mc2/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_tiling.h
csrc/mc2/matmul_allreduce_add_rmsnorm/op_kernel/matmul_allreduce_add_rmsnorm_utils.h
csrc/mc2/moe_combine_normal/CMakeLists.txt
csrc/mc2/moe_combine_normal/moe_combine_normal_torch_adpt.h
csrc/mc2/moe_combine_normal/op_host/CMakeLists.txt
csrc/mc2/moe_combine_normal/op_host/moe_combine_normal.cpp
csrc/mc2/moe_combine_normal/op_host/moe_combine_normal_tiling.cpp
csrc/mc2/moe_combine_normal/op_host/op_api/aclnn_moe_combine_normal.cpp
csrc/mc2/moe_combine_normal/op_host/op_api/aclnn_moe_combine_normal.h
csrc/mc2/moe_combine_normal/op_kernel/moe_combine_normal.cpp
csrc/mc2/moe_combine_normal/op_kernel/moe_combine_normal.h
csrc/mc2/moe_combine_normal/op_kernel/moe_combine_normal_tiling.h
csrc/mc2/moe_combine_normal/op_kernel/utils/moe_distribute_base.h
csrc/mc2/moe_dispatch_normal/CMakeLists.txt
csrc/mc2/moe_dispatch_normal/op_host/CMakeLists.txt
csrc/mc2/moe_dispatch_normal/op_host/moe_dispatch_normal.cpp
csrc/mc2/moe_dispatch_normal/op_host/moe_dispatch_normal_tiling.cpp
csrc/mc2/moe_dispatch_normal/op_host/op_api/aclnn_moe_dispatch_normal.cpp
csrc/mc2/moe_dispatch_normal/op_host/op_api/aclnn_moe_dispatch_normal.h
csrc/mc2/moe_dispatch_normal/op_kernel/moe_dispatch_normal.cpp
csrc/mc2/moe_dispatch_normal/op_kernel/moe_dispatch_normal.h
csrc/mc2/moe_dispatch_normal/op_kernel/moe_dispatch_normal_tiling.h
csrc/mc2/moe_dispatch_normal/op_kernel/utils/moe_distribute_base.h
csrc/mc2/notify_dispatch/CMakeLists.txt
csrc/mc2/notify_dispatch/op_host/CMakeLists.txt
csrc/mc2/notify_dispatch/op_host/notify_dispatch.cpp
csrc/mc2/notify_dispatch/op_host/notify_dispatch_tiling.cpp
csrc/mc2/notify_dispatch/op_host/op_api/aclnn_notify_dispatch.cpp
csrc/mc2/notify_dispatch/op_host/op_api/aclnn_notify_dispatch.h
csrc/mc2/notify_dispatch/op_kernel/notify_dispatch.cpp
csrc/mc2/notify_dispatch/op_kernel/notify_dispatch.h
csrc/mc2/notify_dispatch/op_kernel/notify_dispatch_tiling.h
csrc/mc2/notify_dispatch/op_kernel/kernel/comm_args.h
csrc/mc2/notify_dispatch/op_kernel/kernel/data_copy.h
csrc/mc2/notify_dispatch/op_kernel/kernel/dropmask.h
csrc/mc2/notify_dispatch/op_kernel/kernel/moe_distribute_base.h
csrc/mc2/notify_dispatch/op_kernel/kernel/pse.h
csrc/mc2/notify_dispatch/op_kernel/kernel/sync_collectives.h
csrc/mc2/notify_dispatch/op_kernel/kernel/util.h
csrc/mla_preprocess/mla_preprocess_torch_adpt.h
csrc/mla_preprocess/op_host/mla_preprocess.h
csrc/mla_preprocess/op_host/tiling/mla_preprocess_tiling.h
csrc/mla_preprocess/op_kernel/mla_preprocess.h
csrc/mla_preprocess/op_kernel/mla_preprocess_kernel.cpp
csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16.hpp
csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16_nq.hpp
csrc/mla_preprocess/op_kernel/mla_preprocess_mix_bf16_qdown.hpp
csrc/mla_preprocess/op_kernel/mla_preprocess_mix_fp16.hpp
csrc/mla_preprocess/op_kernel/kernel/common.h
csrc/mla_preprocess/op_kernel/kernel/common_func.h
csrc/mla_preprocess/op_kernel/kernel/hardware.h
csrc/mla_preprocess/op_kernel/kernel/iterator.h
csrc/mla_preprocess/op_kernel/kernel/kernel_utils.h
csrc/mla_preprocess/op_kernel/kernel/layout.h
csrc/mla_preprocess/op_kernel/kernel/mem.h
csrc/mla_preprocess/op_kernel/kernel/mma.h
csrc/mla_preprocess/op_kernel/kernel/set_fpc.h
csrc/mla_preprocess/op_kernel/kernel/simd.h
csrc/mla_preprocess/op_kernel/kernel/utils.h
csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_l1_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/gm_to_ub_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_gm_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_l1_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/l0c_to_ub_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_bt_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_fb_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_l0_iterator.inc
csrc/mla_preprocess/op_kernel/kernel/iterators/l1_to_ub_iterator.inc
csrc/moe/CMakeLists.txt
csrc/moe/add_rms_norm_bias/CMakeLists.txt
csrc/moe/add_rms_norm_bias/add_rms_norm_bias_torch_adpt.h
csrc/moe/add_rms_norm_bias/op_host/CMakeLists.txt
csrc/moe/add_rms_norm_bias/op_host/add_rms_norm_bias_def.cpp
csrc/moe/add_rms_norm_bias/op_host/add_rms_norm_bias_infershape.cpp
csrc/moe/add_rms_norm_bias/op_host/add_rms_norm_bias_tiling.cpp
csrc/moe/add_rms_norm_bias/op_host/add_rms_norm_bias_tiling.h
csrc/moe/add_rms_norm_bias/op_kernel/add_rms_norm_bias.cpp
csrc/moe/add_rms_norm_bias/op_kernel/add_rms_norm_bias.h
csrc/moe/add_rms_norm_bias/op_kernel/add_rms_norm_bias_merge_n.h
csrc/moe/add_rms_norm_bias/op_kernel/add_rms_norm_bias_multi_n.h
csrc/moe/add_rms_norm_bias/op_kernel/add_rms_norm_bias_single_n.h
csrc/moe/add_rms_norm_bias/op_kernel/add_rms_norm_bias_split_d.h
csrc/moe/add_rms_norm_bias/op_kernel/reduce_common.h
csrc/moe/add_rms_norm_bias/op_kernel/rms_norm_base.h
csrc/moe/apply_top_k_top_p_custom/CMakeLists.txt
csrc/moe/apply_top_k_top_p_custom/apply_top_k_top_p_custom_torch_adpt.h
csrc/moe/apply_top_k_top_p_custom/op_host/CMakeLists.txt
csrc/moe/apply_top_k_top_p_custom/op_host/apply_top_k_top_p_custom.h
csrc/moe/apply_top_k_top_p_custom/op_host/apply_top_k_top_p_custom_def.cpp
csrc/moe/apply_top_k_top_p_custom/op_host/apply_top_k_top_p_custom_tiling.cpp
csrc/moe/apply_top_k_top_p_custom/op_host/apply_top_k_top_p_custom_tiling.h
csrc/moe/apply_top_k_top_p_custom/op_host/sort.h
csrc/moe/apply_top_k_top_p_custom/op_host/op_api/aclnn_apply_top_k_top_p_custom.cpp
csrc/moe/apply_top_k_top_p_custom/op_host/op_api/aclnn_apply_top_k_top_p_custom.h
csrc/moe/apply_top_k_top_p_custom/op_host/op_api/apply_top_k_top_p_custom.cpp
csrc/moe/apply_top_k_top_p_custom/op_kernel/apply_top_k_top_p_custom.cpp
csrc/moe/apply_top_k_top_p_custom/op_kernel/apply_top_k_top_p_custom.h
csrc/moe/apply_top_k_top_p_custom/op_kernel/apply_top_p_custom.h
csrc/moe/causal_conv1d/CMakeLists.txt
csrc/moe/causal_conv1d/op_host/CMakeLists.txt
csrc/moe/causal_conv1d/op_host/causal_conv1d_def.cpp
csrc/moe/causal_conv1d/op_host/causal_conv1d_infershape.cpp
csrc/moe/causal_conv1d/op_host/causal_conv1d_tiling.cpp
csrc/moe/causal_conv1d/op_host/causal_conv1d_tiling_planner.h
csrc/moe/causal_conv1d/op_host/causal_conv1d_tiling_utils.h
csrc/moe/causal_conv1d/op_host/causal_conv1d_tiling_validation.h
csrc/moe/causal_conv1d/op_host/math_util.h
csrc/moe/causal_conv1d/op_kernel/causal_conv1d.cpp
csrc/moe/causal_conv1d/op_kernel/causal_conv1d.h
csrc/moe/causal_conv1d/op_kernel/causal_conv1d_common.h
csrc/moe/causal_conv1d/op_kernel/causal_conv1d_fn.h
csrc/moe/causal_conv1d/op_kernel/causal_conv1d_fn_tasks.h
csrc/moe/causal_conv1d/op_kernel/causal_conv1d_tiling_data.h
csrc/moe/causal_conv1d/op_kernel/causal_conv1d_tiling_key.h
csrc/moe/causal_conv1d/op_kernel/causal_conv1d_update.h
csrc/moe/causal_conv1d_v310/CMakeLists.txt
csrc/moe/causal_conv1d_v310/causal_conv1d_310_torch_adpt.h
csrc/moe/causal_conv1d_v310/op_host/CMakeLists.txt
csrc/moe/causal_conv1d_v310/op_host/causal_conv1d_v310_def.cpp
csrc/moe/causal_conv1d_v310/op_host/causal_conv1d_v310_infershape.cpp
csrc/moe/causal_conv1d_v310/op_host/causal_conv1d_v310_tiling.cpp
csrc/moe/causal_conv1d_v310/op_host/causal_conv1d_v310_tiling.h
csrc/moe/causal_conv1d_v310/op_host/math_util.h
csrc/moe/causal_conv1d_v310/op_kernel/causal_conv1d_v310.cpp
csrc/moe/causal_conv1d_v310/op_kernel/causal_conv1d_v310.h
csrc/moe/causal_conv1d_v310/op_kernel/causal_conv1d_v310_common.h
csrc/moe/causal_conv1d_v310/op_kernel/causal_conv1d_v310_tiling_data.h
csrc/moe/causal_conv1d_v310/op_kernel/causal_conv1d_v310_tiling_key.h
csrc/moe/chunk_fwd_o/CMakeLists.txt
csrc/moe/chunk_fwd_o/op_host/CMakeLists.txt
csrc/moe/chunk_fwd_o/op_host/chunk_fwd_o_def.cpp
csrc/moe/chunk_fwd_o/op_host/chunk_fwd_o_tiling.cpp
csrc/moe/chunk_fwd_o/op_host/chunk_fwd_o_tiling.h
csrc/moe/chunk_fwd_o/op_host/op_api/aclnn_chunk_fwd_o.cpp
csrc/moe/chunk_fwd_o/op_host/op_api/aclnn_chunk_fwd_o.h
csrc/moe/chunk_fwd_o/op_host/op_api/chunk_fwd_o.cpp
csrc/moe/chunk_fwd_o/op_host/op_api/chunk_fwd_o.h
csrc/moe/chunk_fwd_o/op_kernel/chunk_fwd_o.cpp
csrc/moe/chunk_fwd_o/op_kernel/epilogue/gdn_fwd_o_epilogue_policies.hpp
csrc/moe/chunk_fwd_o/op_kernel/epilogue/block/block_epilogue_gdn_fwdo_output.hpp
csrc/moe/chunk_fwd_o/op_kernel/epilogue/block/block_epilogue_gdn_fwdo_qkmask.hpp
csrc/moe/chunk_fwd_o/op_kernel/gemm/block/block_scheduler_gdn_fwd_o.hpp
csrc/moe/chunk_fwd_o/op_kernel/gemm/kernel/gdn_fwd_o_kernel.hpp
csrc/moe/chunk_fwd_o/tiling_base/data_copy_transpose_tiling.h
csrc/moe/chunk_fwd_o/tiling_base/data_copy_transpose_tiling_def.h
csrc/moe/chunk_fwd_o/tiling_base/error_log.h
csrc/moe/chunk_fwd_o/tiling_base/tiling_base.h
csrc/moe/chunk_fwd_o/tiling_base/tiling_key.h
csrc/moe/chunk_fwd_o/tiling_base/tiling_templates_registry.h
csrc/moe/chunk_fwd_o/tiling_base/tiling_type.h
csrc/moe/chunk_fwd_o/tiling_base/tiling_util.h
csrc/moe/chunk_gated_delta_rule_fwd_h/CMakeLists.txt
csrc/moe/chunk_gated_delta_rule_fwd_h/op_host/CMakeLists.txt
csrc/moe/chunk_gated_delta_rule_fwd_h/op_host/chunk_gated_delta_rule_fwd_h_def.cpp
csrc/moe/chunk_gated_delta_rule_fwd_h/op_host/chunk_gated_delta_rule_fwd_h_tiling.cpp
csrc/moe/chunk_gated_delta_rule_fwd_h/op_host/chunk_gated_delta_rule_fwd_h_tiling.h
csrc/moe/chunk_gated_delta_rule_fwd_h/op_host/op_api/aclnn_chunk_gated_delta_rule_fwd_h.cpp
csrc/moe/chunk_gated_delta_rule_fwd_h/op_host/op_api/aclnn_chunk_gated_delta_rule_fwd_h.h
csrc/moe/chunk_gated_delta_rule_fwd_h/op_host/op_api/chunk_gated_delta_rule_fwd_h.cpp
csrc/moe/chunk_gated_delta_rule_fwd_h/op_host/op_api/chunk_gated_delta_rule_fwd_h.h
csrc/moe/chunk_gated_delta_rule_fwd_h/op_kernel/chunk_gated_delta_rule_fwd_h.cpp
csrc/moe/chunk_gated_delta_rule_fwd_h/op_kernel/epilogue/gdn_fwd_h_epilogue_policies.hpp
csrc/moe/chunk_gated_delta_rule_fwd_h/op_kernel/epilogue/block/block_epilogue_gdn_fwdh_update.hpp
csrc/moe/chunk_gated_delta_rule_fwd_h/op_kernel/epilogue/block/block_epilogue_gdn_fwdh_vnew.hpp
csrc/moe/chunk_gated_delta_rule_fwd_h/op_kernel/gemm/block/block_scheduler_gdn_fwd_h.hpp
csrc/moe/chunk_gated_delta_rule_fwd_h/op_kernel/gemm/kernel/gdn_fwd_h_kernel.hpp
csrc/moe/chunk_gated_delta_rule_fwd_h/tiling_base/data_copy_transpose_tiling.h
csrc/moe/chunk_gated_delta_rule_fwd_h/tiling_base/data_copy_transpose_tiling_def.h
csrc/moe/chunk_gated_delta_rule_fwd_h/tiling_base/error_log.h
csrc/moe/chunk_gated_delta_rule_fwd_h/tiling_base/tiling_base.h
csrc/moe/chunk_gated_delta_rule_fwd_h/tiling_base/tiling_key.h
csrc/moe/chunk_gated_delta_rule_fwd_h/tiling_base/tiling_templates_registry.h
csrc/moe/chunk_gated_delta_rule_fwd_h/tiling_base/tiling_type.h
csrc/moe/chunk_gated_delta_rule_fwd_h/tiling_base/tiling_util.h
csrc/moe/common/kernel_utils/block/block_mmad_pingpong_tla_multi.hpp
csrc/moe/copy_and_expand_eagle_inputs/CMakeLists.txt
csrc/moe/copy_and_expand_eagle_inputs/op_host/CMakeLists.txt
csrc/moe/copy_and_expand_eagle_inputs/op_host/copy_and_expand_eagle_inputs_def.cpp
csrc/moe/copy_and_expand_eagle_inputs/op_host/copy_and_expand_eagle_inputs_infershape.cpp
csrc/moe/copy_and_expand_eagle_inputs/op_host/copy_and_expand_eagle_inputs_tiling.cpp
csrc/moe/copy_and_expand_eagle_inputs/op_host/copy_and_expand_eagle_inputs_tiling.h
csrc/moe/copy_and_expand_eagle_inputs/op_kernel/copy_and_expand_eagle_inputs.cpp
csrc/moe/dequant_swiglu_quant/CMakeLists.txt
csrc/moe/dequant_swiglu_quant/op_host/CMakeLists.txt
csrc/moe/dequant_swiglu_quant/op_host/dequant_swiglu_quant_def.cpp
csrc/moe/dequant_swiglu_quant/op_host/dequant_swiglu_quant_infershape.cpp
csrc/moe/dequant_swiglu_quant/op_host/dequant_swiglu_quant_proto.h
csrc/moe/dequant_swiglu_quant/op_host/dequant_swiglu_quant_tiling.cpp
csrc/moe/dequant_swiglu_quant/op_host/dequant_swiglu_quant_tiling.h
csrc/moe/dequant_swiglu_quant/op_host/dequant_swiglu_quant_tiling_arch35.cpp
csrc/moe/dequant_swiglu_quant/op_host/dequant_swiglu_quant_tiling_base.cpp
csrc/moe/dequant_swiglu_quant/op_host/swi_glu_grad_regbase_tiling.h
csrc/moe/dequant_swiglu_quant/op_host/swi_glu_grad_tiling_regbase.h
csrc/moe/dequant_swiglu_quant/op_host/swi_glu_tiling.h
csrc/moe/dequant_swiglu_quant/op_kernel/dequant_swiglu_quant.cpp
csrc/moe/dequant_swiglu_quant/op_kernel/dequant_swiglu_quant.h
csrc/moe/dequant_swiglu_quant/op_kernel/dequant_swiglu_quant_apt.cpp
csrc/moe/dequant_swiglu_quant/op_kernel/dequant_swiglu_quant_cut_group.h
csrc/moe/dequant_swiglu_quant/op_kernel/dequant_swiglu_quant_dynamic_base.hpp
csrc/moe/dequant_swiglu_quant/op_kernel/dequant_swiglu_quant_dynamic_bf16.hpp
csrc/moe/dequant_swiglu_quant/op_kernel/dequant_swiglu_quant_dynamic_bias_float.hpp
csrc/moe/dequant_swiglu_quant/op_kernel/dequant_swiglu_quant_dynamic_bias_int32.hpp
csrc/moe/dequant_swiglu_quant/op_kernel/dequant_swiglu_quant_dynamic_performance.hpp
csrc/moe/dequant_swiglu_quant/op_kernel/dequant_swiglu_quant_static_base.hpp
csrc/moe/dequant_swiglu_quant/op_kernel/dequant_swiglu_quant_static_bf16.hpp
csrc/moe/dequant_swiglu_quant/op_kernel/dequant_swiglu_quant_static_bias_float.hpp
csrc/moe/dequant_swiglu_quant/op_kernel/dequant_swiglu_quant_static_bias_int32.hpp
csrc/moe/dequant_swiglu_quant/tiling_base/error_log.h
csrc/moe/dequant_swiglu_quant/tiling_base/static_register_symbol.h
csrc/moe/dequant_swiglu_quant/tiling_base/tiling_base.h
csrc/moe/dequant_swiglu_quant/tiling_base/tiling_key.h
csrc/moe/dequant_swiglu_quant/tiling_base/tiling_templates_registry.h
csrc/moe/dequant_swiglu_quant/tiling_base/tiling_util.h
csrc/moe/hamming_dist_top_k/CMakeLists.txt
csrc/moe/hamming_dist_top_k/op_host/CMakeLists.txt
csrc/moe/hamming_dist_top_k/op_host/hamming_dist_top_k.cpp
csrc/moe/hamming_dist_top_k/op_host/hamming_dist_top_k.h
csrc/moe/hamming_dist_top_k/op_host/hamming_dist_top_k_def.cpp
csrc/moe/hamming_dist_top_k/op_host/hamming_dist_top_k_proto.cpp
csrc/moe/hamming_dist_top_k/op_host/hamming_dist_top_k_split.cpp
csrc/moe/hamming_dist_top_k/op_host/hamming_dist_top_k_split.h
csrc/moe/hamming_dist_top_k/op_host/hamming_dist_top_k_tiling.cpp
csrc/moe/hamming_dist_top_k/op_host/hamming_dist_top_k_tiling.h
csrc/moe/hamming_dist_top_k/op_host/op_host_util.h
csrc/moe/hamming_dist_top_k/op_kernel/hamming_dist_top_k.cpp
csrc/moe/hamming_dist_top_k/op_kernel/hamming_dist_top_k_base.h
csrc/moe/hamming_dist_top_k/op_kernel/hamming_dist_top_k_parallel.h
csrc/moe/hamming_dist_top_k/op_kernel/hamming_dist_top_k_split_s.h
csrc/moe/hc_post/CMakeLists.txt
csrc/moe/hc_post/op_host/CMakeLists.txt
csrc/moe/hc_post/op_host/hc_post_def.cpp
csrc/moe/hc_post/op_host/hc_post_proto.cpp
csrc/moe/hc_post/op_host/hc_post_tiling.cpp
csrc/moe/hc_post/op_host/hc_post_tiling.h
csrc/moe/hc_post/op_host/hc_post_tiling_arch35.h
csrc/moe/hc_post/op_kernel/hc_post.cpp
csrc/moe/hc_post/op_kernel/hc_post_bfloat16.h
csrc/moe/hc_post/op_kernel/hc_post_d_split.h
csrc/moe/hc_post/op_kernel/hc_post_float32.h
csrc/moe/hc_pre/CMakeLists.txt
csrc/moe/hc_pre/op_host/CMakeLists.txt
csrc/moe/hc_pre/op_host/hc_pre_def.cpp
csrc/moe/hc_pre/op_host/hc_pre_proto.cpp
csrc/moe/hc_pre/op_host/hc_pre_tiling.cpp
csrc/moe/hc_pre/op_host/hc_pre_tiling.h
csrc/moe/hc_pre/op_host/hc_pre_tiling_arch35.h
csrc/moe/hc_pre/op_kernel/hc_pre.cpp
csrc/moe/hc_pre/op_kernel/hc_pre_base.h
csrc/moe/hc_pre/op_kernel/hc_pre_base_arch35.h
csrc/moe/hc_pre/op_kernel/hc_pre_cube_compute.h
csrc/moe/hc_pre/op_kernel/hc_pre_cube_compute_arch35.h
csrc/moe/hc_pre/op_kernel/hc_pre_m_k_split_core.h
csrc/moe/hc_pre/op_kernel/hc_pre_m_k_split_core_arch35.h
csrc/moe/hc_pre/op_kernel/hc_pre_m_split_core_arch35.h
csrc/moe/hc_pre_inv_rms/CMakeLists.txt
csrc/moe/hc_pre_inv_rms/op_host/CMakeLists.txt
csrc/moe/hc_pre_inv_rms/op_host/hc_pre_inv_rms_def.cpp
csrc/moe/hc_pre_inv_rms/op_host/hc_pre_inv_rms_proto.cpp
csrc/moe/hc_pre_inv_rms/op_host/hc_pre_inv_rms_tiling.cpp
csrc/moe/hc_pre_inv_rms/op_host/hc_pre_inv_rms_tiling.h
csrc/moe/hc_pre_inv_rms/op_host/hc_pre_inv_rms_tiling_arch35.h
csrc/moe/hc_pre_inv_rms/op_host/hc_pre_inv_rms_tiling_large_d.h
csrc/moe/hc_pre_inv_rms/op_kernel/hc_pre_inv_rms.cpp
csrc/moe/hc_pre_inv_rms/op_kernel/hc_pre_inv_rms_full_load.h
csrc/moe/hc_pre_inv_rms/op_kernel/hc_pre_inv_rms_full_load_large_d.h
csrc/moe/hc_pre_inv_rms/op_kernel/hc_pre_inv_rms_full_load_regbase.h
csrc/moe/hc_pre_sinkhorn/CMakeLists.txt
csrc/moe/hc_pre_sinkhorn/op_host/CMakeLists.txt
csrc/moe/hc_pre_sinkhorn/op_host/hc_pre_sinkhorn_def.cpp
csrc/moe/hc_pre_sinkhorn/op_host/hc_pre_sinkhorn_proto.cpp
csrc/moe/hc_pre_sinkhorn/op_host/hc_pre_sinkhorn_tiling.cpp
csrc/moe/hc_pre_sinkhorn/op_host/hc_pre_sinkhorn_tiling.h
csrc/moe/hc_pre_sinkhorn/op_kernel/hc_pre_sinkhorn.cpp
csrc/moe/hc_pre_sinkhorn/op_kernel/hc_pre_sinkhorn_base.h
csrc/moe/hc_pre_sinkhorn/op_kernel/hc_pre_sinkhorn_perf.h
csrc/moe/hc_pre_sinkhorn/op_kernel/hc_pre_sinkhorn_regbase_base.h
csrc/moe/hc_pre_sinkhorn/op_kernel/hc_pre_sinkhorn_regbase_perf.h
csrc/moe/moe_gating_top_k/CMakeLists.txt
csrc/moe/moe_gating_top_k/moe_gating_top_k_torch_adpt.h
csrc/moe/moe_gating_top_k/op_host/CMakeLists.txt
csrc/moe/moe_gating_top_k/op_host/math_util.h
csrc/moe/moe_gating_top_k/op_host/moe_gating_top_k_def.cpp
csrc/moe/moe_gating_top_k/op_host/moe_gating_top_k_infershape.cpp
csrc/moe/moe_gating_top_k/op_host/moe_gating_top_k_proto.cpp
csrc/moe/moe_gating_top_k/op_host/moe_gating_top_k_proto.h
csrc/moe/moe_gating_top_k/op_host/moe_gating_top_k_tiling.cpp
csrc/moe/moe_gating_top_k/op_host/moe_gating_top_k_tiling.h
csrc/moe/moe_gating_top_k/op_host/moe_gating_top_k_tiling_arch35.cpp
csrc/moe/moe_gating_top_k/op_host/moe_gating_top_k_tiling_base.cpp
csrc/moe/moe_gating_top_k/op_kernel/common.h
csrc/moe/moe_gating_top_k/op_kernel/error_log.h
csrc/moe/moe_gating_top_k/op_kernel/moe_gating_top_k.cpp
csrc/moe/moe_gating_top_k/op_kernel/moe_gating_top_k_apt.cpp
csrc/moe/moe_gating_top_k/op_kernel/moe_gating_top_k_e_k_fullload.h
csrc/moe/moe_gating_top_k/op_kernel/moe_gating_top_k_generalized.h
csrc/moe/moe_gating_top_k/op_kernel/moe_gating_top_k_without_group.h
csrc/moe/moe_gating_top_k_hash/CMakeLists.txt
csrc/moe/moe_gating_top_k_hash/op_host/CMakeLists.txt
csrc/moe/moe_gating_top_k_hash/op_host/moe_gating_top_k_hash_def.cpp
csrc/moe/moe_gating_top_k_hash/op_host/moe_gating_top_k_hash_proto.cpp
csrc/moe/moe_gating_top_k_hash/op_host/moe_gating_top_k_hash_tiling.cpp
csrc/moe/moe_gating_top_k_hash/op_host/moe_gating_top_k_hash_tiling.h
csrc/moe/moe_gating_top_k_hash/op_host/moe_gating_top_k_hash_tiling_arch35.h
csrc/moe/moe_gating_top_k_hash/op_kernel/common.h
csrc/moe/moe_gating_top_k_hash/op_kernel/common_regbase.h
csrc/moe/moe_gating_top_k_hash/op_kernel/moe_gating_top_k_hash.cpp
csrc/moe/moe_gating_top_k_hash/op_kernel/moe_gating_top_k_hash_e_k_fullload.h
csrc/moe/moe_gating_top_k_hash/op_kernel/moe_gating_top_k_hash_generalized.h
csrc/moe/moe_gating_top_k_hash/op_kernel/moe_gating_top_k_hash_regbase.h
csrc/moe/moe_gating_top_k_hash/op_kernel/moe_gating_top_k_hash_without_group.h
csrc/moe/moe_gating_top_k_hash/op_kernel/arch35/common.h
csrc/moe/moe_gating_top_k_hash/op_kernel/arch35/moe_gating_top_k_hash_regbase.h
csrc/moe/moe_grouped_matmul/CMakeLists.txt
csrc/moe/moe_grouped_matmul/op_host/CMakeLists.txt
csrc/moe/moe_grouped_matmul/op_host/moe_grouped_matmul_cpu.cpp
csrc/moe/moe_grouped_matmul/op_host/moe_grouped_matmul_def.cpp
csrc/moe/moe_grouped_matmul/op_host/moe_grouped_matmul_infershape.cpp
csrc/moe/moe_grouped_matmul/op_host/moe_grouped_matmul_tiling.h
csrc/moe/moe_grouped_matmul/op_host/op_api/aclnn_moe_grouped_matmul.cpp
csrc/moe/moe_grouped_matmul/op_host/op_api/aclnn_moe_grouped_matmul.h
csrc/moe/moe_grouped_matmul/op_host/op_api/aclnn_moe_grouped_matmul_weight_nz.h
csrc/moe/moe_grouped_matmul/op_host/op_api/moe_grouped_matmul_l0.cpp
csrc/moe/moe_grouped_matmul/op_host/op_api/moe_grouped_matmul_l0.h
csrc/moe/moe_grouped_matmul/op_kernel/moe_grouped_matmul.cpp
csrc/moe/moe_grouped_matmul/op_kernel/moe_grouped_matmul.h
csrc/moe/moe_grouped_matmul/op_kernel/moe_grouped_matmul_tiling_data.h
csrc/moe/moe_init_routing_custom/CMakeLists.txt
csrc/moe/moe_init_routing_custom/moe_init_routing_custom_torch_adpt.h
csrc/moe/moe_init_routing_custom/op_host/CMakeLists.txt
csrc/moe/moe_init_routing_custom/op_host/aclnn_moe_init_routing_custom.cpp
csrc/moe/moe_init_routing_custom/op_host/aclnn_moe_init_routing_custom.h
csrc/moe/moe_init_routing_custom/op_host/moe_init_routing_custom.cpp
csrc/moe/moe_init_routing_custom/op_host/moe_init_routing_custom.h
csrc/moe/moe_init_routing_custom/op_host/moe_init_routing_custom_def.cpp
csrc/moe/moe_init_routing_custom/op_host/moe_init_routing_custom_infershape.cpp
csrc/moe/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling.cpp
csrc/moe/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling.h
csrc/moe/moe_init_routing_custom/op_host/moe_init_routing_custom_tiling_base.cpp
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_common.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_expert_tokens_count.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_full_load.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_full_load_base.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_full_load_dynamic_quant.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_full_load_static_quant.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_full_load_unquantized.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_gather_droppad_static_quant.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_gather_dynamic_quant.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_gather_out.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_gather_out_droppad.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_gather_sort_multi_core.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_gather_static_quant.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_mrgsort.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_mrgsort_out.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_mrgsort_out_performance.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_mrgsort_performance.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_row_idx_gather.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_row_idx_gather_droppad.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_row_idx_gather_droppad_dynamic.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_sort_actual_expert.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_sort_base.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_sort_multi_core.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_sort_multi_core_performance.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_custom_sort_one_core.h
csrc/moe/moe_init_routing_custom/op_kernel/moe_init_routing_custom.cpp
csrc/moe/scatter_nd_update_v2/CMakeLists.txt
csrc/moe/scatter_nd_update_v2/op_host/CMakeLists.txt
csrc/moe/scatter_nd_update_v2/op_host/scatter_nd_update_infershape.cpp
csrc/moe/scatter_nd_update_v2/op_host/scatter_nd_update_v2_def.cpp
csrc/moe/scatter_nd_update_v2/op_host/scatter_nd_update_v2_tiling.cpp
csrc/moe/scatter_nd_update_v2/op_host/scatter_nd_update_v2_tiling.h
csrc/moe/scatter_nd_update_v2/op_host/config/ascend910_93/scatter_nd_update_v2_binary.json
csrc/moe/scatter_nd_update_v2/op_host/config/ascend910_93/scatter_nd_update_v2_simplified_key.ini
csrc/moe/scatter_nd_update_v2/op_host/config/ascend910b/scatter_nd_update_v2_binary.json
csrc/moe/scatter_nd_update_v2/op_host/config/ascend910b/scatter_nd_update_v2_simplified_key.ini
csrc/moe/scatter_nd_update_v2/op_host/op_api/aclnn_scatter_nd_update_v2.cpp
csrc/moe/scatter_nd_update_v2/op_host/op_api/aclnn_scatter_nd_update_v2.h
csrc/moe/scatter_nd_update_v2/op_host/op_api/scatter_nd_update_v2.cpp
csrc/moe/scatter_nd_update_v2/op_host/op_api/scatter_nd_update_v2.h
csrc/moe/scatter_nd_update_v2/op_kernel/scatter_nd_update_common.h
csrc/moe/scatter_nd_update_v2/op_kernel/scatter_nd_update_large_index.h
csrc/moe/scatter_nd_update_v2/op_kernel/scatter_nd_update_linear_index.h
csrc/moe/scatter_nd_update_v2/op_kernel/scatter_nd_update_no_sort.h
csrc/moe/scatter_nd_update_v2/op_kernel/scatter_nd_update_v2.cpp
csrc/moe/scatter_nd_update_v2/op_kernel/scatter_nd_update_v2.h
csrc/moe/swiglu_group_quant/CMakeLists.txt
csrc/moe/swiglu_group_quant/op_host/CMakeLists.txt
csrc/moe/swiglu_group_quant/op_host/swiglu_group_quant_def.cpp
csrc/moe/swiglu_group_quant/op_host/swiglu_group_quant_proto.cpp
csrc/moe/swiglu_group_quant/op_host/swiglu_group_quant_tiling.cpp
csrc/moe/swiglu_group_quant/op_host/swiglu_group_quant_tiling.h
csrc/moe/swiglu_group_quant/op_kernel/swiglu_fp8_quant_per_token.h
csrc/moe/swiglu_group_quant/op_kernel/swiglu_group_quant.cpp
csrc/moe/swiglu_group_quant/op_kernel/swiglu_group_quant_base.h
csrc/moe/swiglu_group_quant/op_kernel/swiglu_group_quant_perf.h
csrc/moe/swiglu_group_quant/op_kernel/swiglu_mx_quant_perf.h
csrc/moe/transpose_kv_cache_by_block/CMakeLists.txt
csrc/moe/transpose_kv_cache_by_block/op_host/CMakeLists.txt
csrc/moe/transpose_kv_cache_by_block/op_host/transpose_kv_cache_by_block_def.cpp
csrc/moe/transpose_kv_cache_by_block/op_host/transpose_kv_cache_by_block_proto.cpp
csrc/moe/transpose_kv_cache_by_block/op_host/transpose_kv_cache_by_block_tiling.cpp
csrc/moe/transpose_kv_cache_by_block/op_host/transpose_kv_cache_by_block_tiling.h
csrc/moe/transpose_kv_cache_by_block/op_kernel/common.h
csrc/moe/transpose_kv_cache_by_block/op_kernel/full_load.h
csrc/moe/transpose_kv_cache_by_block/op_kernel/general.h
csrc/moe/transpose_kv_cache_by_block/op_kernel/transpose_kv_cache_by_block.cpp
csrc/scripts/opgen/opgen_standalone.py
csrc/scripts/opgen/template/CMakeLists.txt
csrc/scripts/opgen/template/add/CMakeLists.txt
csrc/scripts/opgen/template/add/examples/test_aclnn_add_example.cpp
csrc/scripts/opgen/template/add/op_host/CMakeLists.txt
csrc/scripts/opgen/template/add/op_host/add_example_def.cpp
csrc/scripts/opgen/template/add/op_host/add_example_infershape.cpp
csrc/scripts/opgen/template/add/op_host/add_example_tiling.cpp
csrc/scripts/opgen/template/add/op_kernel/add_example.cpp
csrc/scripts/opgen/template/add/op_kernel/add_example.h
csrc/scripts/opgen/template/add/op_kernel/add_example_tiling_data.h
csrc/scripts/opgen/template/add/op_kernel/add_example_tiling_key.h
csrc/scripts/opgen/template/add/tests/ut/.gitkeep
csrc/scripts/package/package.py
csrc/scripts/package/common/__init__.py
csrc/scripts/package/common/cfg/path.cfg
csrc/scripts/package/common/py/__init__.py
csrc/scripts/package/common/py/filelist.py
csrc/scripts/package/common/py/merge_binary_info_config.py
csrc/scripts/package/common/py/packer.py
csrc/scripts/package/common/py/pkg_parser.py
csrc/scripts/package/common/py/version_info.py
csrc/scripts/package/common/py/utils/comm_log.py
csrc/scripts/package/common/py/utils/funcbase.py
csrc/scripts/package/common/py/utils/pkg_utils.py
csrc/scripts/package/common/sh/check_version_required.awk
csrc/scripts/package/common/sh/cleanup.sh
csrc/scripts/package/common/sh/common_func.inc
csrc/scripts/package/common/sh/common_func_v2.inc
csrc/scripts/package/common/sh/common_installer.inc
csrc/scripts/package/common/sh/common_interface.csh
csrc/scripts/package/common/sh/common_interface.fish
csrc/scripts/package/common/sh/common_interface.sh
csrc/scripts/package/common/sh/install_common_parser.sh
csrc/scripts/package/common/sh/multi_version.inc
csrc/scripts/package/common/sh/script_operator.inc
csrc/scripts/package/common/sh/version_cfg.inc
csrc/scripts/package/common/sh/version_compatiable.inc
csrc/scripts/package/latest_manager/scripts/filelist.csv
csrc/scripts/package/latest_manager/scripts/install.sh
csrc/scripts/package/latest_manager/scripts/manager.sh
csrc/scripts/package/latest_manager/scripts/manager_func.sh
csrc/scripts/package/latest_manager/scripts/uninstall.sh
csrc/scripts/package/latest_manager/scripts/version.info
csrc/scripts/package/module/ascend/EngineeringCommon.xml
csrc/scripts/package/module/ascend/EngineeringFiles.xml
csrc/scripts/package/module/ascend/OpsTransformer.xml
csrc/scripts/package/module/ascend/OpsTransformerInc.xml
csrc/scripts/package/ops_transformer/ops_transformer.xml
csrc/scripts/package/ops_transformer/scripts/cleanup.sh
csrc/scripts/package/ops_transformer/scripts/help.info
csrc/scripts/package/ops_transformer/scripts/install.sh
csrc/scripts/package/ops_transformer/scripts/opp_common.sh
csrc/scripts/package/ops_transformer/scripts/opp_custom_install.sh
csrc/scripts/package/ops_transformer/scripts/opp_custom_uninstall.sh
csrc/scripts/package/ops_transformer/scripts/opp_install.sh
csrc/scripts/package/ops_transformer/scripts/opp_uninstall.sh
csrc/scripts/package/ops_transformer/scripts/uninstall.sh
csrc/scripts/package/ops_transformer/scripts/ver_check.sh
csrc/scripts/package/ops_transformer/scripts/empty_package_scripts/cleanup.sh
csrc/scripts/package/ops_transformer/scripts/empty_package_scripts/install.sh
csrc/scripts/util/__init__.py
csrc/scripts/util/build_opp_kernel_static.py
csrc/scripts/util/const_var.py
csrc/scripts/util/insert_op_info.py
csrc/scripts/util/merge_aicpu_info_json.sh
csrc/scripts/util/merge_proto.py
csrc/scripts/util/modify_gen_aclnn.py
csrc/utils/CMakeLists.txt
csrc/utils/inc/aclnn_util.h
csrc/utils/inc/fallback.h
csrc/utils/inc/fallback_comm.h
csrc/utils/inc/error/ops_error.h
csrc/utils/inc/kernel/comm_args.h
csrc/utils/inc/kernel/data_copy.h
csrc/utils/inc/kernel/dropmask.h
csrc/utils/inc/kernel/moe_distribute_base.h
csrc/utils/inc/kernel/pse.h
csrc/utils/inc/kernel/sync_collectives.h
csrc/utils/inc/kernel/util.h
csrc/utils/inc/log/ops_log.h
csrc/utils/inc/log/inner/dfx_base.h
csrc/utils/inc/tiling/data_copy_transpose_tiling.h
csrc/utils/inc/tiling/data_copy_transpose_tiling_def.h
csrc/utils/inc/tiling/tiling_base.h
csrc/utils/inc/tiling/tiling_templates_registry.h
csrc/utils/inc/tiling/tiling_type.h
csrc/utils/src/fallback_comm.cpp
docs/Makefile
docs/README.md
docs/requirements-docs.txt
docs/source/conf.py
docs/source/faqs.md
docs/source/index.md
docs/source/installation.md
docs/source/llms.txt
docs/source/quick_start.md
docs/source/_templates/Model-Deployment-Tutorial-Template.md
docs/source/_templates/Model-Deployment-Tutorial-Template.zh.md
docs/source/_templates/sections/header.html
docs/source/assets/ci_log_summary.png
docs/source/assets/deployment.png
docs/source/assets/disaggregated_prefill_pull.png
docs/source/assets/disaggregated_prefill_push.png
docs/source/assets/eplb.png
docs/source/assets/multi_node_dp_deepseek.png
docs/source/assets/multi_node_dp_kimi.png
docs/source/assets/sp_moe.png
docs/source/assets/workflow.png
docs/source/assets/cp/blocktable.png
docs/source/assets/cp/chunkedprefill.png
docs/source/assets/cp/dcp-decode.png
docs/source/assets/cp/dcp-prefill.png
docs/source/assets/cp/device_world.png
docs/source/assets/cp/head-tail-style.png
docs/source/assets/cp/overview.png
docs/source/assets/cp/pcp-decode.png
docs/source/assets/cp/pcp-prefill.png
docs/source/assets/quantization/get_quant_method.png
docs/source/assets/quantization/quant_algorithm_overview.png
docs/source/assets/quantization/quant_method_base_class.png
docs/source/assets/quantization/quant_method_call_flow.png
docs/source/assets/quantization/quant_methods_overview.png
docs/source/community/contributors.md
docs/source/community/governance.md
docs/source/community/issue-workflow-guidelines.md
docs/source/community/versioning_policy.md
docs/source/community/images/issue_label_workflow.png
docs/source/community/user_stories/index.md
docs/source/community/user_stories/llamafactory.md
docs/source/developer_guide/Design_Documents/ACL_Graph.md
docs/source/developer_guide/Design_Documents/KV_Cache_Pool_Guide.md
docs/source/developer_guide/Design_Documents/ModelRunner_prepare_inputs.md
docs/source/developer_guide/Design_Documents/add_custom_aclnn_op.md
docs/source/developer_guide/Design_Documents/context_parallel.md
docs/source/developer_guide/Design_Documents/cpu_binding.md
docs/source/developer_guide/Design_Documents/disaggregated_prefill.md
docs/source/developer_guide/Design_Documents/dynamic_chunked_pipeline_parallel.md
docs/source/developer_guide/Design_Documents/eplb_swift_balancer.md
docs/source/developer_guide/Design_Documents/index.md
docs/source/developer_guide/Design_Documents/npugraph_ex.md
docs/source/developer_guide/Design_Documents/patch.md
docs/source/developer_guide/Design_Documents/quantization.md
docs/source/developer_guide/contribution/doc_writing.md
docs/source/developer_guide/contribution/e2e_ci_test.md
docs/source/developer_guide/contribution/index.md
docs/source/developer_guide/contribution/multi_node_test.md
docs/source/developer_guide/contribution/nightly_ci_test.md
docs/source/developer_guide/contribution/testing.md
docs/source/developer_guide/evaluation/index.md
docs/source/developer_guide/evaluation/using_ais_bench.md
docs/source/developer_guide/evaluation/using_evalscope.md
docs/source/developer_guide/evaluation/using_lm_eval.md
docs/source/developer_guide/evaluation/using_opencompass.md
docs/source/developer_guide/performance_and_debug/index.md
docs/source/developer_guide/performance_and_debug/msprobe_guide.md
docs/source/developer_guide/performance_and_debug/optimization_and_tuning.md
docs/source/developer_guide/performance_and_debug/performance_benchmark.md
docs/source/developer_guide/performance_and_debug/service_profiling_guide.md
docs/source/locale/zh_CN/LC_MESSAGES/faqs.po
docs/source/locale/zh_CN/LC_MESSAGES/index.po
docs/source/locale/zh_CN/LC_MESSAGES/installation.po
docs/source/locale/zh_CN/LC_MESSAGES/quick_start.po
docs/source/locale/zh_CN/LC_MESSAGES/community/contributors.po
docs/source/locale/zh_CN/LC_MESSAGES/community/governance.po
docs/source/locale/zh_CN/LC_MESSAGES/community/issue-workflow-guidelines.po
docs/source/locale/zh_CN/LC_MESSAGES/community/versioning_policy.po
docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/index.po
docs/source/locale/zh_CN/LC_MESSAGES/community/user_stories/llamafactory.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ACL_Graph.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/KV_Cache_Pool_Guide.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/ModelRunner_prepare_inputs.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/add_custom_aclnn_op.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/context_parallel.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/cpu_binding.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/disaggregated_prefill.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/dynamic_chunked_pipeline_parallel.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/eplb_swift_balancer.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/index.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/npugraph_ex.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/patch.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/Design_Documents/quantization.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/e2e_ci_test.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/index.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/multi_node_test.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/nightly_ci_test.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/contribution/testing.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/index.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_ais_bench.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_evalscope.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_lm_eval.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/using_opencompass.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/evaluation/accuracy_report/index.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_model.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/adding_a_new_multimodal_model.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/modeling/index.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/index.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/msprobe_guide.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/optimization_and_tuning.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/performance_benchmark.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/profile_execute_duration.po
docs/source/locale/zh_CN/LC_MESSAGES/developer_guide/performance_and_debug/service_profiling_guide.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/dynamic_chunked_pipeline_parallel.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/index.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_multi_node.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/long_sequence_context_parallel_single_node.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_colocated_mooncake_multi_instance.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_multi_node.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/pd_disaggregation_mooncake_single_node.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/ray.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/features/suffix_speculative_decoding.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/hardwares/310p.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/hardwares/index.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-R1.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-V3.1.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeek-V3.2.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/DeepSeekOCR2.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/GLM4.x.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/GLM5.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Hunyuan-A13B-Instruct.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2-Thinking.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Kimi-K2.5.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/LLaVA-OneVision-Qwen2-0.5B-OV.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/MiniMax-M2.5.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Minitron-8B-Base.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Mixtral-8x7B-Instruct-v0.1.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/PaddleOCR-VL.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen-VL-Dense.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-7B.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-Math-RM-72B.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen2.5-Omni.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-235B-A22B.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-30B-A3B.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-32B-W4A4.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-8B-W4A8.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-ASR-1.7B.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Coder-30B-A3B.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Dense.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Next.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-235B-A22B-Instruct.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-30B-A3B-Instruct.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-Embedding.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3-VL-Reranker.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-27B.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3.5-397B-A17B.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3_embedding.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/Qwen3_reranker.po
docs/source/locale/zh_CN/LC_MESSAGES/tutorials/models/index.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/release_notes.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/additional_config.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/env_vars.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/configuration/index.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/index.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/deployment_guide/using_volcano_kthena.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Ai_QoS_introduction_en.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Fine_grained_TP.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/Multi_Token_Prediction.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/batch_invariance.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/context_parallel.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/cpu_binding.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/dynamic_batch.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/dynamic_chunk_pipeline_parallel.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/epd_disaggregation.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/eplb_swift_balancer.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/external_dp.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/flash_attention.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/graph_mode.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/index.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_cache_cpu_offload.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/kv_pool.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/large_scale_ep.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/layer_sharding.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lmcache_ascend_deployment.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/lora.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/netloader.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/quantization.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/rfork.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sequence_parallelism.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/sleep_mode.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/speculative_decoding.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/structured_output.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/ucm_deployment.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/feature_guide/weight_prefetch.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/feature_matrix.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/index.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_features.po
docs/source/locale/zh_CN/LC_MESSAGES/user_guide/support_matrix/supported_models.po
docs/source/logos/vllm-ascend-logo-text-dark.png
docs/source/logos/vllm-ascend-logo-text-light.png
docs/source/tutorials/features/dynamic_chunked_pipeline_parallel.md
docs/source/tutorials/features/index.md
docs/source/tutorials/features/long_sequence_context_parallel_multi_node.md
docs/source/tutorials/features/long_sequence_context_parallel_single_node.md
docs/source/tutorials/features/pd_colocated_mooncake_multi_instance.md
docs/source/tutorials/features/pd_disaggregation_mooncake_multi_node.md
docs/source/tutorials/features/pd_disaggregation_mooncake_single_node.md
docs/source/tutorials/features/ray.md
docs/source/tutorials/features/suffix_speculative_decoding.md
docs/source/tutorials/hardwares/310p.md
docs/source/tutorials/hardwares/index.md
docs/source/tutorials/models/DeepSeek-R1.md
docs/source/tutorials/models/DeepSeek-V3.1.md
docs/source/tutorials/models/DeepSeek-V3.2.md
docs/source/tutorials/models/DeepSeekOCR2.md
docs/source/tutorials/models/GLM4.x.md
docs/source/tutorials/models/GLM5.md
docs/source/tutorials/models/Hunyuan-A13B-Instruct.md
docs/source/tutorials/models/Hy3-preview.md
docs/source/tutorials/models/Kimi-K2-Thinking.md
docs/source/tutorials/models/Kimi-K2.5.md
docs/source/tutorials/models/LLaVA-OneVision-Qwen2-0.5B-OV.md
docs/source/tutorials/models/MiniMax-M2.5.md
docs/source/tutorials/models/Minitron-8B-Base.md
docs/source/tutorials/models/Mixtral-8x7B-Instruct-v0.1.md
docs/source/tutorials/models/PaddleOCR-VL.md
docs/source/tutorials/models/Qwen-VL-Dense.md
docs/source/tutorials/models/Qwen2.5-Math-RM-72B.md
docs/source/tutorials/models/Qwen3-235B-A22B.md
docs/source/tutorials/models/Qwen3-30B-A3B.md
docs/source/tutorials/models/Qwen3-32B-W4A4.md
docs/source/tutorials/models/Qwen3-8B-W4A8.md
docs/source/tutorials/models/Qwen3-ASR-1.7B.md
docs/source/tutorials/models/Qwen3-Coder-30B-A3B.md
docs/source/tutorials/models/Qwen3-Dense.md
docs/source/tutorials/models/Qwen3-Next.md
docs/source/tutorials/models/Qwen3-Omni-30B-A3B-Thinking.md
docs/source/tutorials/models/Qwen3-VL-235B-A22B-Instruct.md
docs/source/tutorials/models/Qwen3-VL-30B-A3B-Instruct.md
docs/source/tutorials/models/Qwen3-VL-Embedding.md
docs/source/tutorials/models/Qwen3-VL-Reranker.md
docs/source/tutorials/models/Qwen3.5-27B.md
docs/source/tutorials/models/Qwen3.5-397B-A17B.md
docs/source/tutorials/models/Qwen3_embedding.md
docs/source/tutorials/models/Qwen3_reranker.md
docs/source/tutorials/models/gpt-oss-120b.md
docs/source/tutorials/models/index.md
docs/source/user_guide/release_notes.md
docs/source/user_guide/configuration/additional_config.md
docs/source/user_guide/configuration/env_vars.md
docs/source/user_guide/configuration/index.md
docs/source/user_guide/deployment_guide/index.md
docs/source/user_guide/deployment_guide/using_volcano_kthena.md
docs/source/user_guide/feature_guide/Ai_QoS_introduction_en.md
docs/source/user_guide/feature_guide/Fine_grained_TP.md
docs/source/user_guide/feature_guide/Multi_Token_Prediction.md
docs/source/user_guide/feature_guide/batch_invariance.md
docs/source/user_guide/feature_guide/context_parallel.md
docs/source/user_guide/feature_guide/cpu_binding.md
docs/source/user_guide/feature_guide/dynamic_batch.md
docs/source/user_guide/feature_guide/dynamic_chunk_pipeline_parallel.md
docs/source/user_guide/feature_guide/epd_disaggregation.md
docs/source/user_guide/feature_guide/eplb_swift_balancer.md
docs/source/user_guide/feature_guide/external_dp.md
docs/source/user_guide/feature_guide/flash_attention.md
docs/source/user_guide/feature_guide/graph_mode.md
docs/source/user_guide/feature_guide/index.md
docs/source/user_guide/feature_guide/kv_cache_cpu_offload.md
docs/source/user_guide/feature_guide/kv_pool.md
docs/source/user_guide/feature_guide/large_scale_ep.md
docs/source/user_guide/feature_guide/layer_sharding.md
docs/source/user_guide/feature_guide/lmcache_ascend_deployment.md
docs/source/user_guide/feature_guide/lora.md
docs/source/user_guide/feature_guide/netloader.md
docs/source/user_guide/feature_guide/quantization.md
docs/source/user_guide/feature_guide/rfork.md
docs/source/user_guide/feature_guide/sequence_parallelism.md
docs/source/user_guide/feature_guide/sleep_mode.md
docs/source/user_guide/feature_guide/speculative_decoding.md
docs/source/user_guide/feature_guide/structured_output.md
docs/source/user_guide/feature_guide/ucm_deployment.md
docs/source/user_guide/feature_guide/weight_prefetch.md
docs/source/user_guide/feature_guide/images/ai_qos1.png
docs/source/user_guide/feature_guide/images/ai_qos2.png
docs/source/user_guide/feature_guide/images/epd_disaggregation.jpg
docs/source/user_guide/feature_guide/images/eplb_img.png
docs/source/user_guide/feature_guide/images/layer_sharding.png
docs/source/user_guide/feature_guide/images/netloader_flowchart.png
docs/source/user_guide/feature_guide/images/netloader_timing_diagram.png
docs/source/user_guide/feature_guide/images/rfork_flowchart.jpg
docs/source/user_guide/feature_guide/images/structured_output_1.png
docs/source/user_guide/support_matrix/feature_matrix.md
docs/source/user_guide/support_matrix/index.md
docs/source/user_guide/support_matrix/supported_features.md
docs/source/user_guide/support_matrix/supported_models.md
examples/device_print_demo.py
examples/offline_data_parallel.py
examples/offline_disaggregated_prefill_npu.py
examples/offline_embed.py
examples/offline_external_launcher.py
examples/offline_inference_audio_language.py
examples/offline_inference_metrics.py
examples/offline_inference_npu.py
examples/offline_inference_npu_long_seq.py
examples/offline_inference_npu_tp2.py
examples/offline_inference_sleep_mode_npu.py
examples/offline_weight_load.py
examples/prompt_embed_inference.py
examples/prompt_embedding_inference.py
examples/run_dp_server.sh
examples/save_sharded_state_310.py
examples/chat_templates/template_qwen2_audio.jinja
examples/disaggregated_encoder/disagg_1e1pd_example.sh
examples/disaggregated_encoder/disagg_epd_proxy.py
examples/disaggregated_prefill_v1/load_balance_proxy_layerwise_server_example.py
examples/disaggregated_prefill_v1/load_balance_proxy_server_example.py
examples/disaggregated_prefill_v1/mooncake_connector_deployment_guide.md
examples/epd_disaggregated/epd_disaggregated_guide.md
examples/epd_disaggregated/epd_load_balance_proxy_layerwise_server_example.py
examples/eplb/eplb_deepseek.py
examples/eplb/eplb_strategy.py
examples/external_online_dp/README.md
examples/external_online_dp/dp_load_balance_proxy_server.py
examples/external_online_dp/launch_online_dp.py
examples/external_online_dp/run_dp_template.sh
examples/quantization/llm-compressor/w4a8_dynamic_moe.py
examples/quantization/llm-compressor/w8a8_int8.py
examples/quantization/llm-compressor/w8a8_int8_dynamic.py
examples/quantization/llm-compressor/w8a8_int8_dynamic_moe.py
examples/rfork/rfork_planner.py
tests/__init__.py
tests/e2e/__init__.py
tests/e2e/common.sh
tests/e2e/conftest.py
tests/e2e/model_utils.py
tests/e2e/run_doctests.sh
tests/e2e/utils.py
tests/e2e/310p/test_utils.py
tests/e2e/310p/data/qwen.png
tests/e2e/310p/multicard/test_dense_model_multicard.py
tests/e2e/310p/multicard/test_moe_model_multicard.py
tests/e2e/310p/multicard/test_vl_model_multicard.py
tests/e2e/310p/singlecard/test_dense_model_singlecard.py
tests/e2e/310p/singlecard/test_vl_model_singlecard.py
tests/e2e/310p/singlecard/pooling/__init__.py
tests/e2e/310p/singlecard/pooling/test_classification.py
tests/e2e/310p/singlecard/pooling/test_embedding.py
tests/e2e/310p/singlecard/pooling/test_scoring.py
tests/e2e/doctests/001-quickstart-test.sh
tests/e2e/doctests/002-pip-binary-installation-test.sh
tests/e2e/light/coverage.md
tests/e2e/light/four-card/test_deepseek_v3_2_w8a8_pruning.py
tests/e2e/light/single-card/test_qwen3_0_6b.py
tests/e2e/light/single-card/test_qwen3_5_0_8b.py
tests/e2e/light/single-card/test_qwen3_8b_w8a8.py
tests/e2e/light/single-card/test_qwen3_embedding_0_6b.py
tests/e2e/light/two-card/test_qwen3_30b_a3b.py
tests/e2e/light/two-card/test_qwen3_vl_30b_a3b_instruct.py
tests/e2e/models/conftest.py
tests/e2e/models/report_template.md
tests/e2e/models/test_asr_eval_correctness.py
tests/e2e/models/test_lm_eval_correctness.py
tests/e2e/models/test_rm_eval_correctness.py
tests/e2e/models/configs/ERNIE-4.5-21B-A3B-PT.yaml
tests/e2e/models/configs/Hunyuan-A13B-Instruct.yaml
tests/e2e/models/configs/InternVL3_5-8B-hf.yaml
tests/e2e/models/configs/Llama-3.2-3B-Instruct.yaml
tests/e2e/models/configs/Minitron-8B-Base.yaml
tests/e2e/models/configs/Mixtral-8x7B-Instruct-v0.1.yaml
tests/e2e/models/configs/Molmo-7B-D-0924.yaml
tests/e2e/models/configs/Qwen2-Audio-7B-Instruct.yaml
tests/e2e/models/configs/Qwen2.5-Math-RM-72B.yaml
tests/e2e/models/configs/Qwen3-30B-A3B-W8A8.yaml
tests/e2e/models/configs/Qwen3-30B-A3B.yaml
tests/e2e/models/configs/Qwen3-8B-W8A8.yaml
tests/e2e/models/configs/Qwen3-8B.yaml
tests/e2e/models/configs/Qwen3-ASR-1.7B.yaml
tests/e2e/models/configs/Qwen3-Next-80B-A3B-Instruct.yaml
tests/e2e/models/configs/Qwen3-Omni-30B-A3B-Instruct.yaml
tests/e2e/models/configs/Qwen3-VL-30B-A3B-Instruct.yaml
tests/e2e/models/configs/Qwen3-VL-8B-Instruct-W8A8.yaml
tests/e2e/models/configs/Qwen3-VL-8B-Instruct.yaml
tests/e2e/models/configs/accuracy.txt
tests/e2e/models/configs/accuracy_groups_a2.json
tests/e2e/models/configs/gemma-3-4b-it.yaml
tests/e2e/models/configs/internlm3-8b-instruct.yaml
tests/e2e/models/configs/llava-onevision-qwen2-0.5b-ov-hf.yaml
tests/e2e/multicard/2-cards/test_aclgraph_capture_replay.py
tests/e2e/multicard/2-cards/test_data_parallel.py
tests/e2e/multicard/2-cards/test_disaggregated_encoder.py
tests/e2e/multicard/2-cards/test_external_launcher.py
tests/e2e/multicard/2-cards/test_full_graph_mode.py
tests/e2e/multicard/2-cards/test_ilama_lora_tp2.py
tests/e2e/multicard/2-cards/test_llama32_lora_tp2.py
tests/e2e/multicard/2-cards/test_offline_inference_distributed.py
tests/e2e/multicard/2-cards/test_offline_weight_load.py
tests/e2e/multicard/2-cards/test_prefix_caching.py
tests/e2e/multicard/2-cards/test_qwen3_moe.py
tests/e2e/multicard/2-cards/test_qwen3_moe_routing_replay.py
tests/e2e/multicard/2-cards/test_qwen3_performance.py
tests/e2e/multicard/2-cards/test_sequence_parallelism_moe.py
tests/e2e/multicard/2-cards/test_shared_expert_dp.py
tests/e2e/multicard/2-cards/test_single_request_aclgraph.py
tests/e2e/multicard/2-cards/test_sp_pass.py
tests/e2e/multicard/2-cards/spec_decode/test_spec_decode.py
tests/e2e/multicard/4-cards/test_data_parallel_tp2.py
tests/e2e/multicard/4-cards/test_deepseek_v4.py
tests/e2e/multicard/4-cards/test_kimi_k2.py
tests/e2e/multicard/4-cards/test_pipeline_parallel.py
tests/e2e/multicard/4-cards/test_profiling_chunk_performance.py
tests/e2e/multicard/4-cards/test_qwen3_5.py
tests/e2e/multicard/4-cards/test_qwen3_next.py
tests/e2e/multicard/4-cards/long_sequence/test_accuracy.py
tests/e2e/multicard/4-cards/long_sequence/test_basic.py
tests/e2e/multicard/4-cards/long_sequence/test_chunked_prefill_cp.py
tests/e2e/multicard/4-cards/long_sequence/test_mtp.py
tests/e2e/multicard/4-cards/long_sequence/test_prefix_caching_cp.py
tests/e2e/multicard/4-cards/spec_decode/test_mtp_qwen3_next.py
tests/e2e/nightly/310p/single_node/ops/singlecard_ops/test_recurrent_gated_delta_rule_v310.py
tests/e2e/nightly/multi_node/__init__.py
tests/e2e/nightly/multi_node/external_dp/__init__.py
tests/e2e/nightly/multi_node/external_dp/config/GLM5_1-W8A8-EP-external.yaml
tests/e2e/nightly/multi_node/external_dp/config/template.md
tests/e2e/nightly/multi_node/external_dp/scripts/__init__.py
tests/e2e/nightly/multi_node/external_dp/scripts/external_dp_config.py
tests/e2e/nightly/multi_node/external_dp/scripts/runtime.py
tests/e2e/nightly/multi_node/external_dp/scripts/test_external_dp.py
tests/e2e/nightly/multi_node/external_dp/scripts/utils.py
tests/e2e/nightly/multi_node/internal_dp/config/DeepSeek-R1-W8A8-EPLB.yaml
tests/e2e/nightly/multi_node/internal_dp/config/DeepSeek-R1-W8A8-longseq.yaml
tests/e2e/nightly/multi_node/internal_dp/config/DeepSeek-R1-W8A8.yaml
tests/e2e/nightly/multi_node/internal_dp/config/DeepSeek-V3.1-BF16.yaml
tests/e2e/nightly/multi_node/internal_dp/config/DeepSeek-V3_2-W8A8-A3-dual-nodes.yaml
tests/e2e/nightly/multi_node/internal_dp/config/DeepSeek-V3_2-W8A8-EP.yaml
tests/e2e/nightly/multi_node/internal_dp/config/GLM5_1-W8A8-A2-dual-nodes.yaml
tests/e2e/nightly/multi_node/internal_dp/config/GLM5_1-W8A8-A3-dual-nodes.yaml
tests/e2e/nightly/multi_node/internal_dp/config/GLM5_1-W8A8-EP.yaml
tests/e2e/nightly/multi_node/internal_dp/config/Kimi-K2_5-W4A8-A2-dual-nodes.yaml
tests/e2e/nightly/multi_node/internal_dp/config/Qwen3-235B-A22B-A2.yaml
tests/e2e/nightly/multi_node/internal_dp/config/Qwen3-235B-A22B-Mooncake-Layerwise.yaml
tests/e2e/nightly/multi_node/internal_dp/config/Qwen3-235B-A22B.yaml
tests/e2e/nightly/multi_node/internal_dp/config/Qwen3-235B-W8A8-EPLB.yaml
tests/e2e/nightly/multi_node/internal_dp/config/Qwen3-235B-W8A8-longseq.yaml
tests/e2e/nightly/multi_node/internal_dp/config/Qwen3-235B-W8A8.yaml
tests/e2e/nightly/multi_node/internal_dp/config/Qwen3-235B-disagg-pd.yaml
tests/e2e/nightly/multi_node/internal_dp/config/Qwen3-VL-235B-disagg-pd.yaml
tests/e2e/nightly/multi_node/internal_dp/scripts/__init__.py
tests/e2e/nightly/multi_node/internal_dp/scripts/multi_node_config.py
tests/e2e/nightly/multi_node/internal_dp/scripts/test_multi_node.py
tests/e2e/nightly/multi_node/internal_dp/scripts/utils.py
tests/e2e/nightly/multi_node/scripts/__init__.py
tests/e2e/nightly/multi_node/scripts/benchmark_results.py
tests/e2e/nightly/multi_node/scripts/lws.yaml.jinja2
tests/e2e/nightly/multi_node/scripts/run.sh
tests/e2e/nightly/multi_node/scripts/utils.py
tests/e2e/nightly/single_node/models/configs/DeepSeek-R1-0528-W8A8.yaml
tests/e2e/nightly/single_node/models/configs/DeepSeek-V3.2-W8A8.yaml
tests/e2e/nightly/single_node/models/configs/DeepSeek-V4-Flash-W8A8-A3.yaml
tests/e2e/nightly/single_node/models/configs/GLM-4.7.yaml
tests/e2e/nightly/single_node/models/configs/Hy3-preview.yaml
tests/e2e/nightly/single_node/models/configs/Kimi-K2-Thinking.yaml
tests/e2e/nightly/single_node/models/configs/Kimi-K2.5.yaml
tests/e2e/nightly/single_node/models/configs/MTPX-DeepSeek-R1-0528-W8A8.yaml
tests/e2e/nightly/single_node/models/configs/MiniMax-M2.5-w8a8-QuaRot-A2.yaml
tests/e2e/nightly/single_node/models/configs/MiniMax-M2.5-w8a8-QuaRot-A3.yaml
tests/e2e/nightly/single_node/models/configs/Prefix-Cache-DeepSeek-R1-0528-W8A8.yaml
tests/e2e/nightly/single_node/models/configs/Prefix-Cache-Qwen3-32B-Int8.yaml
tests/e2e/nightly/single_node/models/configs/Qwen3-235B-A22B-W8A8.yaml
tests/e2e/nightly/single_node/models/configs/Qwen3-30B-A3B-W4A8-llm-compressor.yaml
tests/e2e/nightly/single_node/models/configs/Qwen3-30B-A3B-W8A8.yaml
tests/e2e/nightly/single_node/models/configs/Qwen3-30B-QuaRot-eagle3.yaml
tests/e2e/nightly/single_node/models/configs/Qwen3-32B-Int8-A2.yaml
tests/e2e/nightly/single_node/models/configs/Qwen3-32B-Int8.yaml
tests/e2e/nightly/single_node/models/configs/Qwen3-32B-QuaRot-eagle3.yaml
tests/e2e/nightly/single_node/models/configs/Qwen3-VL-235B-A22B-Instruct-W8A8.yaml
tests/e2e/nightly/single_node/models/configs/Qwen3-VL-32B-Instruct-W8A8.yaml
tests/e2e/nightly/single_node/models/configs/Qwen3.5-122B-A10B-W8A8-A3.yaml
tests/e2e/nightly/single_node/models/configs/Qwen3.5-27B-w8a8-A2.yaml
tests/e2e/nightly/single_node/models/configs/Qwen3.5-27B-w8a8-A3.yaml
tests/e2e/nightly/single_node/models/configs/Qwen3.5-397B-A17B-W8A8-mtp-A3.yaml
tests/e2e/nightly/single_node/models/configs/Qwen3.5-397B-A17B-w4a8-mtp-A2.yaml
tests/e2e/nightly/single_node/models/scripts/GUIDE_AND_TEMPLATE.md
tests/e2e/nightly/single_node/models/scripts/__init__.py
tests/e2e/nightly/single_node/models/scripts/single_node_config.py
tests/e2e/nightly/single_node/models/scripts/test_single_node.py
tests/e2e/nightly/single_node/ops/__init__.py
tests/e2e/nightly/single_node/ops/conftest.py
tests/e2e/nightly/single_node/ops/multicard_ops_a2/__init__.py
tests/e2e/nightly/single_node/ops/multicard_ops_a2/test_matmul_allreduce_add_rmsnorm.py
tests/e2e/nightly/single_node/ops/multicard_ops_a3/__init__.py
tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine.py
tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_bf16.py
tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_ffn_combine_w4a8.py
tests/e2e/nightly/single_node/ops/multicard_ops_a3/test_dispatch_gmm_combine_decode.py
tests/e2e/nightly/single_node/ops/singlecard_ops/__init__.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_add_rms_norm_bias.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_apply_top_k_top_p_custom.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_batch_matmul_transpose.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_expand.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_bgmv_shrink.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_causal_conv1d_310.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_copy_and_expand_eagle_inputs.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_dequant_swiglu_quant.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_fused_moe.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_gating_top_k_softmax.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_gmm_swiglu_quant_weight_nz_tensor_list.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_grouped_matmul_swiglu_quant.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_hamming_dist_top_k.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_nq.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_mla_preprocess_qdown.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_moe_init_routing_custom.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_ngram_spec_decode.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_hc_pre.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_npu_moe_gating_top_k.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_recurrent_gated_delta_rule.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_recurrent_gated_delta_rule_310.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_reshape_and_cache_bnsd.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_transpose_kv_cache_by_block.py
tests/e2e/nightly/single_node/ops/singlecard_ops/test_vocabparallelembedding.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/__init__.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_apply_penalties_triton.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_bad_words.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_batch_memcpy.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_bincount.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_causal_conv1d.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_chunk_gated_delta_rule.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_clear_ssm_states.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_compute_slot_mapping.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_compute_token_logprobs.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_compute_topk_logprobs.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_gdn_gating.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_qkvzba_split_reshape_cat.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_recurrent_gated_delta_rule.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_fused_sigmoid_gating_delta_rule.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_gdn_chunk_meta.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_l2norm.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_log_softmax.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_min_p.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_mrope.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_muls_add.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_penality.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_post_update.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_prepare_inputs_padded.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rejection_sample.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_rope.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_mrope.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_rmsnorm_rope.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_split_qkv_tp_rmsnorm_rope.py
tests/e2e/nightly/single_node/ops/singlecard_ops/triton/test_temperature.py
tests/e2e/prompts/example.txt
tests/e2e/prompts/long_prompt.txt
tests/e2e/singlecard/__init__.py
tests/e2e/singlecard/test_aclgraph_accuracy.py
tests/e2e/singlecard/test_aclgraph_batch_invariant.py
tests/e2e/singlecard/test_aclgraph_mem.py
tests/e2e/singlecard/test_async_scheduling.py
tests/e2e/singlecard/test_attention_fa3.py
tests/e2e/singlecard/test_auto_fit_max_mode_len.py
tests/e2e/singlecard/test_batch_invariant.py
tests/e2e/singlecard/test_camem.py
tests/e2e/singlecard/test_completion_with_prompt_embeds.py
tests/e2e/singlecard/test_cpu_offloading.py
tests/e2e/singlecard/test_eager_mode_acc.py
tests/e2e/singlecard/test_guided_decoding.py
tests/e2e/singlecard/test_ilama_lora.py
tests/e2e/singlecard/test_llama32_lora.py
tests/e2e/singlecard/test_models.py
tests/e2e/singlecard/test_multi_instance.py
tests/e2e/singlecard/test_multistream_overlap_shared_expert.py
tests/e2e/singlecard/test_qwen35_densemodel_lora.py
tests/e2e/singlecard/test_qwen3_multi_loras.py
tests/e2e/singlecard/test_sampler.py
tests/e2e/singlecard/test_vlm.py
tests/e2e/singlecard/test_xlite.py
tests/e2e/singlecard/utils.py
tests/e2e/singlecard/compile/__init__.py
tests/e2e/singlecard/compile/backend.py
tests/e2e/singlecard/compile/test_graphex_norm_quant_fusion.py
tests/e2e/singlecard/compile/test_graphex_qknorm_rope_fusion.py
tests/e2e/singlecard/compile/test_norm_quant_fusion.py
tests/e2e/singlecard/model_runner_v2/__init__.py
tests/e2e/singlecard/model_runner_v2/test_basic.py
tests/e2e/singlecard/pooling/__init__.py
tests/e2e/singlecard/pooling/test_classification.py
tests/e2e/singlecard/pooling/test_embedding.py
tests/e2e/singlecard/pooling/test_qwen3_reranker_lora.py
tests/e2e/singlecard/pooling/test_scoring.py
tests/e2e/singlecard/pooling/template/qwen3_reranker.jinja
tests/e2e/singlecard/spec_decode/__init__.py
tests/e2e/singlecard/spec_decode/test_extract_hidden_states.py
tests/e2e/singlecard/spec_decode/test_mtp_eagle_correctness.py
tests/e2e/singlecard/spec_decode/test_v1_spec_decode.py
tests/e2e/vllm_interface/vllm_test.cfg
tests/e2e/vllm_interface/singlecard/test_sampler.py
tests/e2e/weekly/multi_node/internal_dp/config/DeepSeek-V3.yaml
tests/e2e/weekly/multi_node/internal_dp/config/DeepSeek-V3_2-W8A8-EP_weekly.yaml
tests/e2e/weekly/multi_node/internal_dp/config/GLM-4.7-W8A8C8-Mooncake-Layerwise.yaml
tests/e2e/weekly/multi_node/internal_dp/config/GLM5_1-W8A8-A3-dual-nodes.yaml
tests/e2e/weekly/single_node/configs/DeepSeek-V3.2-W8A8.yaml
tests/e2e/weekly/single_node/configs/DeepSeek-V3.2-W8A8_A3_weekly.yaml
tests/e2e/weekly/single_node/configs/GLM-5.yaml
tests/e2e/weekly/single_node/configs/GLM-5_1-W8A8_A3_weekly.yaml
tests/e2e/weekly/single_node/configs/Kimi-K2.5-32k-512.yaml
tests/e2e/weekly/single_node/configs/Kimi-K2.5.yaml
tests/e2e/weekly/single_node/configs/MiniMax-M2.5-W8A8-A3.yaml
tests/e2e/weekly/single_node/configs/MiniMax-M2.5-w8a8-QuaRot-A3.yaml
tests/e2e/weekly/single_node/configs/Qwen2.5-VL-7B-Instruct-EPD.yaml
tests/e2e/weekly/single_node/configs/Qwen3-32B.yaml
tests/e2e/weekly/single_node/configs/Qwen3.5-122B-A10B-W8A8-A2.yaml
tests/e2e/weekly/single_node/configs/Qwen3.5-122B-A10B-W8A8-A3.yaml
tests/e2e/weekly/single_node/configs/Qwen3.5-27B-w8a8-A3.yaml
tests/e2e/weekly/single_node/configs/Qwen3.5-397B-A17B-W8A8-mtp-A3.yaml
tests/e2e/weekly/single_node/configs/Qwen3.5-397B-A17B-W8A8-mtp-A3_weekly.yaml
tests/e2e/weekly/single_node/models/test_qwen3_30b_acc.py
tests/ut/__init__.py
tests/ut/base.py
tests/ut/conftest.py
tests/ut/test_ai_qos_tool.py
tests/ut/test_ascend_config.py
tests/ut/test_envs.py
tests/ut/test_platform.py
tests/ut/test_utils.py
tests/ut/_310p/test_block_table_310p.py
tests/ut/_310p/test_model_runner_310p.py
tests/ut/_310p/test_sharded_state_loader_310p.py
tests/ut/_310p/attention/test_attention_mask_310.py
tests/ut/_310p/attention/test_attention_v1_310.py
tests/ut/_310p/fused_moe/test_experts_selector_310.py
tests/ut/_310p/fused_moe/test_moe_mlp_310.py
tests/ut/_310p/fused_moe/test_shared_fused_moe_310.py
tests/ut/_310p/ops/test_chunk_gated_delta_rule_310.py
tests/ut/_310p/ops/test_conv_310.py
tests/ut/_310p/ops/test_layernorm_310.py
tests/ut/_310p/ops/test_mm_encoder_attention_310.py
tests/ut/_310p/ops/test_rotary_embedding_310.py
tests/ut/_310p/quantization/test_modelslim_config_310.py
tests/ut/_310p/quantization/test_w8a8_dynamic_310.py
tests/ut/_310p/quantization/test_w8a8_static_310.py
tests/ut/_310p/quantization/test_w8a8s_310.py
tests/ut/_310p/quantization/test_w8a8sc_310.py
tests/ut/_310p/sample/test_sampler_310.py
tests/ut/attention/test_attention_cp.py
tests/ut/attention/test_attention_cp_precision.py
tests/ut/attention/test_attention_fa3.py
tests/ut/attention/test_attention_mask.py
tests/ut/attention/test_attention_v1.py
tests/ut/attention/test_attention_v1_precision.py
tests/ut/attention/test_common_cp.py
tests/ut/attention/test_mla_cp.py
tests/ut/attention/test_mla_cp_precision.py
tests/ut/attention/test_mla_precision.py
tests/ut/attention/test_mla_v1.py
tests/ut/attention/test_sfa_cp.py
tests/ut/attention/test_sfa_cp_precision.py
tests/ut/attention/test_sfa_v1.py
tests/ut/attention/test_sfa_v1_precision.py
tests/ut/attention/utils.py
tests/ut/batch_invariant/test_batch_invariant.py
tests/ut/compilation/test_acl_graph.py
tests/ut/compilation/test_npugraph_ex_utils_check.py
tests/ut/core/test_profiling_chunk.py
tests/ut/core/test_scheduler_dynamic_batch.py
tests/ut/device_allocator/test_camem.py
tests/ut/device_allocator/test_cpu_binding.py
tests/ut/distributed/test_communicator.py
tests/ut/distributed/test_determin_expert_map_all.py
tests/ut/distributed/test_parallel_state.py
tests/ut/distributed/ascend_store/_mock_deps.py
tests/ut/distributed/ascend_store/test_ascend_store_connector.py
tests/ut/distributed/ascend_store/test_backend.py
tests/ut/distributed/ascend_store/test_config_data.py
tests/ut/distributed/ascend_store/test_kv_transfer.py
tests/ut/distributed/ascend_store/test_pool_scheduler.py
tests/ut/distributed/ascend_store/test_pool_worker.py
tests/ut/distributed/device_communicators/test_pyhccl.py
tests/ut/distributed/device_communicators/test_pyhccl_wrapper.py
tests/ut/distributed/mooncake/test_mooncake_config_data.py
tests/ut/distributed/mooncake/test_mooncake_kv_transfer.py
tests/ut/dummy/test_dummy.py
tests/ut/eplb/test_eplb_updator.py
tests/ut/eplb/adaptor/test_vllm_adaptor.py
tests/ut/eplb/core/expert_map.json
tests/ut/eplb/core/test_eplb_device_transfer_loader.py
tests/ut/eplb/core/test_eplb_utils.py
tests/ut/eplb/core/policy/test_policy_factory.py
tests/ut/fake_weight/config.json
tests/ut/kv_connector/test_mooncake_connector.py
tests/ut/kv_connector/test_mooncake_layerwise_connector.py
tests/ut/kv_connector/test_remote_decode_lifecycle.py
tests/ut/kv_connector/test_remote_prefill_lifecycle.py
tests/ut/kv_connector/utils.py
tests/ut/model_loader/netloader/test_netloader.py
tests/ut/model_loader/netloader/test_netloader_elastic.py
tests/ut/model_loader/netloader/test_netloader_load.py
tests/ut/model_loader/netloader/test_netloader_utils.py
tests/ut/ops/test_activation.py
tests/ut/ops/test_comm_utils.py
tests/ut/ops/test_flashcomm2_oshard_manager.py
tests/ut/ops/test_fused_moe.py
tests/ut/ops/test_gate_linear.py
tests/ut/ops/test_gdn_chunk_meta.py
tests/ut/ops/test_layer_shard_linear.py
tests/ut/ops/test_layernorm.py
tests/ut/ops/test_linear.py
tests/ut/ops/test_mla.py
tests/ut/ops/test_moe_comm_method.py
tests/ut/ops/test_moe_mlp.py
tests/ut/ops/test_moe_runtime_args.py
tests/ut/ops/test_prepare_finalize.py
tests/ut/ops/test_rotary_embedding.py
tests/ut/ops/test_select_experts.py
tests/ut/ops/test_token_dispatcher.py
tests/ut/ops/test_vocab_parallel_embedding.py
tests/ut/ops/test_weight_prefetch.py
tests/ut/patch/platform/test_patch_deepseek_v4_thinking.py
tests/ut/patch/platform/test_patch_deepseek_v4_tool_call_parser.py
tests/ut/patch/platform/test_patch_glm47_tool_call_parser.py
tests/ut/patch/platform/test_patch_glm_tool_call_streaming.py
tests/ut/patch/platform/test_patch_minimax_m2_tool_call_parser.py
tests/ut/patch/platform/test_patch_minimax_usage_accounting.py
tests/ut/patch/platform/test_patch_tool_choice_none_content.py
tests/ut/patch/worker/patch_common/test_hccl_pg_registry.py
tests/ut/patch/worker/patch_common/test_patch_distributed.py
tests/ut/patch/worker/patch_common/test_patch_gdn_attn.py
tests/ut/profiler/test_torch_npu_profiler.py
tests/ut/quantization/conftest_quantization.py
tests/ut/quantization/test_compressed_tensors_config.py
tests/ut/quantization/test_method_adapters.py
tests/ut/quantization/test_modelslim_config.py
tests/ut/quantization/test_quant_parser.py
tests/ut/quantization/test_utils.py
tests/ut/quantization/methods/test_kv_c8.py
tests/ut/quantization/methods/test_moe_logical_experts.py
tests/ut/quantization/methods/test_registry.py
tests/ut/quantization/methods/test_w4a16.py
tests/ut/quantization/methods/test_w4a4_flatquant.py
tests/ut/quantization/methods/test_w4a4_laos_dynamic.py
tests/ut/quantization/methods/test_w4a4_mxfp4.py
tests/ut/quantization/methods/test_w4a4_mxfp4_flatquant_dynamic.py
tests/ut/quantization/methods/test_w4a8.py
tests/ut/quantization/methods/test_w8a16.py
tests/ut/quantization/methods/test_w8a8_dynamic.py
tests/ut/quantization/methods/test_w8a8_mxfp8.py
tests/ut/quantization/methods/test_w8a8_pdmix.py
tests/ut/quantization/methods/test_w8a8_static.py
tests/ut/sample/test_rejection_sampler.py
tests/ut/sample/test_sampler.py
tests/ut/spec_decode/test_eagle_proposer.py
tests/ut/spec_decode/test_extract_hidden_states_proposer.py
tests/ut/tools/test_docs_codegen.py
tests/ut/worker/test_block_table.py
tests/ut/worker/test_dsv4_compressed_positions.py
tests/ut/worker/test_kvcomp_utils.py
tests/ut/worker/test_model_runner_v1.py
tests/ut/worker/test_model_runner_v1_with_device.py
tests/ut/worker/test_pcp_manager.py
tests/ut/worker/test_worker_multi_instance.py
tests/ut/worker/test_worker_v1.py
tools/actionlint.sh
tools/ai_qos.py
tools/aisbench.py
tools/bisect_helper.py
tools/bisect_vllm.sh
tools/check_boolean_context_manager.py
tools/check_forbidden_imports.py
tools/check_logger.sh
tools/check_python_src_init.py
tools/check_repo.sh
tools/collect_user_first_contribution.sh
tools/format_contributors.py
tools/mooncake_installer.sh
tools/mypy.sh
tools/png-lint.sh
tools/send_mm_request.py
tools/send_request.py
tools/shellcheck.sh
tools/sphinx-lint.sh
tools/vllm_bench.py
tools/ai_qos/CMakeLists.txt
tools/ai_qos/ai_qos.cpp
tools/docs_codegen/__init__.py
tools/docs_codegen/cli.py
tools/docs_codegen/converters.py
tools/docs_codegen/errors.py
tools/docs_codegen/generator.py
tools/docs_codegen/scanner.py
tools/docs_codegen/sphinx_extension.py
tools/docs_codegen/utils.py
tools/docs_codegen/yaml_loader.py
vllm_ascend/__init__.py
vllm_ascend/_version.py
vllm_ascend/ascend_config.py
vllm_ascend/ascend_forward_context.py
vllm_ascend/batch_invariant.py
vllm_ascend/cpu_binding.py
vllm_ascend/envs.py
vllm_ascend/flash_common3_context.py
vllm_ascend/meta_registration.py
vllm_ascend/platform.py
vllm_ascend/profiling_config.py
vllm_ascend/utils.py
vllm_ascend.egg-info/PKG-INFO
vllm_ascend.egg-info/SOURCES.txt
vllm_ascend.egg-info/dependency_links.txt
vllm_ascend.egg-info/entry_points.txt
vllm_ascend.egg-info/requires.txt
vllm_ascend.egg-info/top_level.txt
vllm_ascend/_310p/__init__.py
vllm_ascend/_310p/block_table.py
vllm_ascend/_310p/model_runner_310p.py
vllm_ascend/_310p/npu_input_batch.py
vllm_ascend/_310p/sharded_state_loader_310p.py
vllm_ascend/_310p/worker_310p.py
vllm_ascend/_310p/attention/__init__.py
vllm_ascend/_310p/attention/attention_mask.py
vllm_ascend/_310p/attention/attention_v1.py
vllm_ascend/_310p/attention/metadata_builder.py
vllm_ascend/_310p/fused_moe/__init__.py
vllm_ascend/_310p/fused_moe/experts_selector.py
vllm_ascend/_310p/fused_moe/fused_moe.py
vllm_ascend/_310p/fused_moe/moe_comm_method.py
vllm_ascend/_310p/fused_moe/moe_mlp.py
vllm_ascend/_310p/fused_moe/token_dispatcher.py
vllm_ascend/_310p/ops/__init__.py
vllm_ascend/_310p/ops/activation.py
vllm_ascend/_310p/ops/causal_conv1d.py
vllm_ascend/_310p/ops/conv.py
vllm_ascend/_310p/ops/layernorm.py
vllm_ascend/_310p/ops/mm_encoder_attention.py
vllm_ascend/_310p/ops/rotary_embedding.py
vllm_ascend/_310p/ops/vocab_parallel_embedding.py
vllm_ascend/_310p/ops/fla/__init__.py
vllm_ascend/_310p/ops/fla/chunk_gated_delta_rule.py
vllm_ascend/_310p/ops/fla/fused_gdn_gating.py
vllm_ascend/_310p/ops/fla/fused_recurrent_gated_delta_rule.py
vllm_ascend/_310p/ops/fla/gdn_310.py
vllm_ascend/_310p/ops/fla/idex.py
vllm_ascend/_310p/quantization/__init__.py
vllm_ascend/_310p/quantization/modelslim_config.py
vllm_ascend/_310p/quantization/methods/__init__.py
vllm_ascend/_310p/quantization/methods/registry.py
vllm_ascend/_310p/quantization/methods/w8a8_base.py
vllm_ascend/_310p/quantization/methods/w8a8_dynamic.py
vllm_ascend/_310p/quantization/methods/w8a8_static.py
vllm_ascend/_310p/quantization/methods/w8a8s.py
vllm_ascend/_310p/quantization/methods/w8a8sc.py
vllm_ascend/_310p/sample/__init__.py
vllm_ascend/_310p/sample/sampler.py
vllm_ascend/_cann_ops_custom/.gitkeep
vllm_ascend/attention/__init__.py
vllm_ascend/attention/abstract.py
vllm_ascend/attention/attention_mask.py
vllm_ascend/attention/attention_v1.py
vllm_ascend/attention/dsa_v1.py
vllm_ascend/attention/fa3_v1.py
vllm_ascend/attention/mla_v1.py
vllm_ascend/attention/sfa_v1.py
vllm_ascend/attention/utils.py
vllm_ascend/attention/context_parallel/__init__.py
vllm_ascend/attention/context_parallel/attention_cp.py
vllm_ascend/attention/context_parallel/common_cp.py
vllm_ascend/attention/context_parallel/dsa_cp.py
vllm_ascend/attention/context_parallel/mla_cp.py
vllm_ascend/attention/context_parallel/sfa_cp.py
vllm_ascend/attention/kvcomp_attn/__init__.py
vllm_ascend/attention/kvcomp_attn/attention_utils.py
vllm_ascend/compilation/__init__.py
vllm_ascend/compilation/acl_graph.py
vllm_ascend/compilation/compiler_interface.py
vllm_ascend/compilation/graph_fusion_pass_manager.py
vllm_ascend/compilation/passes/__init__.py
vllm_ascend/compilation/passes/allgather_chunk_noop_pass.py
vllm_ascend/compilation/passes/allreduce_rmsnorm_fusion_pass.py
vllm_ascend/compilation/passes/base_pattern.py
vllm_ascend/compilation/passes/muls_add_pass.py
vllm_ascend/compilation/passes/noop_elimination.py
vllm_ascend/compilation/passes/norm_quant_fusion_pass.py
vllm_ascend/compilation/passes/qknorm_rope_fusion_pass.py
vllm_ascend/compilation/passes/sequence_parallelism.py
vllm_ascend/compilation/passes/sequence_parallelism_moe.py
vllm_ascend/compilation/passes/utils/__init__.py
vllm_ascend/compilation/passes/utils/npugraph_ex_utils_check.py
vllm_ascend/core/__init__.py
vllm_ascend/core/profiling_chunk_predictor.py
vllm_ascend/core/recompute_scheduler.py
vllm_ascend/core/scheduler_dynamic_batch.py
vllm_ascend/core/scheduler_profiling_chunk.py
vllm_ascend/core/single_type_kv_cache_manager.py
vllm_ascend/device/__init__.py
vllm_ascend/device/device_op.py
vllm_ascend/device/mxfp_compat.py
vllm_ascend/device_allocator/__init__.py
vllm_ascend/device_allocator/camem.py
vllm_ascend/distributed/__init__.py
vllm_ascend/distributed/parallel_state.py
vllm_ascend/distributed/utils.py
vllm_ascend/distributed/device_communicators/__init__.py
vllm_ascend/distributed/device_communicators/npu_communicator.py
vllm_ascend/distributed/device_communicators/pyhccl.py
vllm_ascend/distributed/device_communicators/pyhccl_wrapper.py
vllm_ascend/distributed/kv_transfer/__init__.py
vllm_ascend/distributed/kv_transfer/ascend_multi_connector.py
vllm_ascend/distributed/kv_transfer/kv_p2p/__init__.py
vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_connector.py
vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_hybrid_connector.py
vllm_ascend/distributed/kv_transfer/kv_p2p/mooncake_layerwise_connector.py
vllm_ascend/distributed/kv_transfer/kv_pool/__init__.py
vllm_ascend/distributed/kv_transfer/kv_pool/lmcache_ascend_connector.py
vllm_ascend/distributed/kv_transfer/kv_pool/ucm_connector.py
vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/__init__.py
vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/ascend_store_connector.py
vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/config_data.py
vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/kv_transfer.py
vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/pool_scheduler.py
vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/pool_worker.py
vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/backend/__init__.py
vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/backend/backend.py
vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/backend/memcache_backend.py
vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/backend/mooncake_backend.py
vllm_ascend/distributed/kv_transfer/kv_pool/ascend_store/backend/yuanrong_backend.py
vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/__init__.py
vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/cpu_kv_cache_manager.py
vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/cpu_offload_connector.py
vllm_ascend/distributed/kv_transfer/kv_pool/cpu_offload/metadata.py
vllm_ascend/distributed/kv_transfer/utils/__init__.py
vllm_ascend/distributed/kv_transfer/utils/mooncake_transfer_engine.py
vllm_ascend/distributed/kv_transfer/utils/utils.py
vllm_ascend/eplb/__init__.py
vllm_ascend/eplb/eplb_updator.py
vllm_ascend/eplb/utils.py
vllm_ascend/eplb/adaptor/__init__.py
vllm_ascend/eplb/adaptor/vllm_adaptor.py
vllm_ascend/eplb/core/__init__.py
vllm_ascend/eplb/core/eplb_device_transfer_loader.py
vllm_ascend/eplb/core/eplb_utils.py
vllm_ascend/eplb/core/eplb_worker.py
vllm_ascend/eplb/core/policy/__init__.py
vllm_ascend/eplb/core/policy/policy_abstract.py
vllm_ascend/eplb/core/policy/policy_default_eplb.py
vllm_ascend/eplb/core/policy/policy_factory.py
vllm_ascend/eplb/core/policy/policy_flashlb.py
vllm_ascend/eplb/core/policy/policy_random.py
vllm_ascend/eplb/core/policy/policy_swift_balancer.py
vllm_ascend/kv_offload/__init__.py
vllm_ascend/kv_offload/cpu_npu.py
vllm_ascend/kv_offload/npu.py
vllm_ascend/lora/__init__.py
vllm_ascend/lora/lora_ops.py
vllm_ascend/lora/punica_npu.py
vllm_ascend/lora/utils.py
vllm_ascend/model_loader/__init__.py
vllm_ascend/model_loader/netloader/__init__.py
vllm_ascend/model_loader/netloader/load.py
vllm_ascend/model_loader/netloader/netloader.py
vllm_ascend/model_loader/netloader/utils.py
vllm_ascend/model_loader/netloader/executor/__init__.py
vllm_ascend/model_loader/netloader/executor/elastic_load.py
vllm_ascend/model_loader/netloader/executor/netloader_pg.py
vllm_ascend/model_loader/netloader/interaction/__init__.py
vllm_ascend/model_loader/netloader/interaction/elastic.py
vllm_ascend/model_loader/rfork/__init__.py
vllm_ascend/model_loader/rfork/rfork_loader.py
vllm_ascend/model_loader/rfork/rfork_worker.py
vllm_ascend/model_loader/rfork/seed_protocol.py
vllm_ascend/model_loader/rfork/seed_server.py
vllm_ascend/model_loader/rfork/transfer_backend.py
vllm_ascend/models/__init__.py
vllm_ascend/models/deepseek_v4.py
vllm_ascend/models/deepseek_v4_mtp.py
vllm_ascend/models/layer/__init__.py
vllm_ascend/models/layer/attention/__init__.py
vllm_ascend/models/layer/attention/layer.py
vllm_ascend/ops/__init__.py
vllm_ascend/ops/activation.py
vllm_ascend/ops/bailing_moe_linear_attn.py
vllm_ascend/ops/conv.py
vllm_ascend/ops/cv_linear.py
vllm_ascend/ops/dsa.py
vllm_ascend/ops/flashcomm2_oshard_manager.py
vllm_ascend/ops/gdn.py
vllm_ascend/ops/layer_shard_linear.py
vllm_ascend/ops/layernorm.py
vllm_ascend/ops/linear.py
vllm_ascend/ops/linear_op.py
vllm_ascend/ops/mhc.py
vllm_ascend/ops/mla.py
vllm_ascend/ops/mm_encoder_attention.py
vllm_ascend/ops/qwen2_decoder.py
vllm_ascend/ops/register_custom_ops.py
vllm_ascend/ops/rel_pos_attention.py
vllm_ascend/ops/rope_dsv4.py
vllm_ascend/ops/rotary_embedding.py
vllm_ascend/ops/vocab_parallel_embedding.py
vllm_ascend/ops/weight_prefetch.py
vllm_ascend/ops/fused_moe/__init__.py
vllm_ascend/ops/fused_moe/comm_utils.py
vllm_ascend/ops/fused_moe/experts_selector.py
vllm_ascend/ops/fused_moe/fused_moe.py
vllm_ascend/ops/fused_moe/gate_linear.py
vllm_ascend/ops/fused_moe/moe_comm_method.py
vllm_ascend/ops/fused_moe/moe_mlp.py
vllm_ascend/ops/fused_moe/moe_runtime_args.py
vllm_ascend/ops/fused_moe/moe_stage_contracts.py
vllm_ascend/ops/fused_moe/moe_stage_params.py
vllm_ascend/ops/fused_moe/prepare_finalize.py
vllm_ascend/ops/fused_moe/token_dispatcher.py
vllm_ascend/ops/triton/__init__.py
vllm_ascend/ops/triton/batch_memcpy.py
vllm_ascend/ops/triton/bincount.py
vllm_ascend/ops/triton/fused_gdn_gating.py
vllm_ascend/ops/triton/gdn_chunk_meta.py
vllm_ascend/ops/triton/layernorm_gated.py
vllm_ascend/ops/triton/mul_add.py
vllm_ascend/ops/triton/muls_add.py
vllm_ascend/ops/triton/penalty.py
vllm_ascend/ops/triton/reject_sample.py
vllm_ascend/ops/triton/rms_norm.py
vllm_ascend/ops/triton/rope.py
vllm_ascend/ops/triton/triton_utils.py
vllm_ascend/ops/triton/activation/__init__.py
vllm_ascend/ops/triton/activation/swiglu_quant.py
vllm_ascend/ops/triton/batch_invariant/__init__.py
vllm_ascend/ops/triton/batch_invariant/matmul.py
vllm_ascend/ops/triton/batch_invariant/mean.py
vllm_ascend/ops/triton/batch_invariant/rmsnorm.py
vllm_ascend/ops/triton/batch_invariant/softmax.py
vllm_ascend/ops/triton/fla/__init__.py
vllm_ascend/ops/triton/fla/chunk.py
vllm_ascend/ops/triton/fla/chunk_delta_h.py
vllm_ascend/ops/triton/fla/chunk_delta_hupdate.py
vllm_ascend/ops/triton/fla/chunk_o.py
vllm_ascend/ops/triton/fla/chunk_o_update.py
vllm_ascend/ops/triton/fla/chunk_scaled_dot_kkt.py
vllm_ascend/ops/triton/fla/cumsum.py
vllm_ascend/ops/triton/fla/fused_qkvzba_split_reshape.py
vllm_ascend/ops/triton/fla/l2norm.py
vllm_ascend/ops/triton/fla/layernorm_guard.py
vllm_ascend/ops/triton/fla/sigmoid_gating.py
vllm_ascend/ops/triton/fla/solve_tril.py
vllm_ascend/ops/triton/fla/utils.py
vllm_ascend/ops/triton/fla/wy_fast.py
vllm_ascend/ops/triton/linearnorm/__init__.py
vllm_ascend/ops/triton/linearnorm/split_qkv_rmsnorm_mrope.py
vllm_ascend/ops/triton/linearnorm/split_qkv_rmsnorm_rope.py
vllm_ascend/ops/triton/linearnorm/split_qkv_tp_rmsnorm_rope.py
vllm_ascend/ops/triton/mamba/__init__.py
vllm_ascend/ops/triton/mamba/causal_conv1d.py
vllm_ascend/ops/triton/mamba/lightning_attn.py
vllm_ascend/ops/triton/spec_decode/__init__.py
vllm_ascend/ops/triton/spec_decode/utils.py
vllm_ascend/patch/__init__.py
vllm_ascend/patch/platform/__init__.py
vllm_ascend/patch/platform/patch_balance_schedule.py
vllm_ascend/patch/platform/patch_camem_allocator.py
vllm_ascend/patch/platform/patch_deepseek_v4_thinking.py
vllm_ascend/patch/platform/patch_deepseek_v4_tool_call_parser.py
vllm_ascend/patch/platform/patch_distributed.py
vllm_ascend/patch/platform/patch_glm47_tool_call_parser.py
vllm_ascend/patch/platform/patch_glm_tool_call_streaming.py
vllm_ascend/patch/platform/patch_kv_cache_coordinator.py
vllm_ascend/patch/platform/patch_kv_cache_interface.py
vllm_ascend/patch/platform/patch_kv_cache_utils.py
vllm_ascend/patch/platform/patch_mamba_config.py
vllm_ascend/patch/platform/patch_mamba_config_310.py
vllm_ascend/patch/platform/patch_minimax_m2_config.py
vllm_ascend/patch/platform/patch_minimax_m2_tool_call_parser.py
vllm_ascend/patch/platform/patch_minimax_usage_accounting.py
vllm_ascend/patch/platform/patch_mla_prefill_backend.py
vllm_ascend/patch/platform/patch_multiproc_executor.py
vllm_ascend/patch/platform/patch_profiling_chunk.py
vllm_ascend/patch/platform/patch_speculative_config.py
vllm_ascend/patch/platform/patch_tool_choice_none_content.py
vllm_ascend/patch/platform/patch_torch_accelerator.py
vllm_ascend/patch/worker/__init__.py
vllm_ascend/patch/worker/_hccl_pg_registry.py
vllm_ascend/patch/worker/patch_cudagraph.py
vllm_ascend/patch/worker/patch_deepseek_compressor.py
vllm_ascend/patch/worker/patch_deepseek_mtp.py
vllm_ascend/patch/worker/patch_distributed.py
vllm_ascend/patch/worker/patch_draft_quarot.py
vllm_ascend/patch/worker/patch_gdn_attn.py
vllm_ascend/patch/worker/patch_gqa_c8.py
vllm_ascend/patch/worker/patch_idex_310.py
vllm_ascend/patch/worker/patch_kimi_k25.py
vllm_ascend/patch/worker/patch_mamba_utils.py
vllm_ascend/patch/worker/patch_minimax_m2.py
vllm_ascend/patch/worker/patch_minimax_m2_linear_attn.py
vllm_ascend/patch/worker/patch_npugraph_ex_triton.py
vllm_ascend/patch/worker/patch_qwen3_5.py
vllm_ascend/patch/worker/patch_qwen3_dflash.py
vllm_ascend/patch/worker/patch_qwen3_next_mtp.py
vllm_ascend/patch/worker/patch_qwen3vl.py
vllm_ascend/patch/worker/patch_rejection_sampler.py
vllm_ascend/patch/worker/patch_triton.py
vllm_ascend/patch/worker/patch_weight_utils.py
vllm_ascend/patch/worker/patch_v2/__init__.py
vllm_ascend/patch/worker/patch_v2/patch_attn_utils.py
vllm_ascend/patch/worker/patch_v2/patch_block_table.py
vllm_ascend/patch/worker/patch_v2/patch_input_batch.py
vllm_ascend/patch/worker/patch_v2/patch_model_state.py
vllm_ascend/patch/worker/patch_v2/patch_triton.py
vllm_ascend/patch/worker/patch_v2/patch_uva.py
vllm_ascend/profiler/__init__.py
vllm_ascend/profiler/torch_npu_profiler.py
vllm_ascend/quantization/__init__.py
vllm_ascend/quantization/compressed_tensors_config.py
vllm_ascend/quantization/method_adapters.py
vllm_ascend/quantization/modelslim_config.py
vllm_ascend/quantization/quant_parser.py
vllm_ascend/quantization/quant_type.py
vllm_ascend/quantization/utils.py
vllm_ascend/quantization/methods/__init__.py
vllm_ascend/quantization/methods/base.py
vllm_ascend/quantization/methods/kv_c8.py
vllm_ascend/quantization/methods/registry.py
vllm_ascend/quantization/methods/w4a16.py
vllm_ascend/quantization/methods/w4a4_flatquant.py
vllm_ascend/quantization/methods/w4a4_laos_dynamic.py
vllm_ascend/quantization/methods/w4a4_mxfp4.py
vllm_ascend/quantization/methods/w4a4_mxfp4_flatquant.py
vllm_ascend/quantization/methods/w4a8.py
vllm_ascend/quantization/methods/w8a16.py
vllm_ascend/quantization/methods/w8a8_dynamic.py
vllm_ascend/quantization/methods/w8a8_mxfp8.py
vllm_ascend/quantization/methods/w8a8_pdmix.py
vllm_ascend/quantization/methods/w8a8_static.py
vllm_ascend/sample/__init__.py
vllm_ascend/sample/penalties.py
vllm_ascend/sample/rejection_sampler.py
vllm_ascend/sample/sampler.py
vllm_ascend/spec_decode/__init__.py
vllm_ascend/spec_decode/dflash_proposer.py
vllm_ascend/spec_decode/draft_proposer.py
vllm_ascend/spec_decode/eagle_proposer.py
vllm_ascend/spec_decode/extract_hidden_states_proposer.py
vllm_ascend/spec_decode/llm_base_proposer.py
vllm_ascend/spec_decode/medusa_proposer.py
vllm_ascend/spec_decode/ngram_proposer.py
vllm_ascend/spec_decode/ngram_proposer_npu.py
vllm_ascend/spec_decode/suffix_proposer.py
vllm_ascend/spec_decode/utils.py
vllm_ascend/worker/__init__.py
vllm_ascend/worker/block_table.py
vllm_ascend/worker/kvcomp_utils.py
vllm_ascend/worker/model_runner_v1.py
vllm_ascend/worker/npu_input_batch.py
vllm_ascend/worker/pcp_utils.py
vllm_ascend/worker/worker.py
vllm_ascend/worker/v2/README.md
vllm_ascend/worker/v2/__init__.py
vllm_ascend/worker/v2/aclgraph_utils.py
vllm_ascend/worker/v2/attn_utils.py
vllm_ascend/worker/v2/block_table.py
vllm_ascend/worker/v2/input_batch.py
vllm_ascend/worker/v2/model_runner.py
vllm_ascend/worker/v2/states.py
vllm_ascend/worker/v2/structured_outputs.py
vllm_ascend/worker/v2/utils.py
vllm_ascend/worker/v2/model_states/__init__.py
vllm_ascend/worker/v2/model_states/default.py
vllm_ascend/worker/v2/sample/__init__.py
vllm_ascend/worker/v2/sample/bad_words.py
vllm_ascend/worker/v2/sample/gumbel.py
vllm_ascend/worker/v2/sample/logprob.py
vllm_ascend/worker/v2/sample/min_p.py
vllm_ascend/worker/v2/sample/penalties.py
vllm_ascend/worker/v2/spec_decode/__init__.py
vllm_ascend/worker/v2/spec_decode/rejection_sampler_utils.py
vllm_ascend/worker/v2/spec_decode/eagle/__init__.py
vllm_ascend/worker/v2/spec_decode/eagle/aclgraph.py
vllm_ascend/worker/v2/spec_decode/eagle/speculator.py
vllm_ascend/xlite/__init__.py
vllm_ascend/xlite/utils.py
vllm_ascend/xlite/xlite.py
vllm_ascend/xlite/xlite_model_runner.py
vllm_ascend/xlite/xlite_worker.py