LICENSE
MANIFEST.in
README.md
requirements.txt
setup.py
auto_round_kernel/CMakeLists.txt
auto_round_kernel/__init__.py
auto_round_kernel/ark.cpp
auto_round_kernel/qlinear.py
auto_round_kernel/sdpa.cpp
auto_round_kernel/sdpa_generation.cmake
auto_round_kernel/sdpa_kernel_declarations.hpp.in
auto_round_kernel/sdpa_kernel_instantiation.cpp.in
auto_round_kernel/torch_sdpa_patch.py
auto_round_kernel/version.py
auto_round_kernel/bestla/.clang-format
auto_round_kernel/bestla/CMakeLists.txt
auto_round_kernel/bestla/CMakePresets.json
auto_round_kernel/bestla/README.md
auto_round_kernel/bestla/bestla/bestla.h
auto_round_kernel/bestla/bestla/bestla_device.h
auto_round_kernel/bestla/bestla/bestla_epilogue.h
auto_round_kernel/bestla/bestla/bestla_gemm.h
auto_round_kernel/bestla/bestla/bestla_gemm_kblock.h
auto_round_kernel/bestla/bestla/bestla_jit.h
auto_round_kernel/bestla/bestla/bestla_parallel.h
auto_round_kernel/bestla/bestla/bestla_prologue_a.h
auto_round_kernel/bestla/bestla/bestla_prologue_b.h
auto_round_kernel/bestla/bestla/bestla_storage.h
auto_round_kernel/bestla/bestla/bestla_utils.h
auto_round_kernel/bestla/bestla/bestla_wrapper.h
auto_round_kernel/bestla/bestla/kernel_avx2.h
auto_round_kernel/bestla/bestla/kernel_avx512_bf16.h
auto_round_kernel/bestla/bestla/kernel_avx512_fp16.h
auto_round_kernel/bestla/bestla/kernel_avx512_vnni.h
auto_round_kernel/bestla/bestla/kernel_avx512f.h
auto_round_kernel/bestla/bestla/kernel_avx_vnni.h
auto_round_kernel/bestla/bestla/kernel_jit.h
auto_round_kernel/bestla/bestla/kernel_jit_injector.h
auto_round_kernel/bestla/bestla/kernel_ref.h
auto_round_kernel/bestla/bestla/kernel_wrapper.h
auto_round_kernel/bestla/bestla/sycl/fp8_lut.h
auto_round_kernel/bestla/bestla/sycl/sycl_device.h
auto_round_kernel/bestla/bestla/sycl/sycl_epilogue.h
auto_round_kernel/bestla/bestla/sycl/sycl_gemm.h
auto_round_kernel/bestla/bestla/sycl/sycl_prologue_a.h
auto_round_kernel/bestla/bestla/sycl/sycl_prologue_b.h
auto_round_kernel/bestla/bestla/sycl/sycl_storage.h
auto_round_kernel/bestla/bestla/sycl/sycl_utils.h
auto_round_kernel/bestla/bestla/sycl/sycl_wrapper.h
auto_round_kernel/bestla/bestla/ut/bestla.cpp
auto_round_kernel/bestla/bestla/ut/bestla_benchmark.cpp
auto_round_kernel/bestla/bestla/ut/bestla_epilogue.cpp
auto_round_kernel/bestla/bestla/ut/bestla_gemm.cpp
auto_round_kernel/bestla/bestla/ut/bestla_parallel.cpp
auto_round_kernel/bestla/bestla/ut/bestla_prologue_a.cpp
auto_round_kernel/bestla/bestla/ut/bestla_prologue_b.cpp
auto_round_kernel/bestla/bestla/ut/bestla_ut.cpp
auto_round_kernel/bestla/bestla/ut/bestla_ut.h
auto_round_kernel/bestla/bestla/ut/bestla_utils.cpp
auto_round_kernel/bestla/bestla/ut/bestla_wrapper.cpp
auto_round_kernel/bestla/bestla/ut/kernel_intrin.cpp
auto_round_kernel/bestla/bestla/ut/kernel_jit.cpp
auto_round_kernel/bestla/bestla/ut/kernel_ut.h
auto_round_kernel/bestla/bestla/ut/kernel_wrapper.cpp
auto_round_kernel/bestla/bestla/ut/sycl_benchmark.cpp
auto_round_kernel/bestla/bestla/ut/sycl_gemm.cpp
auto_round_kernel/bestla/bestla/ut/sycl_misc.cpp
auto_round_kernel/bestla/bestla/ut/sycl_tla_flash_attn_decode_bench.cpp
auto_round_kernel/bestla/bestla/ut/sycl_tla_flash_attn_prefill_bench.cpp
auto_round_kernel/bestla/bestla/ut/sycl_tla_moe_bench.cpp
auto_round_kernel/bestla/bestla/ut/sycl_ut.h
auto_round_kernel/bestla/cmake/FindSIMD.cmake
auto_round_kernel/bestla/cmake/sycl.cmake
auto_round_kernel/wrapper/include/cpu_wrapper.hpp
auto_round_kernel/wrapper/include/dnnl_wrapper.hpp
auto_round_kernel/wrapper/include/sycl_tla_common.hpp
auto_round_kernel/wrapper/include/sycl_tla_moe.hpp
auto_round_kernel/wrapper/include/sycl_tla_sdpa.hpp
auto_round_kernel/wrapper/include/sycl_tla_wrapper.hpp
auto_round_kernel/wrapper/include/utils.hpp
auto_round_kernel/wrapper/include/xpu_wrapper.hpp
auto_round_kernel/wrapper/include/stla/xe_sage_fwd_kernel.hpp
auto_round_kernel/wrapper/include/stla/xe_sagev1_fwd_mainloop.hpp
auto_round_kernel/wrapper/include/stla/xe_sdpa_fwd_mainloop.hpp
auto_round_kernel/wrapper/test/common.hpp
auto_round_kernel/wrapper/test/test_gemm.hpp
auto_round_kernel/wrapper/test/test_main.cpp
auto_round_kernel/wrapper/test/test_quant.hpp
auto_round_kernel/wrapper/test/test_sdpa.hpp
auto_round_lib.egg-info/PKG-INFO
auto_round_lib.egg-info/SOURCES.txt
auto_round_lib.egg-info/dependency_links.txt
auto_round_lib.egg-info/requires.txt
auto_round_lib.egg-info/top_level.txt
test/test_bench_bmg.py
test/test_flash_attn.py
test/test_matmul.py
test/test_moe.py
test/test_packq.py
test/test_sage_dynquant.py
test/test_sdpa.py
test/test_sdpa_parity.py
test/test_weightonly.py
test/ut_utils.py
tools/lm_eval_with_ark_sdpa.py