LICENSE
MANIFEST.in
README.md
pyproject.toml
requirements.txt
setup.py
GPTQModel.egg-info/PKG-INFO
GPTQModel.egg-info/SOURCES.txt
GPTQModel.egg-info/dependency_links.txt
GPTQModel.egg-info/requires.txt
GPTQModel.egg-info/top_level.txt
gptqmodel/__init__.py
gptqmodel/_banner.py
gptqmodel/extension.py
gptqmodel/version.py
gptqmodel/adapter/__init__.py
gptqmodel/adapter/adapter.py
gptqmodel/adapter/peft.py
gptqmodel/adapter/remote.py
gptqmodel/eora/__init__.py
gptqmodel/eora/eora.py
gptqmodel/exllamav3/__init__.py
gptqmodel/exllamav3/ext.py
gptqmodel/exllamav3/modules/__init__.py
gptqmodel/exllamav3/modules/quant/__init__.py
gptqmodel/exllamav3/modules/quant/exl3.py
gptqmodel/exllamav3/modules/quant/exl3_lib/__init__.py
gptqmodel/exllamav3/modules/quant/exl3_lib/quantize.py
gptqmodel/exllamav3/util/__init__.py
gptqmodel/exllamav3/util/arch_list.py
gptqmodel/exllamav3/util/hadamard.py
gptqmodel/exllamav3/util/memory.py
gptqmodel/exllamav3/util/misc.py
gptqmodel/exllamav3/util/progress.py
gptqmodel/exllamav3/util/tensor.py
gptqmodel/exllamav3/util/hadamard_data/hadamard_1.txt
gptqmodel/exllamav3/util/hadamard_data/hadamard_100.txt
gptqmodel/exllamav3/util/hadamard_data/hadamard_116.txt
gptqmodel/exllamav3/util/hadamard_data/hadamard_156.txt
gptqmodel/exllamav3/util/hadamard_data/hadamard_172.txt
gptqmodel/exllamav3/util/hadamard_data/hadamard_188.txt
gptqmodel/exllamav3/util/hadamard_data/hadamard_236.txt
gptqmodel/exllamav3/util/hadamard_data/hadamard_244.txt
gptqmodel/exllamav3/util/hadamard_data/hadamard_428.txt
gptqmodel/exllamav3/util/hadamard_data/hadamard_52.txt
gptqmodel/exllamav3/util/hadamard_data/hadamard_92.txt
gptqmodel/hf_kernels/__init__.py
gptqmodel/hf_kernels/causal_conv1d/__init__.py
gptqmodel/hf_kernels/causal_conv1d/causal_conv1d_interface.py
gptqmodel/hf_kernels/causal_conv1d/causal_conv1d_varlen.py
gptqmodel/hf_kernels/mamba_ssm/__init__.py
gptqmodel/hf_kernels/mamba_ssm/ops/__init__.py
gptqmodel/hf_kernels/mamba_ssm/ops/selective_scan_interface.py
gptqmodel/hf_kernels/mamba_ssm/ops/triton/__init__.py
gptqmodel/hf_kernels/mamba_ssm/ops/triton/selective_state_update.py
gptqmodel/hf_kernels/mamba_ssm/ops/triton/ssd_combined.py
gptqmodel/hf_minimax_m2/__init__.py
gptqmodel/hf_minimax_m2/configuration_minimax_m2.py
gptqmodel/hf_minimax_m2/modeling_minimax_m2.py
gptqmodel/hf_minimax_m2/test_minimax_m2_hf.py
gptqmodel/looper/__init__.py
gptqmodel/looper/awq_processor.py
gptqmodel/looper/dequantize_processor.py
gptqmodel/looper/eora_processor.py
gptqmodel/looper/exllamav3_processor.py
gptqmodel/looper/forward_executor.py
gptqmodel/looper/gptq_processor.py
gptqmodel/looper/input_cache.py
gptqmodel/looper/linear_mode.py
gptqmodel/looper/loop_processor.py
gptqmodel/looper/module_looper.py
gptqmodel/looper/module_preprocessor.py
gptqmodel/looper/named_module.py
gptqmodel/looper/native_processor.py
gptqmodel/looper/paroquant_processor.py
gptqmodel/looper/qqq_processor.py
gptqmodel/looper/stage_inputs_capture.py
gptqmodel/looper/stage_layer.py
gptqmodel/looper/stage_subset.py
gptqmodel/looper/weight_only_looper.py
gptqmodel/looper/weight_only_processor.py
gptqmodel/models/__init__.py
gptqmodel/models/_const.py
gptqmodel/models/auto.py
gptqmodel/models/base.py
gptqmodel/models/loader.py
gptqmodel/models/moe_lifecycle.py
gptqmodel/models/writer.py
gptqmodel/models/definitions/__init__.py
gptqmodel/models/definitions/afmoe.py
gptqmodel/models/definitions/apertus.py
gptqmodel/models/definitions/baichuan.py
gptqmodel/models/definitions/bailing_moe.py
gptqmodel/models/definitions/base_qwen2_5_omni.py
gptqmodel/models/definitions/base_qwen2_vl.py
gptqmodel/models/definitions/base_qwen3_vl.py
gptqmodel/models/definitions/bloom.py
gptqmodel/models/definitions/brumby.py
gptqmodel/models/definitions/chatglm.py
gptqmodel/models/definitions/codegen.py
gptqmodel/models/definitions/dbrx.py
gptqmodel/models/definitions/dbrx_converted.py
gptqmodel/models/definitions/decilm.py
gptqmodel/models/definitions/deepseek_v2.py
gptqmodel/models/definitions/deepseek_v3.py
gptqmodel/models/definitions/deepseek_v4.py
gptqmodel/models/definitions/dots1.py
gptqmodel/models/definitions/dream.py
gptqmodel/models/definitions/ernie4_5.py
gptqmodel/models/definitions/ernie4_5_moe.py
gptqmodel/models/definitions/ernie4_5_vl_moe.py
gptqmodel/models/definitions/exaone.py
gptqmodel/models/definitions/exaone4.py
gptqmodel/models/definitions/falcon_h1.py
gptqmodel/models/definitions/falcon_mamba.py
gptqmodel/models/definitions/gemma2.py
gptqmodel/models/definitions/gemma3.py
gptqmodel/models/definitions/gemma3n.py
gptqmodel/models/definitions/gemma4.py
gptqmodel/models/definitions/glm.py
gptqmodel/models/definitions/glm4_moe.py
gptqmodel/models/definitions/glm4_moe_lite.py
gptqmodel/models/definitions/glm4v.py
gptqmodel/models/definitions/glm4v_moe.py
gptqmodel/models/definitions/glm_moe_dsa.py
gptqmodel/models/definitions/glm_ocr.py
gptqmodel/models/definitions/glmasr.py
gptqmodel/models/definitions/gpt2.py
gptqmodel/models/definitions/gpt_bigcode.py
gptqmodel/models/definitions/gpt_neo.py
gptqmodel/models/definitions/gpt_neox.py
gptqmodel/models/definitions/gpt_oss.py
gptqmodel/models/definitions/gptj.py
gptqmodel/models/definitions/granitemoehybrid.py
gptqmodel/models/definitions/grinmoe.py
gptqmodel/models/definitions/hrm_text.py
gptqmodel/models/definitions/hunyuan_v1_dense.py
gptqmodel/models/definitions/hunyuan_v1_moe.py
gptqmodel/models/definitions/hymba.py
gptqmodel/models/definitions/instella.py
gptqmodel/models/definitions/internlm.py
gptqmodel/models/definitions/internlm2.py
gptqmodel/models/definitions/interns1.py
gptqmodel/models/definitions/internvl_chat.py
gptqmodel/models/definitions/kimi_k25.py
gptqmodel/models/definitions/klear.py
gptqmodel/models/definitions/laguna.py
gptqmodel/models/definitions/lfm2_moe.py
gptqmodel/models/definitions/llada2.py
gptqmodel/models/definitions/llama.py
gptqmodel/models/definitions/llama4.py
gptqmodel/models/definitions/llava_qwen2.py
gptqmodel/models/definitions/longcat_flash.py
gptqmodel/models/definitions/mimo.py
gptqmodel/models/definitions/mimo_v2.py
gptqmodel/models/definitions/minicpm.py
gptqmodel/models/definitions/minicpm3.py
gptqmodel/models/definitions/minicpm_o.py
gptqmodel/models/definitions/minicpmv.py
gptqmodel/models/definitions/minicpmv_4_6.py
gptqmodel/models/definitions/minimax_m2.py
gptqmodel/models/definitions/mistral3.py
gptqmodel/models/definitions/mixtral.py
gptqmodel/models/definitions/mllama.py
gptqmodel/models/definitions/mobilellm.py
gptqmodel/models/definitions/moss.py
gptqmodel/models/definitions/mpt.py
gptqmodel/models/definitions/nemotron_h.py
gptqmodel/models/definitions/nemotron_labs_diffusion.py
gptqmodel/models/definitions/nemotron_omni.py
gptqmodel/models/definitions/olmoe.py
gptqmodel/models/definitions/opt.py
gptqmodel/models/definitions/ovis.py
gptqmodel/models/definitions/ovis2.py
gptqmodel/models/definitions/ovis2_5.py
gptqmodel/models/definitions/ovis2_6_moe.py
gptqmodel/models/definitions/pangu_alpha.py
gptqmodel/models/definitions/phi.py
gptqmodel/models/definitions/phi3.py
gptqmodel/models/definitions/phi4.py
gptqmodel/models/definitions/qwen.py
gptqmodel/models/definitions/qwen2.py
gptqmodel/models/definitions/qwen2_5_omni.py
gptqmodel/models/definitions/qwen2_5_vl.py
gptqmodel/models/definitions/qwen2_moe.py
gptqmodel/models/definitions/qwen2_vl.py
gptqmodel/models/definitions/qwen3.py
gptqmodel/models/definitions/qwen3_5.py
gptqmodel/models/definitions/qwen3_5_moe.py
gptqmodel/models/definitions/qwen3_5_moe_text.py
gptqmodel/models/definitions/qwen3_5_text.py
gptqmodel/models/definitions/qwen3_moe.py
gptqmodel/models/definitions/qwen3_next.py
gptqmodel/models/definitions/qwen3_omni_moe.py
gptqmodel/models/definitions/qwen3_vl.py
gptqmodel/models/definitions/rw.py
gptqmodel/models/definitions/starcoder2.py
gptqmodel/models/definitions/telechat2.py
gptqmodel/models/definitions/voxtral.py
gptqmodel/models/definitions/xverse.py
gptqmodel/models/definitions/zamba.py
gptqmodel/models/definitions/zamba2.py
gptqmodel/nn_modules/__init__.py
gptqmodel/nn_modules/converter.py
gptqmodel/nn_modules/exllamav3.py
gptqmodel/nn_modules/exllamav3_torch.py
gptqmodel/nn_modules/hooked_linear.py
gptqmodel/nn_modules/qlinear/__init__.py
gptqmodel/nn_modules/qlinear/bitblas.py
gptqmodel/nn_modules/qlinear/bitblas_awq.py
gptqmodel/nn_modules/qlinear/bitblas_target_detector.py
gptqmodel/nn_modules/qlinear/bitsandbytes.py
gptqmodel/nn_modules/qlinear/exllamav2.py
gptqmodel/nn_modules/qlinear/exllamav2_awq.py
gptqmodel/nn_modules/qlinear/fp4.py
gptqmodel/nn_modules/qlinear/fp8.py
gptqmodel/nn_modules/qlinear/gemm_awq.py
gptqmodel/nn_modules/qlinear/gemm_awq_triton.py
gptqmodel/nn_modules/qlinear/gemv_awq.py
gptqmodel/nn_modules/qlinear/gemv_fast_awq.py
gptqmodel/nn_modules/qlinear/gguf.py
gptqmodel/nn_modules/qlinear/gguf_cpp.py
gptqmodel/nn_modules/qlinear/gguf_triton.py
gptqmodel/nn_modules/qlinear/lookahead.py
gptqmodel/nn_modules/qlinear/machete.py
gptqmodel/nn_modules/qlinear/machete_awq.py
gptqmodel/nn_modules/qlinear/marlin.py
gptqmodel/nn_modules/qlinear/marlin_awq.py
gptqmodel/nn_modules/qlinear/pack_block_ext.py
gptqmodel/nn_modules/qlinear/paroquant.py
gptqmodel/nn_modules/qlinear/paroquant_triton.py
gptqmodel/nn_modules/qlinear/qqq.py
gptqmodel/nn_modules/qlinear/torch.py
gptqmodel/nn_modules/qlinear/torch_aten_kernel.py
gptqmodel/nn_modules/qlinear/torch_aten_kernel_awq.py
gptqmodel/nn_modules/qlinear/torch_awq.py
gptqmodel/nn_modules/qlinear/torch_fused.py
gptqmodel/nn_modules/qlinear/torch_fused_awq.py
gptqmodel/nn_modules/qlinear/torch_int8.py
gptqmodel/nn_modules/qlinear/torch_int8_awq.py
gptqmodel/nn_modules/qlinear/tritonv2.py
gptqmodel/nn_modules/qlinear/utils.py
gptqmodel/nn_modules/triton_utils/__init__.py
gptqmodel/nn_modules/triton_utils/custom_autotune.py
gptqmodel/nn_modules/triton_utils/dequant.py
gptqmodel/nn_modules/triton_utils/kernels.py
gptqmodel/nn_modules/triton_utils/mixin.py
gptqmodel/quantization/__init__.py
gptqmodel/quantization/config.py
gptqmodel/quantization/dtype.py
gptqmodel/quantization/fallback_smooth.py
gptqmodel/quantization/foem.py
gptqmodel/quantization/gar.py
gptqmodel/quantization/gar_ref.py
gptqmodel/quantization/gptaq.py
gptqmodel/quantization/gptq.py
gptqmodel/quantization/npu_linalg.py
gptqmodel/quantization/protocol.py
gptqmodel/quantization/qqq.py
gptqmodel/quantization/quantizer.py
gptqmodel/quantization/rtn.py
gptqmodel/quantization/awq/__init__.py
gptqmodel/quantization/awq/modules/__init__.py
gptqmodel/quantization/awq/modules/act.py
gptqmodel/quantization/awq/modules/triton/__init__.py
gptqmodel/quantization/awq/modules/triton/gemm.py
gptqmodel/quantization/awq/quantize/__init__.py
gptqmodel/quantization/awq/quantize/scale.py
gptqmodel/quantization/awq/utils/__init__.py
gptqmodel/quantization/awq/utils/calib_data.py
gptqmodel/quantization/awq/utils/module.py
gptqmodel/quantization/awq/utils/packing_utils.py
gptqmodel/quantization/awq/utils/utils.py
gptqmodel/quantization/paroquant/__init__.py
gptqmodel/quantization/paroquant/optimization.py
gptqmodel/quantization/paroquant/modules/__init__.py
gptqmodel/quantization/paroquant/modules/triton/__init__.py
gptqmodel/quantization/paroquant/modules/triton/gemm.py
gptqmodel/quantization/rotation/__init__.py
gptqmodel/quantization/rotation/hadamard_utils.py
gptqmodel/quantization/rotation/rotation.py
gptqmodel/utils/__init__.py
gptqmodel/utils/attn_mask.py
gptqmodel/utils/audio.py
gptqmodel/utils/awq.py
gptqmodel/utils/backend.py
gptqmodel/utils/bitblas.py
gptqmodel/utils/calibration.py
gptqmodel/utils/colors.py
gptqmodel/utils/cpp.py
gptqmodel/utils/ctx.py
gptqmodel/utils/cuda_activation_buffer.py
gptqmodel/utils/data.py
gptqmodel/utils/device.py
gptqmodel/utils/device_telemetry.py
gptqmodel/utils/disk.py
gptqmodel/utils/env.py
gptqmodel/utils/exllamav2.py
gptqmodel/utils/exllamav3.py
gptqmodel/utils/fallback.py
gptqmodel/utils/gemv.py
gptqmodel/utils/hf.py
gptqmodel/utils/hub.py
gptqmodel/utils/image.py
gptqmodel/utils/importer.py
gptqmodel/utils/inspect.py
gptqmodel/utils/internal_gguf.py
gptqmodel/utils/jit_compile_baselines.py
gptqmodel/utils/linalg_warmup.py
gptqmodel/utils/logger.py
gptqmodel/utils/looper_helpers.py
gptqmodel/utils/machete.py
gptqmodel/utils/marlin.py
gptqmodel/utils/marlin_scalar_type.py
gptqmodel/utils/memory.py
gptqmodel/utils/mlx.py
gptqmodel/utils/mmlupro.py
gptqmodel/utils/model.py
gptqmodel/utils/model_dequant.py
gptqmodel/utils/modelscope.py
gptqmodel/utils/module_locks.py
gptqmodel/utils/nogil_patcher.py
gptqmodel/utils/offload.py
gptqmodel/utils/openai_server.py
gptqmodel/utils/paroquant.py
gptqmodel/utils/paroquant_benchmark.py
gptqmodel/utils/python.py
gptqmodel/utils/qqq.py
gptqmodel/utils/random_str.py
gptqmodel/utils/rocm.py
gptqmodel/utils/safe.py
gptqmodel/utils/safetensor.py
gptqmodel/utils/sglang.py
gptqmodel/utils/stream.py
gptqmodel/utils/structure.py
gptqmodel/utils/tensor.py
gptqmodel/utils/terminal.py
gptqmodel/utils/threads.py
gptqmodel/utils/threadx.py
gptqmodel/utils/torch.py
gptqmodel/utils/vllm.py
gptqmodel/utils/vram.py
gptqmodel_ext/__init__.py
gptqmodel_ext/floatx_cpu.cpp
gptqmodel_ext/pack_block_cpu.cpp
gptqmodel_ext/awq/gemm_fast_cuda_entry.cu
gptqmodel_ext/awq/gemv_fast_cuda_entry.cu
gptqmodel_ext/awq/torch_bind.cpp
gptqmodel_ext/awq/quantization/dequantize.cuh
gptqmodel_ext/awq/quantization/gemm_cuda.h
gptqmodel_ext/awq/quantization/gemm_cuda_gen.cu
gptqmodel_ext/awq/quantization/gemv_cuda.cu
gptqmodel_ext/awq/quantization/gemv_cuda.h
gptqmodel_ext/awq/quantization_new/dequantize.cuh
gptqmodel_ext/awq/quantization_new/gemm/gemm_cuda.cu
gptqmodel_ext/awq/quantization_new/gemm/gemm_cuda.h
gptqmodel_ext/awq/quantization_new/gemm/semaphore.h
gptqmodel_ext/awq/quantization_new/gemv/gemv_cuda.cu
gptqmodel_ext/awq/quantization_new/gemv/gemv_cuda.h
gptqmodel_ext/cutlass_extensions/__init__.py
gptqmodel_ext/cutlass_extensions/common.cpp
gptqmodel_ext/cutlass_extensions/common.hpp
gptqmodel_ext/cutlass_extensions/cute_utils.cuh
gptqmodel_ext/cutlass_extensions/torch_utils.hpp
gptqmodel_ext/cutlass_extensions/vllm_collective_builder.cuh
gptqmodel_ext/cutlass_extensions/vllm_custom_types.cuh
gptqmodel_ext/cutlass_extensions/vllm_cutlass_library_extension.py
gptqmodel_ext/cutlass_extensions/vllm_numeric_conversion.cuh
gptqmodel_ext/cutlass_extensions/vllm_type_utils.cuh
gptqmodel_ext/cutlass_extensions/epilogue/broadcast_load_epilogue_array_c3x.hpp
gptqmodel_ext/cutlass_extensions/epilogue/broadcast_load_epilogue_c2x.hpp
gptqmodel_ext/cutlass_extensions/epilogue/broadcast_load_epilogue_c3x.hpp
gptqmodel_ext/cutlass_extensions/epilogue/scaled_mm_epilogues_c2x.hpp
gptqmodel_ext/cutlass_extensions/epilogue/scaled_mm_epilogues_c3x.hpp
gptqmodel_ext/exllamav2/config.h
gptqmodel_ext/exllamav2/ext.cpp
gptqmodel_ext/exllamav2/ext_awq.cpp
gptqmodel_ext/exllamav2/ext_common.h
gptqmodel_ext/exllamav2/ext_gptq.cpp
gptqmodel_ext/exllamav2/cpp/util.h
gptqmodel_ext/exllamav2/cuda/compat.cuh
gptqmodel_ext/exllamav2/cuda/compat_gemm.cuh
gptqmodel_ext/exllamav2/cuda/matrix_view.cuh
gptqmodel_ext/exllamav2/cuda/q_gemm.cu
gptqmodel_ext/exllamav2/cuda/q_gemm.cuh
gptqmodel_ext/exllamav2/cuda/q_gemm_awq.cu
gptqmodel_ext/exllamav2/cuda/q_gemm_kernel.cuh
gptqmodel_ext/exllamav2/cuda/q_gemm_kernel_awq.cuh
gptqmodel_ext/exllamav2/cuda/q_gemm_kernel_gptq.cuh
gptqmodel_ext/exllamav2/cuda/q_matrix.cu
gptqmodel_ext/exllamav2/cuda/q_matrix.cuh
gptqmodel_ext/exllamav2/cuda/q_matrix_awq.cu
gptqmodel_ext/exllamav2/cuda/util.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_2.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_3.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_4.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_5.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_6.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_8.cuh
gptqmodel_ext/exllamav2/cuda/quant/qdq_util.cuh
gptqmodel_ext/exllamav3/bindings.cpp
gptqmodel_ext/exllamav3/hadamard.cpp
gptqmodel_ext/exllamav3/hadamard.h
gptqmodel_ext/exllamav3/hgemm.cu
gptqmodel_ext/exllamav3/hgemm.cuh
gptqmodel_ext/exllamav3/ptx.cuh
gptqmodel_ext/exllamav3/util.cuh
gptqmodel_ext/exllamav3/util.h
gptqmodel_ext/exllamav3/libtorch/linear.cpp
gptqmodel_ext/exllamav3/libtorch/linear.h
gptqmodel_ext/exllamav3/libtorch/linear_bc.h
gptqmodel_ext/exllamav3/quant/codebook.cuh
gptqmodel_ext/exllamav3/quant/exl3_devctx.cu
gptqmodel_ext/exllamav3/quant/exl3_devctx.cuh
gptqmodel_ext/exllamav3/quant/exl3_dq.cuh
gptqmodel_ext/exllamav3/quant/exl3_gemm.cu
gptqmodel_ext/exllamav3/quant/exl3_gemm.cuh
gptqmodel_ext/exllamav3/quant/exl3_gemm_inner.cuh
gptqmodel_ext/exllamav3/quant/exl3_gemm_kernel.cuh
gptqmodel_ext/exllamav3/quant/exl3_kernel_map.cu
gptqmodel_ext/exllamav3/quant/exl3_kernel_map.cuh
gptqmodel_ext/exllamav3/quant/exl3_kernel_map_packed.cuh
gptqmodel_ext/exllamav3/quant/hadamard.cu
gptqmodel_ext/exllamav3/quant/hadamard.cuh
gptqmodel_ext/exllamav3/quant/hadamard_inner.cuh
gptqmodel_ext/exllamav3/quant/pack.cu
gptqmodel_ext/exllamav3/quant/pack.cuh
gptqmodel_ext/exllamav3/quant/quantize.cu
gptqmodel_ext/exllamav3/quant/quantize.cuh
gptqmodel_ext/exllamav3/quant/reconstruct.cu
gptqmodel_ext/exllamav3/quant/reconstruct.cuh
gptqmodel_ext/exllamav3/quant/util.cu
gptqmodel_ext/exllamav3/quant/util.cuh
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_1.cu
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_1.cuh
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_2.cu
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_2.cuh
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_3.cu
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_3.cuh
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_4.cu
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_4.cuh
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_5.cu
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_5.cuh
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_6.cu
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_6.cuh
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_7.cu
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_7.cuh
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_8.cu
gptqmodel_ext/exllamav3/quant/comp_units/exl3_comp_unit_8.cuh
gptqmodel_ext/machete/generate.py
gptqmodel_ext/machete/machete_collective_builder.cuh
gptqmodel_ext/machete/machete_interleaving_utils.cuh
gptqmodel_ext/machete/machete_mainloop.cuh
gptqmodel_ext/machete/machete_mm_kernel.cuh
gptqmodel_ext/machete/machete_mm_launcher.cuh
gptqmodel_ext/machete/machete_prepack_kernel.cuh
gptqmodel_ext/machete/machete_prepack_launcher.cuh
gptqmodel_ext/machete/machete_prepacked_layout.cuh
gptqmodel_ext/machete/machete_pytorch.cu
gptqmodel_ext/machete/core/registration.h
gptqmodel_ext/machete/core/scalar_type.hpp
gptqmodel_ext/marlin/awq_marlin_repack.cu
gptqmodel_ext/marlin/awq_marlin_repack.cuh
gptqmodel_ext/marlin/dequant.h
gptqmodel_ext/marlin/generate_kernels.py
gptqmodel_ext/marlin/gptq_marlin.cu
gptqmodel_ext/marlin/gptq_marlin.cuh
gptqmodel_ext/marlin/gptq_marlin_bf16.cu
gptqmodel_ext/marlin/gptq_marlin_fp16.cu
gptqmodel_ext/marlin/gptq_marlin_repack.cu
gptqmodel_ext/marlin/gptq_marlin_repack.cuh
gptqmodel_ext/marlin/kernel.h
gptqmodel_ext/marlin/marlin.cuh
gptqmodel_ext/marlin/marlin_dtypes.cuh
gptqmodel_ext/marlin/marlin_mma.h
gptqmodel_ext/marlin/marlin_template.h
gptqmodel_ext/marlin/marlin_torch_bf16.cpp
gptqmodel_ext/marlin/marlin_torch_fp16.cpp
gptqmodel_ext/marlin/core/registration.h
gptqmodel_ext/marlin/core/scalar_type.hpp
gptqmodel_ext/paroquant/rotation.cu
gptqmodel_ext/paroquant/rotation.cuh
gptqmodel_ext/qqq/qqq.cpp
gptqmodel_ext/qqq/qqq_gemm.cu
gptqmodel_ext/qqq/qqq_gemm.h
licenses/LICENSE.apache