CMakeLists.txt
LICENSE
LICENSE.metal
MANIFEST.in
Makefile
README.md
pyproject.toml
setup.py
bin/CMakeLists.txt
bin/RegisterTritonDialects.h
bin/triton-llvm-opt.cpp
bin/triton-lsp.cpp
bin/triton-opt.cpp
bin/triton-reduce.cpp
bin/triton-tensor-layout.cpp
cmake/AddTritonUnitTest.cmake
cmake/FindLLVM.cmake
cmake/json-version.txt
cmake/llvm-hash.txt
cmake/nvidia-toolchain-version.json
docs/Makefile
docs/conf.py
docs/index.rst
docs/metal-backend-testing.md
docs/metal-backend.md
docs/_templates/versions.html
docs/backend/ldmatrixOperand0.svg
docs/backend/ldmatrixOperand1.svg
docs/getting-started/installation.rst
docs/getting-started/tutorials/grouped_vs_row_major_ordering.png
docs/getting-started/tutorials/parallel_reduction.png
docs/getting-started/tutorials/random_bits.png
docs/meetups/dev-meetup-2023.md
docs/meetups/dev_conference_2024.md
docs/meetups/01-24-2024/notes.md
docs/meetups/02-20-2024/Proton.pdf
docs/meetups/02-20-2024/notes.md
docs/meetups/04-02-2024/notes.md
docs/meetups/05-07-2024/notes.md
docs/meetups/07-18-2023/notes.md
docs/meetups/08-06-2024/notes.md
docs/meetups/08-22-2023/amd-update.pdf
docs/meetups/08-22-2023/intel-xpu-update.pptx
docs/meetups/08-22-2023/notes.md
docs/meetups/10-25-2023/intel-xpu-update.pdf
docs/meetups/10-25-2023/notes.md
docs/meetups/10-25-2023/triton-shared.pptx
docs/meetups/12-13-2023/notes.md
docs/programming-guide/chapter-1/cuda-parallel-matmul.png
docs/programming-guide/chapter-1/introduction.rst
docs/programming-guide/chapter-1/triton-parallel-matmul.png
docs/programming-guide/chapter-2/halide-iteration.png
docs/programming-guide/chapter-2/polyhedral-iteration.png
docs/programming-guide/chapter-2/related-work.rst
docs/programming-guide/chapter-3/debugging.rst
docs/python-api/triton-semantics.rst
docs/python-api/triton.language.extra.cuda.rst
docs/python-api/triton.language.rst
docs/python-api/triton.rst
docs/python-api/triton.testing.rst
include/CMakeLists.txt
include/triton/CMakeLists.txt
include/triton/Analysis/Alias.h
include/triton/Analysis/Allocation.h
include/triton/Analysis/AxisInfo.h
include/triton/Analysis/Membar.h
include/triton/Analysis/Utility.h
include/triton/Conversion/CMakeLists.txt
include/triton/Conversion/MLIRTypes.h
include/triton/Conversion/TritonGPUToLLVM/AsmFormat.h
include/triton/Conversion/TritonGPUToLLVM/CMakeLists.txt
include/triton/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVMBase.h
include/triton/Conversion/TritonGPUToLLVM/FMADotUtility.h
include/triton/Conversion/TritonGPUToLLVM/Passes.h
include/triton/Conversion/TritonGPUToLLVM/Passes.td
include/triton/Conversion/TritonGPUToLLVM/PatternTritonGPUOpToLLVM.h
include/triton/Conversion/TritonGPUToLLVM/TargetInfoBase.h
include/triton/Conversion/TritonGPUToLLVM/TypeConverter.h
include/triton/Conversion/TritonGPUToLLVM/Utility.h
include/triton/Conversion/TritonToTritonGPU/CMakeLists.txt
include/triton/Conversion/TritonToTritonGPU/Passes.h
include/triton/Conversion/TritonToTritonGPU/Passes.td
include/triton/Dialect/CMakeLists.txt
include/triton/Dialect/Triton/CMakeLists.txt
include/triton/Dialect/Triton/IR/CMakeLists.txt
include/triton/Dialect/Triton/IR/Dialect.h
include/triton/Dialect/Triton/IR/Interfaces.h
include/triton/Dialect/Triton/IR/OpInterfaces.h
include/triton/Dialect/Triton/IR/Traits.h
include/triton/Dialect/Triton/IR/TritonAttrDefs.td
include/triton/Dialect/Triton/IR/TritonDialect.td
include/triton/Dialect/Triton/IR/TritonInterfaces.td
include/triton/Dialect/Triton/IR/TritonOpInterfaces.td
include/triton/Dialect/Triton/IR/TritonOps.td
include/triton/Dialect/Triton/IR/TritonTypes.td
include/triton/Dialect/Triton/IR/Types.h
include/triton/Dialect/Triton/IR/Utility.h
include/triton/Dialect/Triton/Transforms/CMakeLists.txt
include/triton/Dialect/Triton/Transforms/Passes.h
include/triton/Dialect/Triton/Transforms/Passes.td
include/triton/Dialect/TritonGPU/CMakeLists.txt
include/triton/Dialect/TritonGPU/IR/Attributes.h
include/triton/Dialect/TritonGPU/IR/CMakeLists.txt
include/triton/Dialect/TritonGPU/IR/Dialect.h
include/triton/Dialect/TritonGPU/IR/LayoutUtilities.h
include/triton/Dialect/TritonGPU/IR/LayoutUtility.h
include/triton/Dialect/TritonGPU/IR/LinearLayoutConversions.h
include/triton/Dialect/TritonGPU/IR/Traits.h
include/triton/Dialect/TritonGPU/IR/TritonGPUAttrDefs.td
include/triton/Dialect/TritonGPU/IR/TritonGPUDialect.td
include/triton/Dialect/TritonGPU/IR/TritonGPUInterfaces.h
include/triton/Dialect/TritonGPU/IR/TritonGPUOps.td
include/triton/Dialect/TritonGPU/IR/TritonGPUTypeInterfaces.td
include/triton/Dialect/TritonGPU/IR/TritonGPUTypes.td
include/triton/Dialect/TritonGPU/IR/Types.h
include/triton/Dialect/TritonGPU/Transforms/CMakeLists.txt
include/triton/Dialect/TritonGPU/Transforms/DecomposeScaledBlocked.h
include/triton/Dialect/TritonGPU/Transforms/MMAv5PipelineUtility.h
include/triton/Dialect/TritonGPU/Transforms/Partition.h
include/triton/Dialect/TritonGPU/Transforms/Passes.h
include/triton/Dialect/TritonGPU/Transforms/Passes.td
include/triton/Dialect/TritonGPU/Transforms/PipelineExpander.h
include/triton/Dialect/TritonGPU/Transforms/PipeliningUtility.h
include/triton/Dialect/TritonGPU/Transforms/Schedule.h
include/triton/Dialect/TritonGPU/Transforms/TritonGPUConversion.h
include/triton/Dialect/TritonGPU/Transforms/Utility.h
include/triton/Dialect/TritonGPU/Transforms/WarpSpecialization.h
include/triton/Dialect/TritonMetal/CMakeLists.txt
include/triton/Dialect/TritonMetal/IR/CMakeLists.txt
include/triton/Dialect/TritonMetal/IR/Dialect.h
include/triton/Dialect/TritonMetal/IR/Dialect.td
include/triton/Dialect/TritonMetal/IR/TritonMetalDialect.cpp.inc
include/triton/Dialect/TritonMetal/IR/TritonMetalDialect.h.inc
include/triton/Dialect/TritonMetal/Transforms/CMakeLists.txt
include/triton/Dialect/TritonMetal/Transforms/Passes.h
include/triton/Dialect/TritonMetal/Transforms/Passes.h.inc
include/triton/Dialect/TritonMetal/Transforms/Passes.td
include/triton/Dialect/TritonNvidiaGPU/CMakeLists.txt
include/triton/Dialect/TritonNvidiaGPU/IR/CMakeLists.txt
include/triton/Dialect/TritonNvidiaGPU/IR/Dialect.h
include/triton/Dialect/TritonNvidiaGPU/IR/TritonNvidiaGPUAttrDefs.td
include/triton/Dialect/TritonNvidiaGPU/IR/TritonNvidiaGPUDialect.td
include/triton/Dialect/TritonNvidiaGPU/IR/TritonNvidiaGPUOpInterfaces.td
include/triton/Dialect/TritonNvidiaGPU/IR/TritonNvidiaGPUOps.td
include/triton/Dialect/TritonNvidiaGPU/Transforms/CMakeLists.txt
include/triton/Dialect/TritonNvidiaGPU/Transforms/Passes.h
include/triton/Dialect/TritonNvidiaGPU/Transforms/Passes.td
include/triton/Dialect/TritonNvidiaGPU/Transforms/TMAUtilities.h
include/triton/Target/CMakeLists.txt
include/triton/Target/LLVMIR/CMakeLists.txt
include/triton/Target/LLVMIR/Passes.h
include/triton/Target/LLVMIR/Passes.td
include/triton/Tools/LayoutUtils.h
include/triton/Tools/LinearLayout.h
include/triton/Tools/StrUtil.h
include/triton/Tools/Sys/GetEnv.hpp
lib/CMakeLists.txt
lib/Analysis/Alias.cpp
lib/Analysis/Allocation.cpp
lib/Analysis/AxisInfo.cpp
lib/Analysis/CMakeLists.txt
lib/Analysis/Membar.cpp
lib/Analysis/Utility.cpp
lib/Conversion/CMakeLists.txt
lib/Conversion/TritonGPUToLLVM/AllocateSharedMemory.cpp
lib/Conversion/TritonGPUToLLVM/AllocateWarpGroups.cpp
lib/Conversion/TritonGPUToLLVM/AssertOpToLLVM.cpp
lib/Conversion/TritonGPUToLLVM/CMakeLists.txt
lib/Conversion/TritonGPUToLLVM/ControlFlowOpToLLVM.cpp
lib/Conversion/TritonGPUToLLVM/ConvertLayoutOpToLLVM.cpp
lib/Conversion/TritonGPUToLLVM/ElementwiseOpToLLVM.cpp
lib/Conversion/TritonGPUToLLVM/FuncOpToLLVM.cpp
lib/Conversion/TritonGPUToLLVM/GatherOpToLLVM.cpp
lib/Conversion/TritonGPUToLLVM/GlobalScratchMemoryAllocation.cpp
lib/Conversion/TritonGPUToLLVM/HistogramOpToLLVM.cpp
lib/Conversion/TritonGPUToLLVM/MakeRangeOpToLLVM.cpp
lib/Conversion/TritonGPUToLLVM/MemoryOpToLLVM.cpp
lib/Conversion/TritonGPUToLLVM/PrintOpToLLVM.cpp
lib/Conversion/TritonGPUToLLVM/ReduceOpToLLVM.cpp
lib/Conversion/TritonGPUToLLVM/ReduceScanCommon.h
lib/Conversion/TritonGPUToLLVM/SPMDOpToLLVM.cpp
lib/Conversion/TritonGPUToLLVM/ScanOpToLLVM.cpp
lib/Conversion/TritonGPUToLLVM/TypeConverter.cpp
lib/Conversion/TritonGPUToLLVM/Utility.cpp
lib/Conversion/TritonGPUToLLVM/ViewOpToLLVM.cpp
lib/Conversion/TritonGPUToLLVM/DotOpToLLVM/FMA.cpp
lib/Conversion/TritonGPUToLLVM/DotOpToLLVM/FMADotUtility.cpp
lib/Conversion/TritonToTritonGPU/CMakeLists.txt
lib/Conversion/TritonToTritonGPU/RelayoutTritonGPU.cpp
lib/Conversion/TritonToTritonGPU/TritonGPUConversion.cpp
lib/Conversion/TritonToTritonGPU/TritonToTritonGPUPass.cpp
lib/Dialect/CMakeLists.txt
lib/Dialect/Triton/CMakeLists.txt
lib/Dialect/Triton/IR/CMakeLists.txt
lib/Dialect/Triton/IR/Canonicalize.td
lib/Dialect/Triton/IR/Dialect.cpp
lib/Dialect/Triton/IR/OpInterfaces.cpp
lib/Dialect/Triton/IR/Ops.cpp
lib/Dialect/Triton/IR/Traits.cpp
lib/Dialect/Triton/IR/Types.cpp
lib/Dialect/Triton/IR/Utility.cpp
lib/Dialect/Triton/Transforms/CMakeLists.txt
lib/Dialect/Triton/Transforms/Combine.cpp
lib/Dialect/Triton/Transforms/Combine.td
lib/Dialect/Triton/Transforms/LoopInvariantCodeMotion.cpp
lib/Dialect/Triton/Transforms/LoopUnroll.cpp
lib/Dialect/Triton/Transforms/ReorderBroadcast.cpp
lib/Dialect/Triton/Transforms/RewriteTensorPointer.cpp
lib/Dialect/TritonGPU/CMakeLists.txt
lib/Dialect/TritonGPU/IR/CMakeLists.txt
lib/Dialect/TritonGPU/IR/Dialect.cpp
lib/Dialect/TritonGPU/IR/LayoutUtility.cpp
lib/Dialect/TritonGPU/IR/LinearLayoutConversions.cpp
lib/Dialect/TritonGPU/IR/Ops.cpp
lib/Dialect/TritonGPU/IR/Types.cpp
lib/Dialect/TritonGPU/Transforms/AccelerateMatmul.cpp
lib/Dialect/TritonGPU/Transforms/CMakeLists.txt
lib/Dialect/TritonGPU/Transforms/Coalesce.cpp
lib/Dialect/TritonGPU/Transforms/CoalesceAsyncCopy.cpp
lib/Dialect/TritonGPU/Transforms/CombineTensorSelectAndIf.cpp
lib/Dialect/TritonGPU/Transforms/DecomposeScaledBlocked.cpp
lib/Dialect/TritonGPU/Transforms/F32DotTC.cpp
lib/Dialect/TritonGPU/Transforms/FuseNestedLoops.cpp
lib/Dialect/TritonGPU/Transforms/HoistTMEMAlloc.cpp
lib/Dialect/TritonGPU/Transforms/OptimizeAccumulatorInit.cpp
lib/Dialect/TritonGPU/Transforms/OptimizeDotOperands.cpp
lib/Dialect/TritonGPU/Transforms/OptimizeThreadLocality.cpp
lib/Dialect/TritonGPU/Transforms/Prefetch.cpp
lib/Dialect/TritonGPU/Transforms/ReduceDataDuplication.cpp
lib/Dialect/TritonGPU/Transforms/RemoveLayoutConversions.cpp
lib/Dialect/TritonGPU/Transforms/ReorderInstructions.cpp
lib/Dialect/TritonGPU/Transforms/Utility.cpp
lib/Dialect/TritonGPU/Transforms/WGMMAPrefetch.cpp
lib/Dialect/TritonGPU/Transforms/Pipeliner/AssignLatencies.cpp
lib/Dialect/TritonGPU/Transforms/Pipeliner/LowerLoops.cpp
lib/Dialect/TritonGPU/Transforms/Pipeliner/MMAv5PipelineUtility.cpp
lib/Dialect/TritonGPU/Transforms/Pipeliner/PipelineExpander.cpp
lib/Dialect/TritonGPU/Transforms/Pipeliner/PipeliningUtility.cpp
lib/Dialect/TritonGPU/Transforms/Pipeliner/Schedule.cpp
lib/Dialect/TritonGPU/Transforms/Pipeliner/ScheduleLoops.cpp
lib/Dialect/TritonGPU/Transforms/Pipeliner/SoftwarePipeliner.cpp
lib/Dialect/TritonGPU/Transforms/Pipeliner/TMAStoresPipeline.cpp
lib/Dialect/TritonGPU/Transforms/Pipeliner/TestPipelineAssignLatencies.cpp
lib/Dialect/TritonGPU/Transforms/Pipeliner/TestPipelineLowerLoop.cpp
lib/Dialect/TritonGPU/Transforms/Pipeliner/TestPipelineScheduleLoop.cpp
lib/Dialect/TritonGPU/Transforms/Pipeliner/WGMMAPipeline.cpp
lib/Dialect/TritonGPU/Transforms/WarpSpecialization/AutomaticWarpSpecialization.cpp
lib/Dialect/TritonGPU/Transforms/WarpSpecialization/LoadMMASpecialization.cpp
lib/Dialect/TritonGPU/Transforms/WarpSpecialization/OptimizePartitionWarps.cpp
lib/Dialect/TritonGPU/Transforms/WarpSpecialization/Partition.cpp
lib/Dialect/TritonGPU/Transforms/WarpSpecialization/PartitionLoops.cpp
lib/Dialect/TritonGPU/Transforms/WarpSpecialization/RewritePartitionDependencies.cpp
lib/Dialect/TritonMetal/CMakeLists.txt
lib/Dialect/TritonMetal/IR/CMakeLists.txt
lib/Dialect/TritonMetal/IR/Dialect.cpp
lib/Dialect/TritonMetal/Transforms/CMakeLists.txt
lib/Dialect/TritonMetal/Transforms/Passes.cpp
lib/Dialect/TritonNvidiaGPU/CMakeLists.txt
lib/Dialect/TritonNvidiaGPU/IR/CMakeLists.txt
lib/Dialect/TritonNvidiaGPU/IR/Dialect.cpp
lib/Dialect/TritonNvidiaGPU/IR/Ops.cpp
lib/Dialect/TritonNvidiaGPU/Transforms/CMakeLists.txt
lib/Dialect/TritonNvidiaGPU/Transforms/FenceInsertion.cpp
lib/Dialect/TritonNvidiaGPU/Transforms/MMALowering.cpp
lib/Dialect/TritonNvidiaGPU/Transforms/OptimizeDescriptorEncoding.cpp
lib/Dialect/TritonNvidiaGPU/Transforms/OptimizeTMemLayouts.cpp
lib/Dialect/TritonNvidiaGPU/Transforms/PlanCTA.cpp
lib/Dialect/TritonNvidiaGPU/Transforms/PromoteLHSToTMem.cpp
lib/Dialect/TritonNvidiaGPU/Transforms/RemoveTMEMTokens.cpp
lib/Dialect/TritonNvidiaGPU/Transforms/TMALowering.cpp
lib/Dialect/TritonNvidiaGPU/Transforms/TMAUtilities.cpp
lib/Dialect/TritonNvidiaGPU/Transforms/TensorMemoryAllocation.cpp
lib/Instrumentation/CMakeLists.txt
lib/Instrumentation/PrintLoadStoreMemSpaces.cpp
lib/Target/CMakeLists.txt
lib/Target/LLVMIR/CMakeLists.txt
lib/Target/LLVMIR/LLVMDIScope.cpp
lib/Target/LLVMIR/LLVMIRBreakPhiStruct.cpp
lib/Target/LLVMIR/LLVMPasses.h
lib/Tools/CMakeLists.txt
lib/Tools/LayoutUtils.cpp
lib/Tools/LinearLayout.cpp
python/build_helpers.py
python/requirements.txt
python/test-requirements.txt
python/src/interpreter.cc
python/src/ir.cc
python/src/llvm.cc
python/src/main.cc
python/src/passes.cc
python/src/passes.h
python/test/backend/extension_backend.c
python/test/backend/test_device_backend.py
python/test/kernel_comparison/kernels.yml
python/test/regression/conftest.py
python/test/regression/test_cast_matmul.py
python/test/regression/test_functional_regressions.py
python/test/unit/conftest.py
python/test/unit/test_debug.py
python/test/unit/test_debug_dump.py
python/test/unit/test_knobs.py
python/test/unit/test_perf_warning.py
python/test/unit/blackwell/test_tmem.py
python/test/unit/cuda/__init__.py
python/test/unit/cuda/test_flashattention.py
python/test/unit/cuda/test_gemm.py
python/test/unit/cuda/test_gemm_fusion.py
python/test/unit/cuda/test_mixed_io.py
python/test/unit/cuda/test_tensor_descriptor.py
python/test/unit/cuda/test_tma_descriptor.py
python/test/unit/cuda/test_tma_store_gemm.py
python/test/unit/instrumentation/test_gpuhello.py
python/test/unit/language/print_helper.py
python/test/unit/language/test_annotations.py
python/test/unit/language/test_block_pointer.py
python/test/unit/language/test_compile_errors.py
python/test/unit/language/test_compile_only.py
python/test/unit/language/test_conversions.py
python/test/unit/language/test_core.py
python/test/unit/language/test_decorator.py
python/test/unit/language/test_libdevice.py
python/test/unit/language/test_line_info.py
python/test/unit/language/test_matmul.py
python/test/unit/language/test_mxfp.py
python/test/unit/language/test_pipeliner.py
python/test/unit/language/test_random.py
python/test/unit/language/test_reproducer.py
python/test/unit/language/test_standard.py
python/test/unit/language/test_subprocess.py
python/test/unit/language/test_tuple.py
python/test/unit/language/test_warp_specialization.py
python/test/unit/runtime/test_autotuner.py
python/test/unit/runtime/test_bindings.py
python/test/unit/runtime/test_cache.py
python/test/unit/runtime/test_compilation_listener.py
python/test/unit/runtime/test_cublas.py
python/test/unit/runtime/test_driver.py
python/test/unit/runtime/test_jit.py
python/test/unit/runtime/test_launch.py
python/test/unit/runtime/test_subproc.py
python/test/unit/tools/test_aot.py
python/test/unit/tools/test_disasm.py
python/test/unit/tools/test_irsource.py
python/triton/__init__.py
python/triton/_internal_testing.py
python/triton/_utils.py
python/triton/errors.py
python/triton/knobs.py
python/triton/testing.py
python/triton/backends/__init__.py
python/triton/backends/compiler.py
python/triton/backends/driver.py
python/triton/backends/amd/__init__.py
python/triton/backends/amd/compiler.py
python/triton/backends/amd/driver.c
python/triton/backends/amd/driver.py
python/triton/backends/amd/include/hip/channel_descriptor.h
python/triton/backends/amd/include/hip/device_functions.h
python/triton/backends/amd/include/hip/driver_types.h
python/triton/backends/amd/include/hip/hip_bf16.h
python/triton/backends/amd/include/hip/hip_bfloat16.h
python/triton/backends/amd/include/hip/hip_common.h
python/triton/backends/amd/include/hip/hip_complex.h
python/triton/backends/amd/include/hip/hip_cooperative_groups.h
python/triton/backends/amd/include/hip/hip_deprecated.h
python/triton/backends/amd/include/hip/hip_ext.h
python/triton/backends/amd/include/hip/hip_fp16.h
python/triton/backends/amd/include/hip/hip_fp8.h
python/triton/backends/amd/include/hip/hip_gl_interop.h
python/triton/backends/amd/include/hip/hip_hcc.h
python/triton/backends/amd/include/hip/hip_math_constants.h
python/triton/backends/amd/include/hip/hip_profile.h
python/triton/backends/amd/include/hip/hip_runtime.h
python/triton/backends/amd/include/hip/hip_runtime_api.h
python/triton/backends/amd/include/hip/hip_texture_types.h
python/triton/backends/amd/include/hip/hip_vector_types.h
python/triton/backends/amd/include/hip/hip_version.h
python/triton/backends/amd/include/hip/hiprtc.h
python/triton/backends/amd/include/hip/library_types.h
python/triton/backends/amd/include/hip/math_functions.h
python/triton/backends/amd/include/hip/surface_types.h
python/triton/backends/amd/include/hip/texture_types.h
python/triton/backends/amd/include/hip/amd_detail/amd_channel_descriptor.h
python/triton/backends/amd/include/hip/amd_detail/amd_device_functions.h
python/triton/backends/amd/include/hip/amd_detail/amd_hip_atomic.h
python/triton/backends/amd/include/hip/amd_detail/amd_hip_bf16.h
python/triton/backends/amd/include/hip/amd_detail/amd_hip_bfloat16.h
python/triton/backends/amd/include/hip/amd_detail/amd_hip_common.h
python/triton/backends/amd/include/hip/amd_detail/amd_hip_complex.h
python/triton/backends/amd/include/hip/amd_detail/amd_hip_cooperative_groups.h
python/triton/backends/amd/include/hip/amd_detail/amd_hip_fp16.h
python/triton/backends/amd/include/hip/amd_detail/amd_hip_fp8.h
python/triton/backends/amd/include/hip/amd_detail/amd_hip_gl_interop.h
python/triton/backends/amd/include/hip/amd_detail/amd_hip_math_constants.h
python/triton/backends/amd/include/hip/amd_detail/amd_hip_runtime.h
python/triton/backends/amd/include/hip/amd_detail/amd_hip_runtime_pt_api.h
python/triton/backends/amd/include/hip/amd_detail/amd_hip_unsafe_atomics.h
python/triton/backends/amd/include/hip/amd_detail/amd_hip_vector_types.h
python/triton/backends/amd/include/hip/amd_detail/amd_math_functions.h
python/triton/backends/amd/include/hip/amd_detail/amd_surface_functions.h
python/triton/backends/amd/include/hip/amd_detail/amd_warp_functions.h
python/triton/backends/amd/include/hip/amd_detail/amd_warp_sync_functions.h
python/triton/backends/amd/include/hip/amd_detail/concepts.hpp
python/triton/backends/amd/include/hip/amd_detail/device_library_decls.h
python/triton/backends/amd/include/hip/amd_detail/functional_grid_launch.hpp
python/triton/backends/amd/include/hip/amd_detail/grid_launch.h
python/triton/backends/amd/include/hip/amd_detail/grid_launch.hpp
python/triton/backends/amd/include/hip/amd_detail/grid_launch_GGL.hpp
python/triton/backends/amd/include/hip/amd_detail/helpers.hpp
python/triton/backends/amd/include/hip/amd_detail/hip_api_trace.hpp
python/triton/backends/amd/include/hip/amd_detail/hip_assert.h
python/triton/backends/amd/include/hip/amd_detail/hip_cooperative_groups_helper.h
python/triton/backends/amd/include/hip/amd_detail/hip_fp16_gcc.h
python/triton/backends/amd/include/hip/amd_detail/hip_fp16_math_fwd.h
python/triton/backends/amd/include/hip/amd_detail/hip_ldg.h
python/triton/backends/amd/include/hip/amd_detail/hip_prof_str.h
python/triton/backends/amd/include/hip/amd_detail/hip_runtime_prof.h
python/triton/backends/amd/include/hip/amd_detail/host_defines.h
python/triton/backends/amd/include/hip/amd_detail/hsa_helpers.hpp
python/triton/backends/amd/include/hip/amd_detail/macro_based_grid_launch.hpp
python/triton/backends/amd/include/hip/amd_detail/math_fwd.h
python/triton/backends/amd/include/hip/amd_detail/ockl_image.h
python/triton/backends/amd/include/hip/amd_detail/program_state.hpp
python/triton/backends/amd/include/hip/amd_detail/texture_fetch_functions.h
python/triton/backends/amd/include/hip/amd_detail/texture_indirect_functions.h
python/triton/backends/amd/include/hsa/Brig.h
python/triton/backends/amd/include/hsa/amd_hsa_common.h
python/triton/backends/amd/include/hsa/amd_hsa_elf.h
python/triton/backends/amd/include/hsa/amd_hsa_kernel_code.h
python/triton/backends/amd/include/hsa/amd_hsa_queue.h
python/triton/backends/amd/include/hsa/amd_hsa_signal.h
python/triton/backends/amd/include/hsa/hsa.h
python/triton/backends/amd/include/hsa/hsa_amd_tool.h
python/triton/backends/amd/include/hsa/hsa_api_trace.h
python/triton/backends/amd/include/hsa/hsa_api_trace_version.h
python/triton/backends/amd/include/hsa/hsa_ext_amd.h
python/triton/backends/amd/include/hsa/hsa_ext_finalize.h
python/triton/backends/amd/include/hsa/hsa_ext_image.h
python/triton/backends/amd/include/hsa/hsa_ven_amd_aqlprofile.h
python/triton/backends/amd/include/hsa/hsa_ven_amd_loader.h
python/triton/backends/amd/include/hsa/hsa_ven_amd_pc_sampling.h
python/triton/backends/amd/include/roctracer/hip_ostream_ops.h
python/triton/backends/amd/include/roctracer/hsa_ostream_ops.h
python/triton/backends/amd/include/roctracer/hsa_prof_str.h
python/triton/backends/amd/include/roctracer/roctracer.h
python/triton/backends/amd/include/roctracer/roctracer_ext.h
python/triton/backends/amd/include/roctracer/roctracer_hcc.h
python/triton/backends/amd/include/roctracer/roctracer_hip.h
python/triton/backends/amd/include/roctracer/roctracer_hsa.h
python/triton/backends/amd/include/roctracer/roctracer_plugin.h
python/triton/backends/amd/include/roctracer/roctracer_roctx.h
python/triton/backends/amd/include/roctracer/roctx.h
python/triton/backends/amd/include/roctracer/ext/prof_protocol.h
python/triton/backends/amd/lib/asanrtl.bc
python/triton/backends/amd/lib/ockl.bc
python/triton/backends/amd/lib/ocml.bc
python/triton/backends/metal/__init__.py
python/triton/backends/metal/compiler.py
python/triton/backends/metal/driver.py
python/triton/backends/metal/include/metal/metal_api.h
python/triton/backends/metal/lib/metal_api.cpp
python/triton/backends/nvidia/__init__.py
python/triton/backends/nvidia/compiler.py
python/triton/backends/nvidia/driver.c
python/triton/backends/nvidia/driver.py
python/triton/backends/nvidia/bin/cuobjdump
python/triton/backends/nvidia/bin/nvdisasm
python/triton/backends/nvidia/bin/ptxas
python/triton/backends/nvidia/include/builtin_types.h
python/triton/backends/nvidia/include/channel_descriptor.h
python/triton/backends/nvidia/include/common_functions.h
python/triton/backends/nvidia/include/cooperative_groups.h
python/triton/backends/nvidia/include/cuComplex.h
python/triton/backends/nvidia/include/cuda.h
python/triton/backends/nvidia/include/cudaEGL.h
python/triton/backends/nvidia/include/cudaEGLTypedefs.h
python/triton/backends/nvidia/include/cudaGL.h
python/triton/backends/nvidia/include/cudaGLTypedefs.h
python/triton/backends/nvidia/include/cudaProfilerTypedefs.h
python/triton/backends/nvidia/include/cudaTypedefs.h
python/triton/backends/nvidia/include/cudaVDPAU.h
python/triton/backends/nvidia/include/cudaVDPAUTypedefs.h
python/triton/backends/nvidia/include/cuda_awbarrier.h
python/triton/backends/nvidia/include/cuda_awbarrier_helpers.h
python/triton/backends/nvidia/include/cuda_awbarrier_primitives.h
python/triton/backends/nvidia/include/cuda_bf16.h
python/triton/backends/nvidia/include/cuda_bf16.hpp
python/triton/backends/nvidia/include/cuda_device_runtime_api.h
python/triton/backends/nvidia/include/cuda_egl_interop.h
python/triton/backends/nvidia/include/cuda_fp16.h
python/triton/backends/nvidia/include/cuda_fp16.hpp
python/triton/backends/nvidia/include/cuda_fp4.h
python/triton/backends/nvidia/include/cuda_fp4.hpp
python/triton/backends/nvidia/include/cuda_fp6.h
python/triton/backends/nvidia/include/cuda_fp6.hpp
python/triton/backends/nvidia/include/cuda_fp8.h
python/triton/backends/nvidia/include/cuda_fp8.hpp
python/triton/backends/nvidia/include/cuda_gl_interop.h
python/triton/backends/nvidia/include/cuda_occupancy.h
python/triton/backends/nvidia/include/cuda_pipeline.h
python/triton/backends/nvidia/include/cuda_pipeline_helpers.h
python/triton/backends/nvidia/include/cuda_pipeline_primitives.h
python/triton/backends/nvidia/include/cuda_runtime.h
python/triton/backends/nvidia/include/cuda_runtime_api.h
python/triton/backends/nvidia/include/cuda_stdint.h
python/triton/backends/nvidia/include/cuda_surface_types.h
python/triton/backends/nvidia/include/cuda_texture_types.h
python/triton/backends/nvidia/include/cuda_vdpau_interop.h
python/triton/backends/nvidia/include/cudart_platform.h
python/triton/backends/nvidia/include/cupti.h
python/triton/backends/nvidia/include/cupti_activity.h
python/triton/backends/nvidia/include/cupti_activity_deprecated.h
python/triton/backends/nvidia/include/cupti_callbacks.h
python/triton/backends/nvidia/include/cupti_checkpoint.h
python/triton/backends/nvidia/include/cupti_common.h
python/triton/backends/nvidia/include/cupti_driver_cbid.h
python/triton/backends/nvidia/include/cupti_events.h
python/triton/backends/nvidia/include/cupti_metrics.h
python/triton/backends/nvidia/include/cupti_nvtx_cbid.h
python/triton/backends/nvidia/include/cupti_pcsampling.h
python/triton/backends/nvidia/include/cupti_pcsampling_util.h
python/triton/backends/nvidia/include/cupti_pmsampling.h
python/triton/backends/nvidia/include/cupti_profiler_host.h
python/triton/backends/nvidia/include/cupti_profiler_target.h
python/triton/backends/nvidia/include/cupti_range_profiler.h
python/triton/backends/nvidia/include/cupti_result.h
python/triton/backends/nvidia/include/cupti_runtime_cbid.h
python/triton/backends/nvidia/include/cupti_sass_metrics.h
python/triton/backends/nvidia/include/cupti_target.h
python/triton/backends/nvidia/include/cupti_version.h
python/triton/backends/nvidia/include/device_atomic_functions.h
python/triton/backends/nvidia/include/device_atomic_functions.hpp
python/triton/backends/nvidia/include/device_double_functions.h
python/triton/backends/nvidia/include/device_functions.h
python/triton/backends/nvidia/include/device_launch_parameters.h
python/triton/backends/nvidia/include/device_types.h
python/triton/backends/nvidia/include/driver_functions.h
python/triton/backends/nvidia/include/driver_types.h
python/triton/backends/nvidia/include/fatbinary_section.h
python/triton/backends/nvidia/include/generated_cudaGL_meta.h
python/triton/backends/nvidia/include/generated_cudaVDPAU_meta.h
python/triton/backends/nvidia/include/generated_cuda_gl_interop_meta.h
python/triton/backends/nvidia/include/generated_cuda_meta.h
python/triton/backends/nvidia/include/generated_cuda_runtime_api_meta.h
python/triton/backends/nvidia/include/generated_cuda_vdpau_interop_meta.h
python/triton/backends/nvidia/include/generated_cudart_removed_meta.h
python/triton/backends/nvidia/include/generated_nvtx_meta.h
python/triton/backends/nvidia/include/host_config.h
python/triton/backends/nvidia/include/host_defines.h
python/triton/backends/nvidia/include/library_types.h
python/triton/backends/nvidia/include/math_constants.h
python/triton/backends/nvidia/include/math_functions.h
python/triton/backends/nvidia/include/mma.h
python/triton/backends/nvidia/include/nvPTXCompiler.h
python/triton/backends/nvidia/include/nvfunctional
python/triton/backends/nvidia/include/nvperf_common.h
python/triton/backends/nvidia/include/nvperf_cuda_host.h
python/triton/backends/nvidia/include/nvperf_host.h
python/triton/backends/nvidia/include/nvperf_target.h
python/triton/backends/nvidia/include/sm_20_atomic_functions.h
python/triton/backends/nvidia/include/sm_20_atomic_functions.hpp
python/triton/backends/nvidia/include/sm_20_intrinsics.h
python/triton/backends/nvidia/include/sm_20_intrinsics.hpp
python/triton/backends/nvidia/include/sm_30_intrinsics.h
python/triton/backends/nvidia/include/sm_30_intrinsics.hpp
python/triton/backends/nvidia/include/sm_32_atomic_functions.h
python/triton/backends/nvidia/include/sm_32_atomic_functions.hpp
python/triton/backends/nvidia/include/sm_32_intrinsics.h
python/triton/backends/nvidia/include/sm_32_intrinsics.hpp
python/triton/backends/nvidia/include/sm_35_atomic_functions.h
python/triton/backends/nvidia/include/sm_35_intrinsics.h
python/triton/backends/nvidia/include/sm_60_atomic_functions.h
python/triton/backends/nvidia/include/sm_60_atomic_functions.hpp
python/triton/backends/nvidia/include/sm_61_intrinsics.h
python/triton/backends/nvidia/include/sm_61_intrinsics.hpp
python/triton/backends/nvidia/include/surface_functions.h
python/triton/backends/nvidia/include/surface_indirect_functions.h
python/triton/backends/nvidia/include/surface_types.h
python/triton/backends/nvidia/include/texture_fetch_functions.h
python/triton/backends/nvidia/include/texture_indirect_functions.h
python/triton/backends/nvidia/include/texture_types.h
python/triton/backends/nvidia/include/vector_functions.h
python/triton/backends/nvidia/include/vector_functions.hpp
python/triton/backends/nvidia/include/vector_types.h
python/triton/backends/nvidia/include/Openacc/cupti_openacc.h
python/triton/backends/nvidia/include/Openmp/cupti_openmp.h
python/triton/backends/nvidia/include/Openmp/omp-tools.h
python/triton/backends/nvidia/include/cooperative_groups/memcpy_async.h
python/triton/backends/nvidia/include/cooperative_groups/reduce.h
python/triton/backends/nvidia/include/cooperative_groups/scan.h
python/triton/backends/nvidia/include/cooperative_groups/details/async.h
python/triton/backends/nvidia/include/cooperative_groups/details/coalesced_reduce.h
python/triton/backends/nvidia/include/cooperative_groups/details/coalesced_scan.h
python/triton/backends/nvidia/include/cooperative_groups/details/driver_abi.h
python/triton/backends/nvidia/include/cooperative_groups/details/functional.h
python/triton/backends/nvidia/include/cooperative_groups/details/helpers.h
python/triton/backends/nvidia/include/cooperative_groups/details/info.h
python/triton/backends/nvidia/include/cooperative_groups/details/invoke.h
python/triton/backends/nvidia/include/cooperative_groups/details/memory.h
python/triton/backends/nvidia/include/cooperative_groups/details/partitioning.h
python/triton/backends/nvidia/include/cooperative_groups/details/reduce.h
python/triton/backends/nvidia/include/cooperative_groups/details/scan.h
python/triton/backends/nvidia/include/cooperative_groups/details/sync.h
python/triton/backends/nvidia/include/crt/common_functions.h
python/triton/backends/nvidia/include/crt/cudacc_ext.h
python/triton/backends/nvidia/include/crt/device_double_functions.h
python/triton/backends/nvidia/include/crt/device_double_functions.hpp
python/triton/backends/nvidia/include/crt/device_fp128_functions.h
python/triton/backends/nvidia/include/crt/device_functions.h
python/triton/backends/nvidia/include/crt/device_functions.hpp
python/triton/backends/nvidia/include/crt/func_macro.h
python/triton/backends/nvidia/include/crt/host_config.h
python/triton/backends/nvidia/include/crt/host_defines.h
python/triton/backends/nvidia/include/crt/host_runtime.h
python/triton/backends/nvidia/include/crt/math_functions.h
python/triton/backends/nvidia/include/crt/math_functions.hpp
python/triton/backends/nvidia/include/crt/mma.h
python/triton/backends/nvidia/include/crt/mma.hpp
python/triton/backends/nvidia/include/crt/nvfunctional
python/triton/backends/nvidia/include/crt/sm_100_rt.h
python/triton/backends/nvidia/include/crt/sm_100_rt.hpp
python/triton/backends/nvidia/include/crt/sm_70_rt.h
python/triton/backends/nvidia/include/crt/sm_70_rt.hpp
python/triton/backends/nvidia/include/crt/sm_80_rt.h
python/triton/backends/nvidia/include/crt/sm_80_rt.hpp
python/triton/backends/nvidia/include/crt/sm_90_rt.h
python/triton/backends/nvidia/include/crt/sm_90_rt.hpp
python/triton/backends/nvidia/include/crt/storage_class.h
python/triton/backends/nvidia/lib/libdevice.10.bc
python/triton/backends/nvidia/lib/cupti/libcheckpoint.so
python/triton/backends/nvidia/lib/cupti/libcupti.so
python/triton/backends/nvidia/lib/cupti/libcupti.so.12
python/triton/backends/nvidia/lib/cupti/libcupti.so.2025.1.1
python/triton/backends/nvidia/lib/cupti/libcupti_static.a
python/triton/backends/nvidia/lib/cupti/libnvperf_host.so
python/triton/backends/nvidia/lib/cupti/libnvperf_target.so
python/triton/backends/nvidia/lib/cupti/libpcsamplingutil.so
python/triton/compiler/__init__.py
python/triton/compiler/code_generator.py
python/triton/compiler/compiler.py
python/triton/compiler/errors.py
python/triton/compiler/make_launcher.py
python/triton/language/__init__.py
python/triton/language/_utils.py
python/triton/language/core.py
python/triton/language/math.py
python/triton/language/random.py
python/triton/language/semantic.py
python/triton/language/standard.py
python/triton/language/extra/__init__.py
python/triton/language/extra/libdevice.py
python/triton/language/extra/cuda/__init__.py
python/triton/language/extra/cuda/gdc.py
python/triton/language/extra/cuda/libdevice.py
python/triton/language/extra/cuda/utils.py
python/triton/language/extra/hip/__init__.py
python/triton/language/extra/hip/libdevice.py
python/triton/profiler/__init__.py
python/triton/profiler/context.py
python/triton/profiler/flags.py
python/triton/profiler/hook.py
python/triton/profiler/language.py
python/triton/profiler/profile.py
python/triton/profiler/proton.py
python/triton/profiler/scope.py
python/triton/profiler/state.py
python/triton/profiler/viewer.py
python/triton/runtime/__init__.py
python/triton/runtime/_allocation.py
python/triton/runtime/autotuner.py
python/triton/runtime/build.py
python/triton/runtime/cache.py
python/triton/runtime/driver.py
python/triton/runtime/errors.py
python/triton/runtime/interpreter.py
python/triton/runtime/jit.py
python/triton/tools/__init__.py
python/triton/tools/build_extern.py
python/triton/tools/compile.py
python/triton/tools/disasm.py
python/triton/tools/link.py
python/triton/tools/mxfp.py
python/triton/tools/tensor_descriptor.py
python/triton/tools/extra/cuda/compile.c
python/triton/tools/extra/cuda/compile.h
python/triton_metal.egg-info/PKG-INFO
python/triton_metal.egg-info/SOURCES.txt
python/triton_metal.egg-info/dependency_links.txt
python/triton_metal.egg-info/entry_points.txt
python/triton_metal.egg-info/not-zip-safe
python/triton_metal.egg-info/requires.txt
python/triton_metal.egg-info/top_level.txt
test/CMakeLists.txt
test/lit.cfg.py
test/lit.site.cfg.py.in
test/Analysis/test-alias.mlir
test/Analysis/test-alignment.mlir
test/Analysis/test-allocation.mlir
test/Analysis/test-membar-ttng.mlir
test/Analysis/test-membar.mlir
test/Conversion/allocate_shared_memory.mlir
test/Conversion/allocate_warp_groups.mlir
test/Conversion/atomic_ldst.mlir
test/Conversion/cvt_to_llvm.mlir
test/Conversion/dedup-by-constancy.mlir
test/Conversion/divide-by-0.mlir
test/Conversion/gather_to_llvm.mlir
test/Conversion/nvgpu_to_llvm.mlir
test/Conversion/reduce_to_llvm.mlir
test/Conversion/relayout_tritongpu.mlir
test/Conversion/scan_to_llvm.mlir
test/Conversion/tma_to_llvm.mlir
test/Conversion/triton_to_tritongpu.mlir
test/Conversion/tritongpu_to_llvm.mlir
test/Conversion/tritongpu_to_llvm_blackwell.mlir
test/Conversion/tritongpu_to_llvm_block_dot_shortcut.mlir
test/Conversion/tritongpu_to_llvm_debug.mlir
test/Conversion/tritongpu_to_llvm_hopper.mlir
test/Conversion/tritongpu_to_llvm_hopper_ptx80.mlir
test/Conversion/tritongpu_to_llvm_volta.mlir
test/Conversion/tritongpu_to_ptx.mlir
test/Conversion/tritonnvidiagpu_to_llvm.mlir
test/Conversion/warp_specialize_to_llvm.mlir
test/Conversion/amd/amdgpu_membar.mlir
test/Conversion/amd/async-ops-alias-scopes.mlir
test/Conversion/amd/async_ops_to_llvm.mlir
test/Conversion/amd/async_ops_to_llvm_invalid.mlir
test/Conversion/amd/buffer_load_store.mlir
test/Conversion/amd/buffer_load_to_local_to_llvm.mlir
test/Conversion/amd/builtin_func_to_llvm.mlir
test/Conversion/amd/compute-base-ptr.mlir
test/Conversion/amd/dedup-by-constancy.mlir
test/Conversion/amd/ds_transpose.mlir
test/Conversion/amd/fdivide.mlir
test/Conversion/amd/fp_to_fp.mlir
test/Conversion/amd/in_thread_transpose.mlir
test/Conversion/amd/invalid_extractslice_to_llvm.mlir
test/Conversion/amd/load_store.mlir
test/Conversion/amd/math-denorm-handling.mlir
test/Conversion/amd/mfma-shortcut.mlir
test/Conversion/amd/minmax.mlir
test/Conversion/amd/tritongpu_to_llvm.mlir
test/Conversion/amd/tritongpu_to_llvm_rdna.mlir
test/Conversion/amd/tritongpu_wmma_dot_to_llvm.mlir
test/Hopper/CMakeLists.txt
test/Hopper/WarpSpecialization/ws_task_partition.mlir
test/LLVMIR/break-phi-struct.ll
test/NVWS/invalid.mlir
test/NVWS/lower_aref.mlir
test/NVWS/lower_warp_group.mlir
test/NVWS/ops.mlir
test/Proton/ops.mlir
test/Tools/tensor_layout_print.mlir
test/Triton/canonicalize.mlir
test/Triton/combine.mlir
test/Triton/invalid.mlir
test/Triton/loop-invariant-code-motion.mlir
test/Triton/loop-unroll.mlir
test/Triton/ops.mlir
test/Triton/reorder-broadcast.mlir
test/Triton/reproducer.mlir
test/Triton/rewrite-tensor-pointer.mlir
test/Triton/vecadd.mlir
test/Triton/verify-make-range.mlir
test/TritonGPU/WGMMAPrefetch.mlir
test/TritonGPU/accelerate-matmul.mlir
test/TritonGPU/accumulator-init.mlir
test/TritonGPU/atomic-cas.mlir
test/TritonGPU/automatic-warp-specialization.mlir
test/TritonGPU/canonicalize.mlir
test/TritonGPU/coalesce-async-copy.mlir
test/TritonGPU/coalesce.mlir
test/TritonGPU/combine-select-if.mlir
test/TritonGPU/combine.mlir
test/TritonGPU/dot-operands.mlir
test/TritonGPU/fence-inserstion.mlir
test/TritonGPU/fuse-nested-loops.mlir
test/TritonGPU/global_scratch_alloc.mlir
test/TritonGPU/global_scratch_to_llvm.mlir
test/TritonGPU/hoist-tmem-alloc.mlir
test/TritonGPU/invalid-attributes.mlir
test/TritonGPU/invalid.mlir
test/TritonGPU/load-mma-specialization.mlir
test/TritonGPU/loop-pipeline-async-latencies.mlir
test/TritonGPU/loop-pipeline-blackwell.mlir
test/TritonGPU/loop-pipeline-cuda.mlir
test/TritonGPU/loop-pipeline-expand.mlir
test/TritonGPU/loop-pipeline-hip.mlir
test/TritonGPU/loop-pipeline-hopper-remove-wait.mlir
test/TritonGPU/loop-pipeline-hopper.mlir
test/TritonGPU/loop-pipeline-indirect-load.mlir
test/TritonGPU/loop-pipeline.mlir
test/TritonGPU/loop-schedule.mlir
test/TritonGPU/matmul-loop-pipeline.mlir
test/TritonGPU/matmul.mlir
test/TritonGPU/ops.mlir
test/TritonGPU/optimize-locality.mlir
test/TritonGPU/optimize-partition-warps.mlir
test/TritonGPU/optimize_epilogue.mlir
test/TritonGPU/partition-loops.mlir
test/TritonGPU/pipeline-assign-latencies.mlir
test/TritonGPU/pipeline-loop-nest.mlir
test/TritonGPU/pipeline-lower-loop.mlir
test/TritonGPU/pipeline-schedule-loop.mlir
test/TritonGPU/prefetch.mlir
test/TritonGPU/promote-lhs-to-tmem.mlir
test/TritonGPU/reduce-data-duplication.mlir
test/TritonGPU/reorder-instructions.mlir
test/TritonGPU/rewrite-partition-dependencies.mlir
test/TritonGPU/tf32x3-matmul.mlir
test/TritonGPU/verify-blocked-layout.mlir
test/TritonGPU/amd/accelerate-amd-matmul-chain-dot.mlir
test/TritonGPU/amd/accelerate-amd-matmul-fma.mlir
test/TritonGPU/amd/accelerate-amd-matmul-mfma-gfx950.mlir
test/TritonGPU/amd/accelerate-amd-matmul-mfma.mlir
test/TritonGPU/amd/accelerate-amd-matmul-wmma-gen1.mlir
test/TritonGPU/amd/accelerate-amd-matmul-wmma-gen2.mlir
test/TritonGPU/amd/amd-block-pingpong.mlir
test/TritonGPU/amd/amd-canonicalize-pointers-dont-run-mlir-canonicalizer.mlir
test/TritonGPU/amd/amd-canonicalize-pointers.mlir
test/TritonGPU/amd/amd-coalesce-async-copy.mlir
test/TritonGPU/amd/amd-conditional-barrier.mlir
test/TritonGPU/amd/amd-convert-buffer-ops-range-analysis.mlir
test/TritonGPU/amd/amd-convert-buffer-ops.mlir
test/TritonGPU/amd/amd-extractslice-op.mlir
test/TritonGPU/amd/amd-fold-true-cmpi.mlir
test/TritonGPU/amd/amd-hoist-cvtToDotOp.mlir
test/TritonGPU/amd/amd-instruction-sched.mlir
test/TritonGPU/amd/amd-optimize-epilogue.mlir
test/TritonGPU/amd/amd-range-analysis.mlir
test/TritonGPU/amd/amd-reorder-instructions.mlir
test/TritonGPU/amd/amd-sched-2nd-load.mlir
test/TritonGPU/amd/amd-schedule-hint.mlir
test/TritonGPU/amd/amd-stream-loop-assume.mlir
test/TritonGPU/amd/amd-stream-prefetch.mlir
test/TritonGPU/amd/amd-update-async-wait-count.mlir
test/TritonGPU/amd/in-thread-transpose.mlir
test/TritonGPU/amd/invalid.mlir
test/TritonGPU/amd/mfma-double-rate.mlir
test/TritonGPU/amd/mfma-xf32.mlir
test/TritonGPU/amd/optimize-lds-usage.mlir
test/TritonGPU/amd/sink-setprio-mfma.mlir
test/TritonGPU/samples/descriptor-matmul-pipeline.mlir
test/TritonGPU/samples/descriptor-matmul-pipeline.mlir.in
test/TritonGPU/samples/simulated-grouped-gemm.mlir
test/TritonGPU/samples/simulated-grouped-gemm.mlir.in
test/TritonNvidiaGPU/bf16-atomics.mlir
test/TritonNvidiaGPU/canonicalize.mlir
test/TritonNvidiaGPU/invalid.mlir
test/TritonNvidiaGPU/membar.mlir
test/TritonNvidiaGPU/mma_lowering.mlir
test/TritonNvidiaGPU/ops.mlir
test/TritonNvidiaGPU/optimize_descriptor_encoding.mlir
test/TritonNvidiaGPU/test_promotion_to_tensor_memory.mlir
test/TritonNvidiaGPU/test_tensor_memory_allocation.mlir
test/TritonNvidiaGPU/tma_lowering.mlir
test/TritonNvidiaGPU/tmem_layouts.mlir
test/lib/CMakeLists.txt
test/lib/Analysis/CMakeLists.txt
test/lib/Analysis/TestAlias.cpp
test/lib/Analysis/TestAllocation.cpp
test/lib/Analysis/TestAxisInfo.cpp
test/lib/Analysis/TestMembar.cpp
test/lib/Instrumentation/CMakeLists.txt
test/lib/Instrumentation/GPUHello.cpp
third_party/amd/CMakeLists.txt
third_party/amd/backend/__init__.py
third_party/amd/backend/compiler.py
third_party/amd/backend/driver.c
third_party/amd/backend/driver.py
third_party/amd/backend/include/hip/channel_descriptor.h
third_party/amd/backend/include/hip/device_functions.h
third_party/amd/backend/include/hip/driver_types.h
third_party/amd/backend/include/hip/hip_bf16.h
third_party/amd/backend/include/hip/hip_bfloat16.h
third_party/amd/backend/include/hip/hip_common.h
third_party/amd/backend/include/hip/hip_complex.h
third_party/amd/backend/include/hip/hip_cooperative_groups.h
third_party/amd/backend/include/hip/hip_deprecated.h
third_party/amd/backend/include/hip/hip_ext.h
third_party/amd/backend/include/hip/hip_fp16.h
third_party/amd/backend/include/hip/hip_fp8.h
third_party/amd/backend/include/hip/hip_gl_interop.h
third_party/amd/backend/include/hip/hip_hcc.h
third_party/amd/backend/include/hip/hip_math_constants.h
third_party/amd/backend/include/hip/hip_profile.h
third_party/amd/backend/include/hip/hip_runtime.h
third_party/amd/backend/include/hip/hip_runtime_api.h
third_party/amd/backend/include/hip/hip_texture_types.h
third_party/amd/backend/include/hip/hip_vector_types.h
third_party/amd/backend/include/hip/hip_version.h
third_party/amd/backend/include/hip/hiprtc.h
third_party/amd/backend/include/hip/library_types.h
third_party/amd/backend/include/hip/math_functions.h
third_party/amd/backend/include/hip/surface_types.h
third_party/amd/backend/include/hip/texture_types.h
third_party/amd/backend/include/hip/amd_detail/amd_channel_descriptor.h
third_party/amd/backend/include/hip/amd_detail/amd_device_functions.h
third_party/amd/backend/include/hip/amd_detail/amd_hip_atomic.h
third_party/amd/backend/include/hip/amd_detail/amd_hip_bf16.h
third_party/amd/backend/include/hip/amd_detail/amd_hip_bfloat16.h
third_party/amd/backend/include/hip/amd_detail/amd_hip_common.h
third_party/amd/backend/include/hip/amd_detail/amd_hip_complex.h
third_party/amd/backend/include/hip/amd_detail/amd_hip_cooperative_groups.h
third_party/amd/backend/include/hip/amd_detail/amd_hip_fp16.h
third_party/amd/backend/include/hip/amd_detail/amd_hip_fp8.h
third_party/amd/backend/include/hip/amd_detail/amd_hip_gl_interop.h
third_party/amd/backend/include/hip/amd_detail/amd_hip_math_constants.h
third_party/amd/backend/include/hip/amd_detail/amd_hip_runtime.h
third_party/amd/backend/include/hip/amd_detail/amd_hip_runtime_pt_api.h
third_party/amd/backend/include/hip/amd_detail/amd_hip_unsafe_atomics.h
third_party/amd/backend/include/hip/amd_detail/amd_hip_vector_types.h
third_party/amd/backend/include/hip/amd_detail/amd_math_functions.h
third_party/amd/backend/include/hip/amd_detail/amd_surface_functions.h
third_party/amd/backend/include/hip/amd_detail/amd_warp_functions.h
third_party/amd/backend/include/hip/amd_detail/amd_warp_sync_functions.h
third_party/amd/backend/include/hip/amd_detail/concepts.hpp
third_party/amd/backend/include/hip/amd_detail/device_library_decls.h
third_party/amd/backend/include/hip/amd_detail/functional_grid_launch.hpp
third_party/amd/backend/include/hip/amd_detail/grid_launch.h
third_party/amd/backend/include/hip/amd_detail/grid_launch.hpp
third_party/amd/backend/include/hip/amd_detail/grid_launch_GGL.hpp
third_party/amd/backend/include/hip/amd_detail/helpers.hpp
third_party/amd/backend/include/hip/amd_detail/hip_api_trace.hpp
third_party/amd/backend/include/hip/amd_detail/hip_assert.h
third_party/amd/backend/include/hip/amd_detail/hip_cooperative_groups_helper.h
third_party/amd/backend/include/hip/amd_detail/hip_fp16_gcc.h
third_party/amd/backend/include/hip/amd_detail/hip_fp16_math_fwd.h
third_party/amd/backend/include/hip/amd_detail/hip_ldg.h
third_party/amd/backend/include/hip/amd_detail/hip_prof_str.h
third_party/amd/backend/include/hip/amd_detail/hip_runtime_prof.h
third_party/amd/backend/include/hip/amd_detail/host_defines.h
third_party/amd/backend/include/hip/amd_detail/hsa_helpers.hpp
third_party/amd/backend/include/hip/amd_detail/macro_based_grid_launch.hpp
third_party/amd/backend/include/hip/amd_detail/math_fwd.h
third_party/amd/backend/include/hip/amd_detail/ockl_image.h
third_party/amd/backend/include/hip/amd_detail/program_state.hpp
third_party/amd/backend/include/hip/amd_detail/texture_fetch_functions.h
third_party/amd/backend/include/hip/amd_detail/texture_indirect_functions.h
third_party/amd/backend/include/hsa/Brig.h
third_party/amd/backend/include/hsa/amd_hsa_common.h
third_party/amd/backend/include/hsa/amd_hsa_elf.h
third_party/amd/backend/include/hsa/amd_hsa_kernel_code.h
third_party/amd/backend/include/hsa/amd_hsa_queue.h
third_party/amd/backend/include/hsa/amd_hsa_signal.h
third_party/amd/backend/include/hsa/hsa.h
third_party/amd/backend/include/hsa/hsa_amd_tool.h
third_party/amd/backend/include/hsa/hsa_api_trace.h
third_party/amd/backend/include/hsa/hsa_api_trace_version.h
third_party/amd/backend/include/hsa/hsa_ext_amd.h
third_party/amd/backend/include/hsa/hsa_ext_finalize.h
third_party/amd/backend/include/hsa/hsa_ext_image.h
third_party/amd/backend/include/hsa/hsa_ven_amd_aqlprofile.h
third_party/amd/backend/include/hsa/hsa_ven_amd_loader.h
third_party/amd/backend/include/hsa/hsa_ven_amd_pc_sampling.h
third_party/amd/backend/include/roctracer/hip_ostream_ops.h
third_party/amd/backend/include/roctracer/hsa_ostream_ops.h
third_party/amd/backend/include/roctracer/hsa_prof_str.h
third_party/amd/backend/include/roctracer/roctracer.h
third_party/amd/backend/include/roctracer/roctracer_ext.h
third_party/amd/backend/include/roctracer/roctracer_hcc.h
third_party/amd/backend/include/roctracer/roctracer_hip.h
third_party/amd/backend/include/roctracer/roctracer_hsa.h
third_party/amd/backend/include/roctracer/roctracer_plugin.h
third_party/amd/backend/include/roctracer/roctracer_roctx.h
third_party/amd/backend/include/roctracer/roctx.h
third_party/amd/backend/include/roctracer/ext/prof_protocol.h
third_party/amd/backend/lib/asanrtl.bc
third_party/amd/backend/lib/ockl.bc
third_party/amd/backend/lib/ocml.bc
third_party/amd/include/CMakeLists.txt
third_party/amd/include/Analysis/RangeAnalysis.h
third_party/amd/include/Dialect/CMakeLists.txt
third_party/amd/include/Dialect/TritonAMDGPU/CMakeLists.txt
third_party/amd/include/Dialect/TritonAMDGPU/IR/CMakeLists.txt
third_party/amd/include/Dialect/TritonAMDGPU/IR/Dialect.h
third_party/amd/include/Dialect/TritonAMDGPU/IR/TritonAMDGPUAttrDefs.td
third_party/amd/include/Dialect/TritonAMDGPU/IR/TritonAMDGPUDialect.td
third_party/amd/include/Dialect/TritonAMDGPU/IR/TritonAMDGPUOps.td
third_party/amd/include/Dialect/TritonAMDGPU/Utility/CommonUtils.h
third_party/amd/include/TritonAMDGPUToLLVM/CMakeLists.txt
third_party/amd/include/TritonAMDGPUToLLVM/GCNAsmFormat.h
third_party/amd/include/TritonAMDGPUToLLVM/MembarUtility.h
third_party/amd/include/TritonAMDGPUToLLVM/Passes.h
third_party/amd/include/TritonAMDGPUToLLVM/Passes.td
third_party/amd/include/TritonAMDGPUToLLVM/PatternTritonAMDGPUToLLVM.h
third_party/amd/include/TritonAMDGPUToLLVM/TargetUtils.h
third_party/amd/include/TritonAMDGPUTransforms/CMakeLists.txt
third_party/amd/include/TritonAMDGPUTransforms/MfmaGroup.h
third_party/amd/include/TritonAMDGPUTransforms/Passes.h
third_party/amd/include/TritonAMDGPUTransforms/Passes.td
third_party/amd/include/TritonAMDGPUTransforms/TritonGPUConversion.h
third_party/amd/language/hip/__init__.py
third_party/amd/language/hip/libdevice.py
third_party/amd/lib/CMakeLists.txt
third_party/amd/lib/Analysis/CMakeLists.txt
third_party/amd/lib/Analysis/RangeAnalysis.cpp
third_party/amd/lib/Dialect/CMakeLists.txt
third_party/amd/lib/Dialect/TritonAMDGPU/CMakeLists.txt
third_party/amd/lib/Dialect/TritonAMDGPU/IR/CMakeLists.txt
third_party/amd/lib/Dialect/TritonAMDGPU/IR/Dialect.cpp
third_party/amd/lib/Dialect/TritonAMDGPU/Utility/CMakeLists.txt
third_party/amd/lib/Dialect/TritonAMDGPU/Utility/CommonUtils.cpp
third_party/amd/lib/TritonAMDGPUDialectToLLVM/CMakeLists.txt
third_party/amd/lib/TritonAMDGPUDialectToLLVM/ExtractSliceOpToLLVM.cpp
third_party/amd/lib/TritonAMDGPUDialectToLLVM/InThreadTransposeOpToTTG.cpp
third_party/amd/lib/TritonAMDGPUDialectToLLVM/TritonAMDGPUToLLVMPatterns.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/AtomicRMWOpsEmitter.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/AtomicRMWOpsEmitter.h
third_party/amd/lib/TritonAMDGPUToLLVM/BufferOpsEmitter.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/BufferOpsEmitter.h
third_party/amd/lib/TritonAMDGPUToLLVM/BuiltinFuncToLLVM.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/CMakeLists.txt
third_party/amd/lib/TritonAMDGPUToLLVM/ConvertLayoutOpToLLVM.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/ElementwiseOpToLLVM.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/GCNAsmFormat.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/LoadStoreOpToLLVM.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/MembarUtility.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/MemoryOpToLLVM.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/OptimizeLDSUsage.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/OptimizeLDSUtility.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/OptimizeLDSUtility.h
third_party/amd/lib/TritonAMDGPUToLLVM/PatternTritonGPUOpToLLVM.h
third_party/amd/lib/TritonAMDGPUToLLVM/SPMDOpToLLVM.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/ScalarizePackedFOps.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/SchedInstructions.h
third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/TargetInfo.h
third_party/amd/lib/TritonAMDGPUToLLVM/TargetUtils.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/TritonGPUToLLVM.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/UpcastMXFPToLLVM.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/Utility.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/Utility.h
third_party/amd/lib/TritonAMDGPUToLLVM/ConvertLayoutOpToLLVM/SharedToDotOperandHelper.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/ConvertLayoutOpToLLVM/SharedToDotOperandHelper.h
third_party/amd/lib/TritonAMDGPUToLLVM/ConvertLayoutOpToLLVM/SharedToDotOperandMFMA.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/ConvertLayoutOpToLLVM/SharedToDotOperandWMMA.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/FMA.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/MFMA.cpp
third_party/amd/lib/TritonAMDGPUToLLVM/DotOpToLLVM/WMMA.cpp
third_party/amd/lib/TritonAMDGPUTransforms/AccelerateAMDMatmul.cpp
third_party/amd/lib/TritonAMDGPUTransforms/BlockPingpong.cpp
third_party/amd/lib/TritonAMDGPUTransforms/CMakeLists.txt
third_party/amd/lib/TritonAMDGPUTransforms/CanonicalizePointers.cpp
third_party/amd/lib/TritonAMDGPUTransforms/CoalesceAsyncCopy.cpp
third_party/amd/lib/TritonAMDGPUTransforms/ConvertToBufferOps.cpp
third_party/amd/lib/TritonAMDGPUTransforms/FoldTrueCmpIOp.cpp
third_party/amd/lib/TritonAMDGPUTransforms/HoistLayoutConversions.cpp
third_party/amd/lib/TritonAMDGPUTransforms/InThreadTranspose.cpp
third_party/amd/lib/TritonAMDGPUTransforms/MfmaGroup.cpp
third_party/amd/lib/TritonAMDGPUTransforms/OptimizeEpilogue.cpp
third_party/amd/lib/TritonAMDGPUTransforms/ReorderInstructions.cpp
third_party/amd/lib/TritonAMDGPUTransforms/StreamPipeline.cpp
third_party/amd/lib/TritonAMDGPUTransforms/UpdateAsyncWaitCount.cpp
third_party/amd/lib/TritonAMDGPUTransforms/Utility.cpp
third_party/amd/lib/TritonAMDGPUTransforms/Utility.h
third_party/amd/python/triton_amd.cc
third_party/amd/python/test/address_sanitizer_helper.py
third_party/amd/python/test/attn_fwd.ttir
third_party/amd/python/test/test_address_sanitizer.py
third_party/amd/python/test/test_extract_slice.py
third_party/amd/python/test/test_scalarize_packed_fops.py
third_party/amd/test/CMakeLists.txt
third_party/amd/test/lib/CMakeLists.txt
third_party/amd/test/lib/Analysis/CMakeLists.txt
third_party/amd/test/lib/Analysis/TestAMDGPUMembar.cpp
third_party/amd/test/lib/Analysis/TestAMDRangeAnalysis.cpp
third_party/amd/unittest/CMakeLists.txt
third_party/amd/unittest/Conversion/CMakeLists.txt
third_party/amd/unittest/Conversion/OptimizeLDSTest.cpp
third_party/f2reduce/CMakeLists.txt
third_party/f2reduce/LICENCE.txt
third_party/f2reduce/README.md
third_party/f2reduce/VERSION
third_party/f2reduce/f2reduce.cpp
third_party/f2reduce/f2reduce.h
third_party/llvm/dummy_llvm/bin/llvm-config
third_party/llvm/dummy_llvm/include/llvm/IR.h
third_party/llvm/dummy_llvm/include/llvm-c/Core.h
third_party/llvm/dummy_llvm/lib/libLLVM.so
third_party/metal/.DS_Store
third_party/metal/CMakeLists.txt
third_party/metal/README.md
third_party/metal/backend/__init__.py
third_party/metal/backend/compiler.py
third_party/metal/backend/driver.py
third_party/metal/backend/executor.py
third_party/metal/backend/mlx_backend.py
third_party/metal/backend/__pycache__/__init__.cpython-310.pyc
third_party/metal/backend/__pycache__/compiler.cpython-310.pyc
third_party/metal/backend/__pycache__/driver.cpython-310.pyc
third_party/metal/language/metal/__init__.py
third_party/metal/language/metal/libdevice.py
third_party/metal/language/metal/utils.py
third_party/metal/python/.DS_Store
third_party/metal/python/CMakeLists.txt
third_party/metal/python/README.md
third_party/metal/python/__init__.py
third_party/metal/python/M3/__init__.py
third_party/metal/python/M3/m3_fusion_optimizer.py
third_party/metal/python/M3/m3_graph_optimizer.py
third_party/metal/python/M3/m3_memory_manager.py
third_party/metal/python/M3/m3_optimizations.py
third_party/metal/python/MLX/__init__.py
third_party/metal/python/MLX/advanced_memory_patterns.py
third_party/metal/python/MLX/complex_ops.py
third_party/metal/python/MLX/control_flow_optimizer.py
third_party/metal/python/MLX/launcher.py
third_party/metal/python/MLX/memory_layout.py
third_party/metal/python/MLX/memory_layout_optimizer.py
third_party/metal/python/MLX/metal_auto_tuner.py
third_party/metal/python/MLX/metal_backend.py
third_party/metal/python/MLX/metal_backend_test.py
third_party/metal/python/MLX/metal_fusion_optimizer.py
third_party/metal/python/MLX/metal_hardware_optimizer.py
third_party/metal/python/MLX/metal_instrumentation.py
third_party/metal/python/MLX/metal_ir_transforms.py
third_party/metal/python/MLX/metal_memory_manager.py
third_party/metal/python/MLX/metal_operation_fusion.py
third_party/metal/python/MLX/metal_optimizing_compiler.py
third_party/metal/python/MLX/metal_performance_shaders.py
third_party/metal/python/MLX/mlx_bridge.py
third_party/metal/python/MLX/mlx_graph_optimizer.py
third_party/metal/python/MLX/operation_mapping.py
third_party/metal/python/MLX/package_metal_backend.py
third_party/metal/python/MLX/special_ops.py
third_party/metal/python/MLX/sync_converter.py
third_party/metal/python/MLX/thread_mapping.py
third_party/metal/python/MLX/triton_to_metal_converter.py
third_party/metal/python/MLX/__pycache__/complex_ops.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/control_flow_optimizer.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/launcher.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/memory_layout.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/memory_layout_optimizer.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/metal_auto_tuner.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/metal_backend.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/metal_fusion_optimizer.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/metal_hardware_optimizer.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/metal_instrumentation.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/metal_ir_transforms.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/metal_memory_manager.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/metal_operation_fusion.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/metal_optimizing_compiler.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/metal_performance_shaders.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/mlx_bridge.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/mlx_graph_optimizer.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/operation_mapping.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/special_ops.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/sync_converter.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/thread_mapping.cpython-310.pyc
third_party/metal/python/MLX/__pycache__/triton_to_metal_converter.cpython-310.pyc
third_party/metal/python/__pycache__/__init__.cpython-310.pyc
third_party/metal/python/__pycache__/test_check_system.cpython-310.pyc
third_party/metal/python/benchmark/__init__.py
third_party/metal/python/benchmark/benchmark_sync_primitives.py
third_party/metal/python/benchmark/m3_benchmark.py
third_party/metal/python/benchmark/metal_backend_benchmark.py
third_party/metal/python/benchmark/__pycache__/__init__.cpython-310.pyc
third_party/metal/python/benchmark/__pycache__/metal_backend_benchmark.cpython-310.pyc
third_party/metal/python/benchmark/plots/add_comparison.png
third_party/metal/python/benchmark/plots/attention_comparison.png
third_party/metal/python/benchmark/plots/exp_comparison.png
third_party/metal/python/benchmark/plots/matmul_comparison.png
third_party/metal/python/benchmark/plots/matrix_multiplication_20250514_105023.png
third_party/metal/python/benchmark/plots/mean_comparison.png
third_party/metal/python/benchmark/plots/mul_comparison.png
third_party/metal/python/benchmark/plots/softmax_comparison.png
third_party/metal/python/benchmark/plots/speedup_by_operation.png
third_party/metal/python/benchmark/plots/sum_comparison.png
third_party/metal/python/benchmark/plots/tanh_comparison.png
third_party/metal/python/benchmark_results/all_results.txt
third_party/metal/python/benchmark_results/atomic_add_benchmark.metal
third_party/metal/python/benchmark_results/atomic_add_results.txt
third_party/metal/python/benchmark_results/atomic_max_benchmark.metal
third_party/metal/python/benchmark_results/atomic_max_results.txt
third_party/metal/python/benchmark_results/atomic_min_benchmark.metal
third_party/metal/python/benchmark_results/atomic_min_results.txt
third_party/metal/python/benchmark_results/atomic_xchg_benchmark.metal
third_party/metal/python/benchmark_results/atomic_xchg_results.txt
third_party/metal/python/benchmark_results/barrier_benchmark.metal
third_party/metal/python/benchmark_results/barrier_results.txt
third_party/metal/python/benchmark_results/reduction_direct_atomic_benchmark.metal
third_party/metal/python/benchmark_results/reduction_direct_atomic_results.txt
third_party/metal/python/benchmark_results/reduction_hierarchical_benchmark.metal
third_party/metal/python/benchmark_results/reduction_hierarchical_results.txt
third_party/metal/python/benchmark_results/reduction_shared_memory_benchmark.metal
third_party/metal/python/benchmark_results/reduction_shared_memory_results.txt
third_party/metal/python/benchmark_results/plots/atomic_operations.png
third_party/metal/python/benchmark_results/plots/reduction_strategies.png
third_party/metal/python/docs/ARCHITECTURE.md
third_party/metal/python/docs/COALESCED_LAYOUT.md
third_party/metal/python/docs/COALESCED_TODO.md
third_party/metal/python/docs/CONTRIBUTING.md
third_party/metal/python/docs/DEVELOPMENT_SUMMARY.md
third_party/metal/python/docs/ENGLISH_TRANSLATION.md
third_party/metal/python/docs/IMPLEMENTATION_SUMMARY.md
third_party/metal/python/docs/INSTALLATION_GUIDE.md
third_party/metal/python/docs/M3_OPTIMIZATIONS copy.md
third_party/metal/python/docs/M3_OPTIMIZATIONS.md
third_party/metal/python/docs/METAL_OPTIMIZATIONS.md
third_party/metal/python/docs/PERFORMANCE_OPTIMIZATION.md
third_party/metal/python/docs/README_METAL_OPTIMIZERS.md
third_party/metal/python/docs/README_special_ops.md
third_party/metal/python/docs/ROADMAP.md
third_party/metal/python/docs/TESTING.md
third_party/metal/python/docs/TROUBLESHOOTING.md
third_party/metal/python/examples/__init__.py
third_party/metal/python/examples/backend_comparison_example.py
third_party/metal/python/examples/convolution_example.py
third_party/metal/python/examples/matmul_example.py
third_party/metal/python/examples/metal_autotuner_conv_example.py
third_party/metal/python/examples/metal_autotuner_example.py
third_party/metal/python/examples/metal_backend_demo.py
third_party/metal/python/examples/reduction_example.py
third_party/metal/python/examples/simple_mlx_example.py
third_party/metal/python/examples/transformer_example.py
third_party/metal/python/examples/vector_add.py
third_party/metal/python/examples/__pycache__/__init__.cpython-310.pyc
third_party/metal/python/tests/README.md
third_party/metal/python/tests/check_environment.py
third_party/metal/python/tests/compare_cuda_metal.py
third_party/metal/python/tests/metal_backend_test.py
third_party/metal/python/tests/run_tests.py
third_party/metal/python/tests/test_basic_ops.py
third_party/metal/python/tests/test_check_system.py
third_party/metal/python/tests/test_chip_compatibility.py
third_party/metal/python/tests/test_complex_ops.py
third_party/metal/python/tests/test_control_flow_optimizer.py
third_party/metal/python/tests/test_debug.py
third_party/metal/python/tests/test_end_to_end.py
third_party/metal/python/tests/test_enum.py
third_party/metal/python/tests/test_hardware_comparison.py
third_party/metal/python/tests/test_integration.py
third_party/metal/python/tests/test_ir.json
third_party/metal/python/tests/test_m3_graph_optimizer.py
third_party/metal/python/tests/test_m3_integration copy.py
third_party/metal/python/tests/test_m3_integration.py
third_party/metal/python/tests/test_m3_memory_manager.py
third_party/metal/python/tests/test_m3_optimizations copy.py
third_party/metal/python/tests/test_m3_optimizations.py
third_party/metal/python/tests/test_memory_layout_optimizer.py
third_party/metal/python/tests/test_metal_auto_tuner.py
third_party/metal/python/tests/test_metal_backend.py
third_party/metal/python/tests/test_metal_backend_integration.py
third_party/metal/python/tests/test_metal_detection.py
third_party/metal/python/tests/test_metal_instrumentation.py
third_party/metal/python/tests/test_metal_ir_transforms.py
third_party/metal/python/tests/test_metal_memory_manager.py
third_party/metal/python/tests/test_metal_operation_fusion.py
third_party/metal/python/tests/test_metal_optimizing_compiler.py
third_party/metal/python/tests/test_metal_triton_frontend.py
third_party/metal/python/tests/test_mlx_graph_optimizer.py
third_party/metal/python/tests/test_operation_mapping.py
third_party/metal/python/tests/test_performance.py
third_party/metal/python/tests/test_performance_benchmark.py
third_party/metal/python/tests/test_reduction_memory.py
third_party/metal/python/tests/test_reduction_performance.py
third_party/metal/python/tests/test_special_ops copy 2.py
third_party/metal/python/tests/test_special_ops copy.py
third_party/metal/python/tests/test_special_ops.py
third_party/metal/python/tests/test_special_ops_edge_cases.py
third_party/metal/python/tests/test_sync_primitives.py
third_party/metal/python/tests/test_sync_primitives_edge_cases.py
third_party/metal/python/tests/test_translation copy.py
third_party/metal/python/tests/test_translation.py
third_party/metal/python/tests/test_triton_integration.py
third_party/metal/python/tests/test_triton_to_mlx_converter.py
third_party/metal/python/tests/__pycache__/test_basic_ops.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_check_system.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_chip_compatibility.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_complex_ops.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_control_flow_optimizer.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_debug.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_end_to_end.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_enum.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_hardware_comparison.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_integration.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_m3_graph_optimizer.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_m3_integration.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_m3_memory_manager.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_m3_optimizations.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_memory_layout_optimizer.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_metal_auto_tuner.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_metal_backend.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_metal_backend_integration.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_metal_detection.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_metal_instrumentation.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_metal_ir_transforms.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_metal_memory_manager.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_metal_operation_fusion.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_metal_optimizing_compiler.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_metal_triton_frontend.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_mlx_graph_optimizer.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_operation_mapping.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_performance_benchmark.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_reduction_memory.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_reduction_performance.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_special_ops.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_special_ops_edge_cases.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_sync_primitives.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_sync_primitives_edge_cases.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_triton_integration.cpython-310.pyc
third_party/metal/python/tests/__pycache__/test_triton_to_mlx_converter.cpython-310.pyc
third_party/metal/python/tests/test_data/comprehensive_analysis.json
third_party/metal/python/tests/test_data/large_sample_analysis.json
third_party/metal/python/tests/test_data/large_sample_ops.json
third_party/metal/python/tests/test_data/sample_analysis.json
third_party/metal/python/tests/test_data/sample_ops.json
third_party/metal/python/tests/test_data/test_output.json
third_party/metal/python/tests/test_results/bessel_j0_performance.png
third_party/metal/python/tests/test_results/erf_performance.png
third_party/metal/python/tests/test_results/fast_sigmoid_performance.png
third_party/metal/python/tests/test_results/fast_tanh_performance.png
third_party/metal/python/tests/test_results/lgamma_performance.png
third_party/metal/python/tests/test_results/speedup_comparison.png
third_party/metal/python/tools/README.md
third_party/metal/python/tools/analyze_memory_layouts.py
third_party/metal/python/tools/benchmark_reduction_layouts.py
third_party/metal/python/tools/check_system.py
third_party/metal/python/tools/create_sample_ops.py
third_party/metal/python/tools/integration_test.py
third_party/metal/python/tools/large_sample_ops.json
third_party/metal/python/tools/sample_kernel.py
third_party/metal/python/tools/sample_ops.json
third_party/metal/python/tools/sample_reduction_kernel.py
third_party/metal/python/tools/simple_analyzer.py
third_party/metal/python/tools/test_simple_analyzer.py
third_party/metal/python/tutorials/README.md
third_party/metal/python/tutorials/test_tutorial_metal_compatibility.py
third_party/metal/python/tutorials/tutorial_README.md
third_party/metal/python/tutorials/tutorial_metal_compatibility.py
third_party/nvidia/CMakeLists.txt
third_party/nvidia/triton_nvidia.cc
third_party/nvidia/backend/__init__.py
third_party/nvidia/backend/compiler.py
third_party/nvidia/backend/driver.c
third_party/nvidia/backend/driver.py
third_party/nvidia/backend/bin/cuobjdump
third_party/nvidia/backend/bin/nvdisasm
third_party/nvidia/backend/bin/ptxas
third_party/nvidia/backend/include/builtin_types.h
third_party/nvidia/backend/include/channel_descriptor.h
third_party/nvidia/backend/include/common_functions.h
third_party/nvidia/backend/include/cooperative_groups.h
third_party/nvidia/backend/include/cuComplex.h
third_party/nvidia/backend/include/cuda.h
third_party/nvidia/backend/include/cudaEGL.h
third_party/nvidia/backend/include/cudaEGLTypedefs.h
third_party/nvidia/backend/include/cudaGL.h
third_party/nvidia/backend/include/cudaGLTypedefs.h
third_party/nvidia/backend/include/cudaProfilerTypedefs.h
third_party/nvidia/backend/include/cudaTypedefs.h
third_party/nvidia/backend/include/cudaVDPAU.h
third_party/nvidia/backend/include/cudaVDPAUTypedefs.h
third_party/nvidia/backend/include/cuda_awbarrier.h
third_party/nvidia/backend/include/cuda_awbarrier_helpers.h
third_party/nvidia/backend/include/cuda_awbarrier_primitives.h
third_party/nvidia/backend/include/cuda_bf16.h
third_party/nvidia/backend/include/cuda_bf16.hpp
third_party/nvidia/backend/include/cuda_device_runtime_api.h
third_party/nvidia/backend/include/cuda_egl_interop.h
third_party/nvidia/backend/include/cuda_fp16.h
third_party/nvidia/backend/include/cuda_fp16.hpp
third_party/nvidia/backend/include/cuda_fp4.h
third_party/nvidia/backend/include/cuda_fp4.hpp
third_party/nvidia/backend/include/cuda_fp6.h
third_party/nvidia/backend/include/cuda_fp6.hpp
third_party/nvidia/backend/include/cuda_fp8.h
third_party/nvidia/backend/include/cuda_fp8.hpp
third_party/nvidia/backend/include/cuda_gl_interop.h
third_party/nvidia/backend/include/cuda_occupancy.h
third_party/nvidia/backend/include/cuda_pipeline.h
third_party/nvidia/backend/include/cuda_pipeline_helpers.h
third_party/nvidia/backend/include/cuda_pipeline_primitives.h
third_party/nvidia/backend/include/cuda_runtime.h
third_party/nvidia/backend/include/cuda_runtime_api.h
third_party/nvidia/backend/include/cuda_stdint.h
third_party/nvidia/backend/include/cuda_surface_types.h
third_party/nvidia/backend/include/cuda_texture_types.h
third_party/nvidia/backend/include/cuda_vdpau_interop.h
third_party/nvidia/backend/include/cudart_platform.h
third_party/nvidia/backend/include/cupti.h
third_party/nvidia/backend/include/cupti_activity.h
third_party/nvidia/backend/include/cupti_activity_deprecated.h
third_party/nvidia/backend/include/cupti_callbacks.h
third_party/nvidia/backend/include/cupti_checkpoint.h
third_party/nvidia/backend/include/cupti_common.h
third_party/nvidia/backend/include/cupti_driver_cbid.h
third_party/nvidia/backend/include/cupti_events.h
third_party/nvidia/backend/include/cupti_metrics.h
third_party/nvidia/backend/include/cupti_nvtx_cbid.h
third_party/nvidia/backend/include/cupti_pcsampling.h
third_party/nvidia/backend/include/cupti_pcsampling_util.h
third_party/nvidia/backend/include/cupti_pmsampling.h
third_party/nvidia/backend/include/cupti_profiler_host.h
third_party/nvidia/backend/include/cupti_profiler_target.h
third_party/nvidia/backend/include/cupti_range_profiler.h
third_party/nvidia/backend/include/cupti_result.h
third_party/nvidia/backend/include/cupti_runtime_cbid.h
third_party/nvidia/backend/include/cupti_sass_metrics.h
third_party/nvidia/backend/include/cupti_target.h
third_party/nvidia/backend/include/cupti_version.h
third_party/nvidia/backend/include/device_atomic_functions.h
third_party/nvidia/backend/include/device_atomic_functions.hpp
third_party/nvidia/backend/include/device_double_functions.h
third_party/nvidia/backend/include/device_functions.h
third_party/nvidia/backend/include/device_launch_parameters.h
third_party/nvidia/backend/include/device_types.h
third_party/nvidia/backend/include/driver_functions.h
third_party/nvidia/backend/include/driver_types.h
third_party/nvidia/backend/include/fatbinary_section.h
third_party/nvidia/backend/include/generated_cudaGL_meta.h
third_party/nvidia/backend/include/generated_cudaVDPAU_meta.h
third_party/nvidia/backend/include/generated_cuda_gl_interop_meta.h
third_party/nvidia/backend/include/generated_cuda_meta.h
third_party/nvidia/backend/include/generated_cuda_runtime_api_meta.h
third_party/nvidia/backend/include/generated_cuda_vdpau_interop_meta.h
third_party/nvidia/backend/include/generated_cudart_removed_meta.h
third_party/nvidia/backend/include/generated_nvtx_meta.h
third_party/nvidia/backend/include/host_config.h
third_party/nvidia/backend/include/host_defines.h
third_party/nvidia/backend/include/library_types.h
third_party/nvidia/backend/include/math_constants.h
third_party/nvidia/backend/include/math_functions.h
third_party/nvidia/backend/include/mma.h
third_party/nvidia/backend/include/nvPTXCompiler.h
third_party/nvidia/backend/include/nvfunctional
third_party/nvidia/backend/include/nvperf_common.h
third_party/nvidia/backend/include/nvperf_cuda_host.h
third_party/nvidia/backend/include/nvperf_host.h
third_party/nvidia/backend/include/nvperf_target.h
third_party/nvidia/backend/include/sm_20_atomic_functions.h
third_party/nvidia/backend/include/sm_20_atomic_functions.hpp
third_party/nvidia/backend/include/sm_20_intrinsics.h
third_party/nvidia/backend/include/sm_20_intrinsics.hpp
third_party/nvidia/backend/include/sm_30_intrinsics.h
third_party/nvidia/backend/include/sm_30_intrinsics.hpp
third_party/nvidia/backend/include/sm_32_atomic_functions.h
third_party/nvidia/backend/include/sm_32_atomic_functions.hpp
third_party/nvidia/backend/include/sm_32_intrinsics.h
third_party/nvidia/backend/include/sm_32_intrinsics.hpp
third_party/nvidia/backend/include/sm_35_atomic_functions.h
third_party/nvidia/backend/include/sm_35_intrinsics.h
third_party/nvidia/backend/include/sm_60_atomic_functions.h
third_party/nvidia/backend/include/sm_60_atomic_functions.hpp
third_party/nvidia/backend/include/sm_61_intrinsics.h
third_party/nvidia/backend/include/sm_61_intrinsics.hpp
third_party/nvidia/backend/include/surface_functions.h
third_party/nvidia/backend/include/surface_indirect_functions.h
third_party/nvidia/backend/include/surface_types.h
third_party/nvidia/backend/include/texture_fetch_functions.h
third_party/nvidia/backend/include/texture_indirect_functions.h
third_party/nvidia/backend/include/texture_types.h
third_party/nvidia/backend/include/vector_functions.h
third_party/nvidia/backend/include/vector_functions.hpp
third_party/nvidia/backend/include/vector_types.h
third_party/nvidia/backend/include/Openacc/cupti_openacc.h
third_party/nvidia/backend/include/Openmp/cupti_openmp.h
third_party/nvidia/backend/include/Openmp/omp-tools.h
third_party/nvidia/backend/include/cooperative_groups/memcpy_async.h
third_party/nvidia/backend/include/cooperative_groups/reduce.h
third_party/nvidia/backend/include/cooperative_groups/scan.h
third_party/nvidia/backend/include/cooperative_groups/details/async.h
third_party/nvidia/backend/include/cooperative_groups/details/coalesced_reduce.h
third_party/nvidia/backend/include/cooperative_groups/details/coalesced_scan.h
third_party/nvidia/backend/include/cooperative_groups/details/driver_abi.h
third_party/nvidia/backend/include/cooperative_groups/details/functional.h
third_party/nvidia/backend/include/cooperative_groups/details/helpers.h
third_party/nvidia/backend/include/cooperative_groups/details/info.h
third_party/nvidia/backend/include/cooperative_groups/details/invoke.h
third_party/nvidia/backend/include/cooperative_groups/details/memory.h
third_party/nvidia/backend/include/cooperative_groups/details/partitioning.h
third_party/nvidia/backend/include/cooperative_groups/details/reduce.h
third_party/nvidia/backend/include/cooperative_groups/details/scan.h
third_party/nvidia/backend/include/cooperative_groups/details/sync.h
third_party/nvidia/backend/include/crt/common_functions.h
third_party/nvidia/backend/include/crt/cudacc_ext.h
third_party/nvidia/backend/include/crt/device_double_functions.h
third_party/nvidia/backend/include/crt/device_double_functions.hpp
third_party/nvidia/backend/include/crt/device_fp128_functions.h
third_party/nvidia/backend/include/crt/device_functions.h
third_party/nvidia/backend/include/crt/device_functions.hpp
third_party/nvidia/backend/include/crt/func_macro.h
third_party/nvidia/backend/include/crt/host_config.h
third_party/nvidia/backend/include/crt/host_defines.h
third_party/nvidia/backend/include/crt/host_runtime.h
third_party/nvidia/backend/include/crt/math_functions.h
third_party/nvidia/backend/include/crt/math_functions.hpp
third_party/nvidia/backend/include/crt/mma.h
third_party/nvidia/backend/include/crt/mma.hpp
third_party/nvidia/backend/include/crt/nvfunctional
third_party/nvidia/backend/include/crt/sm_100_rt.h
third_party/nvidia/backend/include/crt/sm_100_rt.hpp
third_party/nvidia/backend/include/crt/sm_70_rt.h
third_party/nvidia/backend/include/crt/sm_70_rt.hpp
third_party/nvidia/backend/include/crt/sm_80_rt.h
third_party/nvidia/backend/include/crt/sm_80_rt.hpp
third_party/nvidia/backend/include/crt/sm_90_rt.h
third_party/nvidia/backend/include/crt/sm_90_rt.hpp
third_party/nvidia/backend/include/crt/storage_class.h
third_party/nvidia/backend/lib/libdevice.10.bc
third_party/nvidia/backend/lib/cupti/libcheckpoint.so
third_party/nvidia/backend/lib/cupti/libcupti.so
third_party/nvidia/backend/lib/cupti/libcupti.so.12
third_party/nvidia/backend/lib/cupti/libcupti.so.2025.1.1
third_party/nvidia/backend/lib/cupti/libcupti_static.a
third_party/nvidia/backend/lib/cupti/libnvperf_host.so
third_party/nvidia/backend/lib/cupti/libnvperf_target.so
third_party/nvidia/backend/lib/cupti/libpcsamplingutil.so
third_party/nvidia/hopper/CMakeLists.txt
third_party/nvidia/hopper/include/CMakeLists.txt
third_party/nvidia/hopper/include/Transforms/CMakeLists.txt
third_party/nvidia/hopper/include/Transforms/Passes.h
third_party/nvidia/hopper/include/Transforms/Passes.td
third_party/nvidia/hopper/lib/CMakeLists.txt
third_party/nvidia/hopper/lib/Transforms/CMakeLists.txt
third_party/nvidia/hopper/lib/Transforms/WarpSpecialization.cpp
third_party/nvidia/hopper/lib/Transforms/WarpSpecialization/Utility.cpp
third_party/nvidia/hopper/lib/Transforms/WarpSpecialization/Utility.h
third_party/nvidia/hopper/lib/Transforms/WarpSpecialization/WSTaskPartition.cpp
third_party/nvidia/include/CMakeLists.txt
third_party/nvidia/include/cublas_instance.h
third_party/nvidia/include/cublas_types.h
third_party/nvidia/include/Dialect/CMakeLists.txt
third_party/nvidia/include/Dialect/NVGPU/CMakeLists.txt
third_party/nvidia/include/Dialect/NVGPU/IR/CMakeLists.txt
third_party/nvidia/include/Dialect/NVGPU/IR/Dialect.h
third_party/nvidia/include/Dialect/NVGPU/IR/NVGPUAttrDefs.td
third_party/nvidia/include/Dialect/NVGPU/IR/NVGPUDialect.td
third_party/nvidia/include/Dialect/NVGPU/IR/NVGPUOps.td
third_party/nvidia/include/Dialect/NVWS/CMakeLists.txt
third_party/nvidia/include/Dialect/NVWS/IR/CMakeLists.txt
third_party/nvidia/include/Dialect/NVWS/IR/Dialect.h
third_party/nvidia/include/Dialect/NVWS/IR/NVWSAttrDefs.td
third_party/nvidia/include/Dialect/NVWS/IR/NVWSDialect.td
third_party/nvidia/include/Dialect/NVWS/IR/NVWSOps.td
third_party/nvidia/include/Dialect/NVWS/IR/NVWSTypes.td
third_party/nvidia/include/Dialect/NVWS/Transforms/CMakeLists.txt
third_party/nvidia/include/Dialect/NVWS/Transforms/Passes.h
third_party/nvidia/include/Dialect/NVWS/Transforms/Passes.td
third_party/nvidia/include/NVGPUToLLVM/CMakeLists.txt
third_party/nvidia/include/NVGPUToLLVM/NVGPUToLLVMPass.h
third_party/nvidia/include/NVGPUToLLVM/Passes.h
third_party/nvidia/include/NVGPUToLLVM/Passes.td
third_party/nvidia/include/TritonNVIDIAGPUToLLVM/CMakeLists.txt
third_party/nvidia/include/TritonNVIDIAGPUToLLVM/PTXAsmFormat.h
third_party/nvidia/include/TritonNVIDIAGPUToLLVM/Passes.h
third_party/nvidia/include/TritonNVIDIAGPUToLLVM/Passes.td
third_party/nvidia/include/TritonNVIDIAGPUToLLVM/Utility.h
third_party/nvidia/language/cuda/__init__.py
third_party/nvidia/language/cuda/gdc.py
third_party/nvidia/language/cuda/libdevice.py
third_party/nvidia/language/cuda/utils.py
third_party/nvidia/lib/CMakeLists.txt
third_party/nvidia/lib/Dialect/CMakeLists.txt
third_party/nvidia/lib/Dialect/NVGPU/CMakeLists.txt
third_party/nvidia/lib/Dialect/NVGPU/IR/CMakeLists.txt
third_party/nvidia/lib/Dialect/NVGPU/IR/Dialect.cpp
third_party/nvidia/lib/Dialect/NVWS/CMakeLists.txt
third_party/nvidia/lib/Dialect/NVWS/IR/CMakeLists.txt
third_party/nvidia/lib/Dialect/NVWS/IR/Dialect.cpp
third_party/nvidia/lib/Dialect/NVWS/IR/Ops.cpp
third_party/nvidia/lib/Dialect/NVWS/Transforms/CMakeLists.txt
third_party/nvidia/lib/Dialect/NVWS/Transforms/LowerAref.cpp
third_party/nvidia/lib/Dialect/NVWS/Transforms/LowerWarpGroup.cpp
third_party/nvidia/lib/NVGPUToLLVM/CMakeLists.txt
third_party/nvidia/lib/NVGPUToLLVM/NVGPUToLLVMPass.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/BarrierOpToLLVM.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/CMakeLists.txt
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ClusterOpsToLLVM.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertLayoutOpToLLVM.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ConvertWarpSpecializeToLLVM.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/ElementwiseOpToLLVM.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/Fp4ToFpOpToLLVM.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/LoadStoreOpToLLVM.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/MemoryOpToLLVM.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/PTXAsmFormat.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/PatternTritonGPUOpToLLVM.h
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/SPMDOpToLLVM.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/TMAToLLVM.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/TargetInfo.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/TargetInfo.h
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/TensorMemoryToLLVM.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/TensorPtrOpsToLLVM.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/TritonGPUToLLVM.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/Utility.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/Utility.h
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/MMAHelpers.h
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/MMAv2.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/MMAv5.cpp
third_party/nvidia/lib/TritonNVIDIAGPUToLLVM/DotOpToLLVM/WGMMA.cpp
third_party/nvidia/tools/cuda/compile.c
third_party/nvidia/tools/cuda/compile.h
third_party/nvidia/unittest/CMakeLists.txt
third_party/nvidia/unittest/Conversion/CMakeLists.txt
third_party/nvidia/unittest/Conversion/TritonGPUToLLVM/CMakeLists.txt
third_party/nvidia/unittest/Conversion/TritonGPUToLLVM/PTXAsmFormatTest.cpp
third_party/proton/.gitignore
third_party/proton/CMakeLists.txt
third_party/proton/README.md
third_party/proton/csrc/CMakeLists.txt
third_party/proton/csrc/Proton.cpp
third_party/proton/csrc/include/Proton.h
third_party/proton/csrc/include/Context/Context.h
third_party/proton/csrc/include/Context/Python.h
third_party/proton/csrc/include/Context/Shadow.h
third_party/proton/csrc/include/Data/Data.h
third_party/proton/csrc/include/Data/Metric.h
third_party/proton/csrc/include/Data/TraceData.h
third_party/proton/csrc/include/Data/TreeData.h
third_party/proton/csrc/include/Driver/Device.h
third_party/proton/csrc/include/Driver/Dispatch.h
third_party/proton/csrc/include/Driver/GPU/CudaApi.h
third_party/proton/csrc/include/Driver/GPU/CuptiApi.h
third_party/proton/csrc/include/Driver/GPU/HipApi.h
third_party/proton/csrc/include/Driver/GPU/HsaApi.h
third_party/proton/csrc/include/Driver/GPU/RoctracerApi.h
third_party/proton/csrc/include/Profiler/GPUProfiler.h
third_party/proton/csrc/include/Profiler/Profiler.h
third_party/proton/csrc/include/Profiler/Cupti/CuptiPCSampling.h
third_party/proton/csrc/include/Profiler/Cupti/CuptiProfiler.h
third_party/proton/csrc/include/Profiler/Roctracer/RoctracerProfiler.h
third_party/proton/csrc/include/Session/Session.h
third_party/proton/csrc/include/Utility/Atomic.h
third_party/proton/csrc/include/Utility/Errors.h
third_party/proton/csrc/include/Utility/Map.h
third_party/proton/csrc/include/Utility/Set.h
third_party/proton/csrc/include/Utility/Singleton.h
third_party/proton/csrc/include/Utility/String.h
third_party/proton/csrc/include/Utility/Traits.h
third_party/proton/csrc/lib/CMakeLists.txt
third_party/proton/csrc/lib/Context/CMakeLists.txt
third_party/proton/csrc/lib/Context/Context.cpp
third_party/proton/csrc/lib/Context/Python.cpp
third_party/proton/csrc/lib/Context/Shadow.cpp
third_party/proton/csrc/lib/Data/CMakeLists.txt
third_party/proton/csrc/lib/Data/Data.cpp
third_party/proton/csrc/lib/Data/TraceData.cpp
third_party/proton/csrc/lib/Data/TreeData.cpp
third_party/proton/csrc/lib/Driver/CMakeLists.txt
third_party/proton/csrc/lib/Driver/Device.cpp
third_party/proton/csrc/lib/Driver/GPU/CudaApi.cpp
third_party/proton/csrc/lib/Driver/GPU/CuptiApi.cpp
third_party/proton/csrc/lib/Driver/GPU/HipApi.cpp
third_party/proton/csrc/lib/Driver/GPU/HsaApi.cpp
third_party/proton/csrc/lib/Driver/GPU/RoctracerApi.cpp
third_party/proton/csrc/lib/Profiler/CMakeLists.txt
third_party/proton/csrc/lib/Profiler/Cupti/CuptiPCSampling.cpp
third_party/proton/csrc/lib/Profiler/Cupti/CuptiProfiler.cpp
third_party/proton/csrc/lib/Profiler/RocTracer/RoctracerProfiler.cpp
third_party/proton/csrc/lib/Session/CMakeLists.txt
third_party/proton/csrc/lib/Session/Session.cpp
third_party/proton/dialect/CMakeLists.txt
third_party/proton/dialect/triton_proton.cc
third_party/proton/dialect/include/CMakeLists.txt
third_party/proton/dialect/include/Dialect/CMakeLists.txt
third_party/proton/dialect/include/Dialect/Proton/CMakeLists.txt
third_party/proton/dialect/include/Dialect/Proton/IR/CMakeLists.txt
third_party/proton/dialect/include/Dialect/Proton/IR/Dialect.h
third_party/proton/dialect/include/Dialect/Proton/IR/ProtonAttrDefs.td
third_party/proton/dialect/include/Dialect/Proton/IR/ProtonDialect.td
third_party/proton/dialect/include/Dialect/Proton/IR/ProtonOps.td
third_party/proton/dialect/include/TritonProtonToLLVM/PatternTritonProtonOpToLLVM.h
third_party/proton/dialect/lib/CMakeLists.txt
third_party/proton/dialect/lib/Dialect/CMakeLists.txt
third_party/proton/dialect/lib/Dialect/Proton/CMakeLists.txt
third_party/proton/dialect/lib/Dialect/Proton/IR/CMakeLists.txt
third_party/proton/dialect/lib/Dialect/Proton/IR/Dialect.cpp
third_party/proton/dialect/lib/Dialect/Proton/IR/Ops.cpp
third_party/proton/dialect/lib/TritonProtonToLLVM/CMakeLists.txt
third_party/proton/dialect/lib/TritonProtonToLLVM/RecordOpToLLVM.cpp
third_party/proton/proton/__init__.py
third_party/proton/proton/context.py
third_party/proton/proton/flags.py
third_party/proton/proton/hook.py
third_party/proton/proton/language.py
third_party/proton/proton/profile.py
third_party/proton/proton/proton.py
third_party/proton/proton/scope.py
third_party/proton/proton/state.py
third_party/proton/proton/viewer.py
third_party/proton/test/helper.py
third_party/proton/test/helper_kernels.py
third_party/proton/test/instrument.py
third_party/proton/test/test_api.py
third_party/proton/test/test_cmd.py
third_party/proton/test/test_lib.py
third_party/proton/test/test_profile.py
third_party/proton/test/test_record.py
third_party/proton/test/test_viewer.py
third_party/proton/test/examples/cuda.json
third_party/proton/test/examples/frame.json
third_party/proton/test/examples/hip.json
third_party/proton/test/examples/leaf_nodes.json
third_party/proton/test/examples/triton.json
third_party/proton/tutorials/dynamic_net.py
third_party/proton/tutorials/matmul.py
unittest/CMakeLists.txt
unittest/googletest.cmake
unittest/Analysis/CMakeLists.txt
unittest/Analysis/UtilityTest.cpp
unittest/Dialect/CMakeLists.txt
unittest/Dialect/TritonGPU/CMakeLists.txt
unittest/Dialect/TritonGPU/DialectTest.cpp
unittest/Dialect/TritonGPU/DumpLayoutTest.cpp
unittest/Dialect/TritonGPU/LinearLayoutConversionsTest.cpp
unittest/Dialect/TritonGPU/SwizzleTest.cpp
unittest/Dialect/TritonMetal/CMakeLists.txt
unittest/Dialect/TritonMetal/HardwareDetectionTest.cpp
unittest/Dialect/TritonMetal/MLXIntegrationTest.cpp
unittest/Dialect/TritonMetal/README.md
unittest/Dialect/TritonMetal/IR/CMakeLists.txt
unittest/Dialect/TritonMetal/IR/DialectTest.cpp
unittest/Dialect/TritonMetal/Transforms/CMakeLists.txt
unittest/Dialect/TritonMetal/Transforms/M3OptimizationsTest.cpp
unittest/Dialect/TritonMetal/Transforms/MemoryOptimizerTest.cpp
unittest/Dialect/TritonMetal/Transforms/TransformsTest.cpp
unittest/Metal/CMakeLists.txt
unittest/Metal/HardwareDetectionTest.cpp
unittest/Metal/M3OptimizationsTest.cpp
unittest/Metal/MLXIntegrationTest.cpp
unittest/Metal/MetalBackendTest.cpp
unittest/Metal/MetalMemoryManagerTest.cpp
unittest/Metal/OperationFusionTest.cpp
unittest/Metal/README.md
unittest/Metal/TensorCoreTest.cpp
unittest/Tools/CMakeLists.txt
unittest/Tools/LayoutUtilsTest.cpp
unittest/Tools/LinearLayoutTest.cpp