LICENSE
NOTICE
README.md
pyproject.toml
setup.cfg
mlstm_kernels/__init__.py
mlstm_kernels.egg-info/PKG-INFO
mlstm_kernels.egg-info/SOURCES.txt
mlstm_kernels.egg-info/dependency_links.txt
mlstm_kernels.egg-info/top_level.txt
mlstm_kernels/jax/__init__.py
mlstm_kernels/jax/stride_utils.py
mlstm_kernels/jax/utils.py
mlstm_kernels/jax/xla_utils.py
mlstm_kernels/jax/chunkwise/__init__.py
mlstm_kernels/jax/chunkwise/native/__init__.py
mlstm_kernels/jax/chunkwise/native/fw.py
mlstm_kernels/jax/chunkwise/native/fwbw.py
mlstm_kernels/jax/chunkwise/triton_limit_chunk/__init__.py
mlstm_kernels/jax/chunkwise/triton_limit_chunk/bw.py
mlstm_kernels/jax/chunkwise/triton_limit_chunk/bw_parallel.py
mlstm_kernels/jax/chunkwise/triton_limit_chunk/bw_recurrent.py
mlstm_kernels/jax/chunkwise/triton_limit_chunk/fw.py
mlstm_kernels/jax/chunkwise/triton_limit_chunk/fw_parallel.py
mlstm_kernels/jax/chunkwise/triton_limit_chunk/fw_recurrent.py
mlstm_kernels/jax/chunkwise/triton_limit_chunk/fwbw.py
mlstm_kernels/jax/chunkwise/triton_xl_chunk/__init__.py
mlstm_kernels/jax/chunkwise/triton_xl_chunk/bw.py
mlstm_kernels/jax/chunkwise/triton_xl_chunk/bw_parallel_dK.py
mlstm_kernels/jax/chunkwise/triton_xl_chunk/bw_parallel_dQ.py
mlstm_kernels/jax/chunkwise/triton_xl_chunk/bw_parallel_dV.py
mlstm_kernels/jax/chunkwise/triton_xl_chunk/bw_recurrent.py
mlstm_kernels/jax/chunkwise/triton_xl_chunk/chunkwise_gates.py
mlstm_kernels/jax/chunkwise/triton_xl_chunk/fw.py
mlstm_kernels/jax/chunkwise/triton_xl_chunk/fw_parallel.py
mlstm_kernels/jax/chunkwise/triton_xl_chunk/fw_recurrent.py
mlstm_kernels/jax/chunkwise/triton_xl_chunk/fwbw.py
mlstm_kernels/jax/parallel/__init__.py
mlstm_kernels/jax/parallel/native/__init__.py
mlstm_kernels/jax/parallel/native/bw.py
mlstm_kernels/jax/parallel/native/fw.py
mlstm_kernels/jax/parallel/native/fwbw.py
mlstm_kernels/jax/parallel/native_stablef/__init__.py
mlstm_kernels/jax/parallel/native_stablef/bw.py
mlstm_kernels/jax/parallel/native_stablef/fw.py
mlstm_kernels/jax/parallel/native_stablef/fwbw.py
mlstm_kernels/jax/recurrent/__init__.py
mlstm_kernels/jax/recurrent/native_sequence.py
mlstm_kernels/jax/recurrent/native_sequence_scan.py
mlstm_kernels/jax/recurrent/native_step.py
mlstm_kernels/jax/recurrent/triton_step.py
mlstm_kernels/torch/__init__.py
mlstm_kernels/torch/backend_module.py
mlstm_kernels/torch/kernel_wrappers.py
mlstm_kernels/torch/utils.py
mlstm_kernels/torch/chunkwise/__init__.py
mlstm_kernels/torch/chunkwise/native/__init__.py
mlstm_kernels/torch/chunkwise/native/bw.py
mlstm_kernels/torch/chunkwise/native/fw.py
mlstm_kernels/torch/chunkwise/native/fwbw.py
mlstm_kernels/torch/chunkwise/triton_limit_chunk/__init__.py
mlstm_kernels/torch/chunkwise/triton_limit_chunk/bw.py
mlstm_kernels/torch/chunkwise/triton_limit_chunk/bw_parallel.py
mlstm_kernels/torch/chunkwise/triton_limit_chunk/bw_recurrent.py
mlstm_kernels/torch/chunkwise/triton_limit_chunk/chunkwise_gates.py
mlstm_kernels/torch/chunkwise/triton_limit_chunk/fw.py
mlstm_kernels/torch/chunkwise/triton_limit_chunk/fw_parallel.py
mlstm_kernels/torch/chunkwise/triton_limit_chunk/fw_recurrent.py
mlstm_kernels/torch/chunkwise/triton_limit_chunk/fwbw.py
mlstm_kernels/torch/chunkwise/triton_xl_chunk/__init__.py
mlstm_kernels/torch/chunkwise/triton_xl_chunk/bw.py
mlstm_kernels/torch/chunkwise/triton_xl_chunk/bw_parallel_dK.py
mlstm_kernels/torch/chunkwise/triton_xl_chunk/bw_parallel_dQ.py
mlstm_kernels/torch/chunkwise/triton_xl_chunk/bw_parallel_dV.py
mlstm_kernels/torch/chunkwise/triton_xl_chunk/bw_recurrent.py
mlstm_kernels/torch/chunkwise/triton_xl_chunk/chunkwise_gates.py
mlstm_kernels/torch/chunkwise/triton_xl_chunk/fw.py
mlstm_kernels/torch/chunkwise/triton_xl_chunk/fw_parallel.py
mlstm_kernels/torch/chunkwise/triton_xl_chunk/fw_recurrent.py
mlstm_kernels/torch/chunkwise/triton_xl_chunk/fwbw.py
mlstm_kernels/torch/parallel/__init__.py
mlstm_kernels/torch/parallel/_native_tiled.py
mlstm_kernels/torch/parallel/native/__init__.py
mlstm_kernels/torch/parallel/native/bw.py
mlstm_kernels/torch/parallel/native/fw.py
mlstm_kernels/torch/parallel/native/fwbw.py
mlstm_kernels/torch/parallel/native_stablef/__init__.py
mlstm_kernels/torch/parallel/native_stablef/bw.py
mlstm_kernels/torch/parallel/native_stablef/fw.py
mlstm_kernels/torch/parallel/native_stablef/fwbw.py
mlstm_kernels/torch/parallel/triton_limit_headdim/__init__.py
mlstm_kernels/torch/parallel/triton_limit_headdim/bw.py
mlstm_kernels/torch/parallel/triton_limit_headdim/fw.py
mlstm_kernels/torch/parallel/triton_limit_headdim/fwbw.py
mlstm_kernels/torch/recurrent/__init__.py
mlstm_kernels/torch/recurrent/native_sequence.py
mlstm_kernels/torch/recurrent/native_step.py
mlstm_kernels/torch/recurrent/triton_step.py
mlstm_kernels/torch/recurrent/triton_step_alternate.py
mlstm_kernels/utils/__init__.py
mlstm_kernels/utils/kernels.py
mlstm_kernels/utils/time.py
mlstm_kernels/utils/benchmark/__init__.py
mlstm_kernels/utils/benchmark/cuda_graphs.py
mlstm_kernels/utils/benchmark/param_handling.py
mlstm_kernels/utils/benchmark/plot_config.py
mlstm_kernels/utils/benchmark/plot_results.py
mlstm_kernels/utils/benchmark/run_benchmark.py
mlstm_kernels/utils/benchmark/runtime.py
mlstm_kernels/utils/benchmark/utils.py
mlstm_kernels/utils/flops/__init__.py
mlstm_kernels/utils/flops/mlstm_block_flop_counts.py
mlstm_kernels/utils/flops/mlstm_flop_analysis.py
mlstm_kernels/utils/flops/model_flops_computation.py
mlstm_kernels/utils/flops/slstm_block_flop_counts.py
mlstm_kernels/utils/flops/transformer_block_flop_counts.py
mlstm_kernels/utils/plot/__init__.py
mlstm_kernels/utils/plot/diff_imshow.py
mlstm_kernels/utils/plot/diff_lineplot.py
mlstm_kernels/utils/plot/ewma.py
mlstm_kernels/utils/test/__init__.py
mlstm_kernels/utils/test/checks.py
mlstm_kernels/utils/test/fixtures.py
mlstm_kernels/utils/test/test_fwbw.py
mlstm_kernels/utils/test/test_templates/__init__.py
tests/test_padding.py