LICENSE
README.md
pyproject.toml
flashlib/__init__.py
flashlib/_hw.py
flashlib/_lazy.py
flashlib/diagnose.py
flashlib.egg-info/PKG-INFO
flashlib.egg-info/SOURCES.txt
flashlib.egg-info/dependency_links.txt
flashlib.egg-info/requires.txt
flashlib.egg-info/top_level.txt
flashlib/applications/__init__.py
flashlib/applications/dbscan.py
flashlib/applications/hdbscan.py
flashlib/applications/kmeans.py
flashlib/applications/knn.py
flashlib/applications/linear_regression.py
flashlib/applications/logistic_regression.py
flashlib/applications/multinomial_nb.py
flashlib/applications/pca.py
flashlib/applications/random_forest.py
flashlib/applications/ridge.py
flashlib/applications/spectral_clustering.py
flashlib/applications/standard_scaler.py
flashlib/applications/truncated_svd.py
flashlib/applications/tsne.py
flashlib/applications/umap.py
flashlib/info/__init__.py
flashlib/info/dispatch.py
flashlib/info/estimate.py
flashlib/info/flop_models.py
flashlib/info/registry.py
flashlib/info/roofline.py
flashlib/kernels/__init__.py
flashlib/kernels/cute_helpers.py
flashlib/kernels/connected_components/__init__.py
flashlib/kernels/connected_components/cost.py
flashlib/kernels/connected_components/triton/__init__.py
flashlib/kernels/connected_components/triton/connected_components.py
flashlib/kernels/distance/__init__.py
flashlib/kernels/distance/cost.py
flashlib/kernels/distance/triton/__init__.py
flashlib/kernels/distance/triton/_common.py
flashlib/kernels/distance/triton/affinity.py
flashlib/kernels/distance/triton/knn_gather_l2sq.py
flashlib/kernels/distance/triton/mrd.py
flashlib/kernels/distance/triton/pairwise.py
flashlib/kernels/flash_mst/__init__.py
flashlib/kernels/flash_mst/cost.py
flashlib/kernels/flash_mst/triton/__init__.py
flashlib/kernels/flash_mst/triton/flash_mst.py
flashlib/linalg/__init__.py
flashlib/linalg/ab_gemm/__init__.py
flashlib/linalg/ab_gemm/cost.py
flashlib/linalg/ab_gemm/triton/__init__.py
flashlib/linalg/ab_gemm/triton/ab_gemm.py
flashlib/linalg/cholesky/__init__.py
flashlib/linalg/cholesky/cutedsl/__init__.py
flashlib/linalg/cholesky/cutedsl/potrf.py
flashlib/linalg/cov_gemm/__init__.py
flashlib/linalg/cov_gemm/cost.py
flashlib/linalg/cov_gemm/triton/__init__.py
flashlib/linalg/cov_gemm/triton/cov_gemm.py
flashlib/linalg/eigh/__init__.py
flashlib/linalg/eigh/cost.py
flashlib/linalg/eigh/cusolver.py
flashlib/linalg/eigh/halko.py
flashlib/linalg/eigh/impl.py
flashlib/linalg/eigh/jacobi.py
flashlib/linalg/eigh/qdwh.py
flashlib/linalg/eigh/qdwh_ns.py
flashlib/linalg/eigh/triton/__init__.py
flashlib/linalg/eigh/triton/householder.py
flashlib/linalg/eigh/triton/jacobi.py
flashlib/linalg/gemm/__init__.py
flashlib/linalg/gemm/bf16.py
flashlib/linalg/gemm/bf16_x3.py
flashlib/linalg/gemm/fp16.py
flashlib/linalg/gemm/fp16_x3.py
flashlib/linalg/gemm/fp16_x3_kahan.py
flashlib/linalg/gemm/fp16_x9.py
flashlib/linalg/gemm/fp32.py
flashlib/linalg/gemm/ozaki2_dispatch.py
flashlib/linalg/gemm/ozaki2_int8.py
flashlib/linalg/gemm/ozaki2_portable.py
flashlib/linalg/gemm/ozaki_constants.py
flashlib/linalg/gemm/route.py
flashlib/linalg/gemm/tf32.py
flashlib/linalg/gemm/tf32_x3.py
flashlib/linalg/gemm/tf32_x6.py
flashlib/linalg/gemm/cutedsl/__init__.py
flashlib/linalg/gemm/cutedsl/bf16_chained.py
flashlib/linalg/gemm/cutedsl/bf16x3_fused.py
flashlib/linalg/gemm/cutedsl/fp16x9.py
flashlib/linalg/gemm/cutedsl/hopper_wgmma_bf16.py
flashlib/linalg/gemm/cutedsl/int8_gemm.py
flashlib/linalg/gemm/cutedsl/lib/__init__.py
flashlib/linalg/gemm/cutedsl/lib/hopper_gemm.py
flashlib/linalg/gemm/cutedsl/lib/hopper_gemm_bf16x3.py
flashlib/linalg/gemm/cutedsl/lib/hopper_gemm_x9.py
flashlib/linalg/gemm/native/__init__.py
flashlib/linalg/gemm/native/cublas_tf32x6.py
flashlib/linalg/gemm/native/gemmul8.py
flashlib/linalg/gemm/triton/__init__.py
flashlib/linalg/gemm/triton/crt_reconstruct.py
flashlib/linalg/gemm/triton/fp16x3_kahan.py
flashlib/linalg/gemm/triton/fused_kernels.py
flashlib/linalg/gemm/triton/ozaki1_dispatch.py
flashlib/linalg/gemm/triton/ozaki2_split.py
flashlib/linalg/gemm/triton/split.py
flashlib/linalg/gemm/triton/split_fp16.py
flashlib/linalg/gemm/triton/split_helpers.py
flashlib/linalg/gemm/triton/sum3.py
flashlib/linalg/gemm/triton/tall_skinny.py
flashlib/linalg/gemm/triton/triton_mm.py
flashlib/linalg/gram_gemm/__init__.py
flashlib/linalg/gram_gemm/cost.py
flashlib/linalg/gram_gemm/triton/__init__.py
flashlib/linalg/gram_gemm/triton/gram_gemm.py
flashlib/linalg/orthonormalize/__init__.py
flashlib/linalg/orthonormalize/btrtri.py
flashlib/linalg/orthonormalize/cholqr2.py
flashlib/linalg/orthonormalize/cost.py
flashlib/linalg/polar/__init__.py
flashlib/linalg/polar/cost.py
flashlib/linalg/polar/express_coeffs.py
flashlib/linalg/polar/polar_express.py
flashlib/linalg/polar/qdwh_hybrid.py
flashlib/linalg/polar/zolo.py
flashlib/linalg/qr/__init__.py
flashlib/linalg/qr/cutedsl/__init__.py
flashlib/linalg/qr/cutedsl/geqrf.py
flashlib/linalg/trsm/__init__.py
flashlib/linalg/trsm/cutedsl/__init__.py
flashlib/linalg/trsm/cutedsl/trsm.py
flashlib/primitives/__init__.py
flashlib/primitives/dbscan/__init__.py
flashlib/primitives/dbscan/cost.py
flashlib/primitives/dbscan/impl.py
flashlib/primitives/dbscan/cutedsl/__init__.py
flashlib/primitives/dbscan/cutedsl/grid_radius.py
flashlib/primitives/dbscan/triton/__init__.py
flashlib/primitives/dbscan/triton/dbscan.py
flashlib/primitives/dbscan/triton/dbscan_legacy.py
flashlib/primitives/hdbscan/__init__.py
flashlib/primitives/hdbscan/cost.py
flashlib/primitives/hdbscan/impl.py
flashlib/primitives/hdbscan/cutedsl/__init__.py
flashlib/primitives/hdbscan/cutedsl/mrd_edges.py
flashlib/primitives/hdbscan/triton/__init__.py
flashlib/primitives/hdbscan/triton/_tree_helpers.py
flashlib/primitives/hdbscan/triton/hdbscan.py
flashlib/primitives/hdbscan/triton/legacy.py
flashlib/primitives/hdbscan/triton/sparse_mst.py
flashlib/primitives/kmeans/__init__.py
flashlib/primitives/kmeans/cost.py
flashlib/primitives/kmeans/impl.py
flashlib/primitives/kmeans/large.py
flashlib/primitives/kmeans/torch_fallback.py
flashlib/primitives/kmeans/cutedsl/__init__.py
flashlib/primitives/kmeans/cutedsl/assign.py
flashlib/primitives/kmeans/cutedsl/assign_kernel.py
flashlib/primitives/kmeans/triton/__init__.py
flashlib/primitives/kmeans/triton/assign.py
flashlib/primitives/kmeans/triton/kmeans.py
flashlib/primitives/kmeans/triton/update.py
flashlib/primitives/knn/__init__.py
flashlib/primitives/knn/cost.py
flashlib/primitives/knn/impl.py
flashlib/primitives/knn/torch_fallback.py
flashlib/primitives/knn/cutedsl/__init__.py
flashlib/primitives/knn/cutedsl/fused_kernel.py
flashlib/primitives/knn/cutedsl/impl.py
flashlib/primitives/knn/triton/__init__.py
flashlib/primitives/knn/triton/_common.py
flashlib/primitives/knn/triton/_row_norm.py
flashlib/primitives/knn/triton/dispatch.py
flashlib/primitives/knn/triton/insert.py
flashlib/primitives/knn/triton/sortmerge.py
flashlib/primitives/linear_regression/__init__.py
flashlib/primitives/linear_regression/cost.py
flashlib/primitives/linear_regression/impl.py
flashlib/primitives/linear_regression/cutedsl/__init__.py
flashlib/primitives/linear_regression/cutedsl/xtx.py
flashlib/primitives/linear_regression/triton/__init__.py
flashlib/primitives/linear_regression/triton/fused_kernels.py
flashlib/primitives/linear_regression/triton/legacy.py
flashlib/primitives/linear_regression/triton/linear_regression.py
flashlib/primitives/logistic_regression/__init__.py
flashlib/primitives/logistic_regression/cost.py
flashlib/primitives/logistic_regression/impl.py
flashlib/primitives/logistic_regression/cutedsl/__init__.py
flashlib/primitives/logistic_regression/cutedsl/fwd_gemv.py
flashlib/primitives/logistic_regression/triton/__init__.py
flashlib/primitives/logistic_regression/triton/logistic_regression.py
flashlib/primitives/multinomial_nb/__init__.py
flashlib/primitives/multinomial_nb/cost.py
flashlib/primitives/multinomial_nb/impl.py
flashlib/primitives/multinomial_nb/cutedsl/__init__.py
flashlib/primitives/multinomial_nb/cutedsl/nb.py
flashlib/primitives/multinomial_nb/triton/__init__.py
flashlib/primitives/multinomial_nb/triton/nb.py
flashlib/primitives/multinomial_nb/triton/nb_core.py
flashlib/primitives/pca/__init__.py
flashlib/primitives/pca/cost.py
flashlib/primitives/pca/impl.py
flashlib/primitives/pca/cutedsl/__init__.py
flashlib/primitives/pca/cutedsl/gemm.py
flashlib/primitives/pca/triton/__init__.py
flashlib/primitives/pca/triton/fused_kernels.py
flashlib/primitives/pca/triton/legacy.py
flashlib/primitives/pca/triton/pca.py
flashlib/primitives/random_forest/__init__.py
flashlib/primitives/random_forest/cost.py
flashlib/primitives/random_forest/impl.py
flashlib/primitives/random_forest/cutedsl/__init__.py
flashlib/primitives/random_forest/cutedsl/predict.py
flashlib/primitives/random_forest/triton/__init__.py
flashlib/primitives/random_forest/triton/histogram.py
flashlib/primitives/random_forest/triton/rf_kernels.py
flashlib/primitives/ridge/__init__.py
flashlib/primitives/ridge/cost.py
flashlib/primitives/ridge/impl.py
flashlib/primitives/ridge/cutedsl/__init__.py
flashlib/primitives/ridge/cutedsl/ridge.py
flashlib/primitives/ridge/triton/__init__.py
flashlib/primitives/ridge/triton/legacy.py
flashlib/primitives/ridge/triton/ridge.py
flashlib/primitives/spectral_clustering/__init__.py
flashlib/primitives/spectral_clustering/cost.py
flashlib/primitives/spectral_clustering/impl.py
flashlib/primitives/spectral_clustering/cutedsl/__init__.py
flashlib/primitives/spectral_clustering/cutedsl/spectral.py
flashlib/primitives/spectral_clustering/triton/__init__.py
flashlib/primitives/spectral_clustering/triton/spectral.py
flashlib/primitives/standard_scaler/__init__.py
flashlib/primitives/standard_scaler/cost.py
flashlib/primitives/standard_scaler/impl.py
flashlib/primitives/standard_scaler/cutedsl/__init__.py
flashlib/primitives/standard_scaler/cutedsl/scaler.py
flashlib/primitives/standard_scaler/triton/__init__.py
flashlib/primitives/standard_scaler/triton/scaler.py
flashlib/primitives/truncated_svd/__init__.py
flashlib/primitives/truncated_svd/cost.py
flashlib/primitives/truncated_svd/impl.py
flashlib/primitives/truncated_svd/cutedsl/__init__.py
flashlib/primitives/truncated_svd/cutedsl/svd.py
flashlib/primitives/truncated_svd/triton/__init__.py
flashlib/primitives/truncated_svd/triton/fused_kernels.py
flashlib/primitives/truncated_svd/triton/legacy.py
flashlib/primitives/truncated_svd/triton/svd.py
flashlib/primitives/tsne/__init__.py
flashlib/primitives/tsne/cost.py
flashlib/primitives/tsne/impl.py
flashlib/primitives/tsne/cutedsl/__init__.py
flashlib/primitives/tsne/cutedsl/tsne.py
flashlib/primitives/tsne/triton/__init__.py
flashlib/primitives/tsne/triton/grad.py
flashlib/primitives/tsne/triton/grad_blocked.py
flashlib/primitives/tsne/triton/train.py
flashlib/primitives/umap/__init__.py
flashlib/primitives/umap/cost.py
flashlib/primitives/umap/impl.py
flashlib/primitives/umap/cutedsl/__init__.py
flashlib/primitives/umap/cutedsl/umap.py
flashlib/primitives/umap/triton/__init__.py
flashlib/primitives/umap/triton/flash_umap.py
flashlib/primitives/umap/triton/fuzzy_simplicial_set.py
flashlib/primitives/umap/triton/sgd_legacy.py
flashlib/primitives/umap/triton/sgd_step.py
flashlib/primitives/umap/triton/smooth_knn_dist.py
tests/test_advantage_boundaries.py
tests/test_backend_parity.py
tests/test_eigh_halko.py
tests/test_gemm_variants_parity.py
tests/test_imports.py
tests/test_info.py
tests/test_multi_dsl_integration.py
tests/test_primitives_correctness.py