README.md
pyproject.toml
tests/test_analytics.py
tests/test_auth.py
tests/test_billing.py
tests/test_cli_coverage.py
tests/test_cli_parity_integration.py
tests/test_config_integration.py
tests/test_distributed_traces_cli.py
tests/test_file_operations_integration.py
tests/test_kernel_scope_cli.py
tests/test_nsys_analyze.py
tests/test_nsys_profile.py
tests/test_output.py
tests/test_rocprof_compute_integration.py
tests/test_skill_commands.py
tests/test_ssh_integration.py
tests/test_targets_ops.py
tests/test_wevin_cli.py
tests/test_workflow_integration.py
wafer/GUIDE.md
wafer/__init__.py
wafer/agent_defaults.py
wafer/analytics.py
wafer/api_client.py
wafer/auth.py
wafer/autotuner.py
wafer/baseline.py
wafer/billing.py
wafer/cli.py
wafer/cli_instructions.py
wafer/config.py
wafer/corpus.py
wafer/distributed_traces.py
wafer/evaluate.py
wafer/global_config.py
wafer/gpu_run.py
wafer/inference.py
wafer/kernel_scope.py
wafer/ncu_analyze.py
wafer/nsys_analyze.py
wafer/nsys_profile.py
wafer/output.py
wafer/problems.py
wafer/rocprof_compute.py
wafer/rocprof_sdk.py
wafer/rocprof_systems.py
wafer/specs_cli.py
wafer/ssh_keys.py
wafer/target_lock.py
wafer/targets.py
wafer/targets_cli.py
wafer/targets_ops.py
wafer/trace_compare.py
wafer/tracelens.py
wafer/wevin_cli.py
wafer/workspaces.py
wafer/corpora/amd/amd_instinct_gpu_specs.md
wafer/corpora/amd/cdna2/01-architecture-overview.md
wafer/corpora/amd/cdna2/02-matrix-instructions.md
wafer/corpora/amd/cdna2/README.md
wafer/corpora/amd/cdna3-isa/01-introduction.md
wafer/corpora/amd/cdna3-isa/02-program-organization.md
wafer/corpora/amd/cdna3-isa/03-kernel-state.md
wafer/corpora/amd/cdna3-isa/04-program-flow-control.md
wafer/corpora/amd/cdna3-isa/05-scalar-alu.md
wafer/corpora/amd/cdna3-isa/06-vector-alu.md
wafer/corpora/amd/cdna3-isa/07-matrix-instructions.md
wafer/corpora/amd/cdna3-isa/08-scalar-memory.md
wafer/corpora/amd/cdna3-isa/09-vector-memory.md
wafer/corpora/amd/cdna3-isa/10-flat-memory.md
wafer/corpora/amd/cdna3-isa/11-data-share.md
wafer/corpora/amd/cdna3-isa/README.md
wafer/corpora/amd/composable-kernel/01-ck-overview.md
wafer/corpora/amd/hip/01-hip-programming-model.md
wafer/corpora/amd/hip/02-hip-memory-management.md
wafer/corpora/amd/hip/03-hip-synchronization.md
wafer/corpora/amd/hip/04-hip-intrinsics.md
wafer/corpora/amd/rocm-profiling/01-rocprofiler-overview.md
wafer/corpora/common/flash-attention/01-flash-attention-overview.md
wafer/corpora/common/vllm/01-vllm-overview.md
wafer/corpora/nvidia/blackwell/01-architecture-overview.md
wafer/corpora/nvidia/cuda-guide/01-cuda-programming-model.md
wafer/corpora/nvidia/cuda-guide/02-cuda-memory-management.md
wafer/corpora/nvidia/cuda-guide/03-cuda-best-practices.md
wafer/corpora/nvidia/cuda-guide/04-cuda-streams-events.md
wafer/corpora/nvidia/cutlass/01-cutlass-overview.md
wafer/corpora/nvidia/hopper/01-overview.md
wafer/corpora/nvidia/hopper/02-streaming-multiprocessor.md
wafer/corpora/nvidia/hopper/03-tensor-cores.md
wafer/corpora/nvidia/hopper/04-memory-hierarchy.md
wafer/corpora/nvidia/hopper/05-synchronization.md
wafer/corpora/nvidia/hopper/README.md
wafer/corpora/nvidia/nsight/01-nsight-compute-overview.md
wafer/corpora/nvidia/nsight/02-nsight-systems.md
wafer/corpora/nvidia/ptx-isa/01-ptx-overview.md
wafer/corpora/nvidia/ptx-isa/02-ptx-tensor-operations.md
wafer/corpora/nvidia/triton/01-triton-overview.md
wafer/skills/wafer-guide/SKILL.md
wafer/templates/__init__.py
wafer/templates/aiter_optimize.py
wafer/templates/ask_docs.py
wafer/templates/audit.py
wafer/templates/optimize_flashinfer.py
wafer/templates/optimize_kernel.py
wafer/templates/optimize_kernelbench.py
wafer/templates/optimize_vllm.py
wafer/templates/trace_analyze.py
wafer/tests/test_eval_cli_parity.py
wafer_cli.egg-info/PKG-INFO
wafer_cli.egg-info/SOURCES.txt
wafer_cli.egg-info/dependency_links.txt
wafer_cli.egg-info/entry_points.txt
wafer_cli.egg-info/requires.txt
wafer_cli.egg-info/top_level.txt