torch>=2.10.0
cuda-python
nvidia-cutlass-dsl>=4.4.1
nvidia-cutlass-dsl-libs-base>=4.4.1
nvidia-cutlass-dsl-libs-cu13>=4.4.1
apache-tvm-ffi!=0.1.8,!=0.1.8.post0,<0.2,>=0.1.6
rich>=13
fastapi>=0.115
uvicorn>=0.34
transformers>=4.51
safetensors>=0.6

[dev]
pytest
