torch>=2.10.0
cuda-python
nvidia-cutlass-dsl[cu13]==4.4.1
apache-tvm-ffi!=0.1.8,!=0.1.8.post0,<0.2,>=0.1.6
rich>=13
fastapi>=0.115
uvicorn>=0.34
transformers>=4.51
safetensors>=0.6

[dev]
pytest
