cupy-cuda12x>=13.0
scipy>=1.11
numpy>=1.24

[triton]
triton>=3.0.0
torch>=2.0
