Metadata-Version: 2.4
Name: llm-infer
Version: 0.1.0
Summary: A readable LLM inference server implementing paged attention and continuous batching
License-Expression: Apache-2.0
Requires-Python: >=3.11
License-File: LICENSE
Requires-Dist: appinfra[fastapi]<0.5.0,>=0.4.0
Requires-Dist: httpx<1.0.0,>=0.27.0
Requires-Dist: aiohttp<4.0.0,>=3.13.3
Requires-Dist: filelock<4.0.0,>=3.20.1
Requires-Dist: urllib3<3.0.0,>=2.6.3
Requires-Dist: werkzeug<4.0.0,>=3.1.5
Provides-Extra: runtime
Requires-Dist: torch<3.0.0,>=2.0.0; extra == "runtime"
Requires-Dist: transformers<5.0.0,>=4.30.0; extra == "runtime"
Requires-Dist: safetensors<1.0.0,>=0.4.0; extra == "runtime"
Provides-Extra: anthropic
Requires-Dist: anthropic<1.0.0,>=0.47.0; extra == "anthropic"
Provides-Extra: cuda
Requires-Dist: flashinfer-python<1.0.0,>=0.2.0; extra == "cuda"
Requires-Dist: pynvml<13.0.0,>=11.0.0; extra == "cuda"
Provides-Extra: dev
Requires-Dist: coverage<8.0.0,>=7.0.0; extra == "dev"
Requires-Dist: ruff<1.0.0,>=0.1.0; extra == "dev"
Requires-Dist: mypy<2.0.0,>=1.0.0; extra == "dev"
Requires-Dist: types-PyYAML>=6.0.0; extra == "dev"
Requires-Dist: pytest<10.0.0,>=7.0.0; extra == "dev"
Requires-Dist: pytest-asyncio<2.0.0,>=0.21.0; extra == "dev"
Requires-Dist: pytest-cov<8.0.0,>=4.0.0; extra == "dev"
Requires-Dist: pytest-xdist<4.0.0,>=3.0.0; extra == "dev"
Dynamic: license-file
