.coderabbit.yaml
.env.yaml.example
.gitignore
CHANGELOG.md
CONTRIBUTING.md
LICENSE
Makefile
README.md
SECURITY.md
llm-infer.py
pyproject.toml
setup.cfg
.github/CODEOWNERS
.github/workflows/ci.yml
.github/workflows/release.yml
docs/adapters.md
docs/api.md
docs/architecture.md
docs/bugs-journal.md
docs/client.md
docs/config.md
docs/engines.md
docs/known-issues.md
docs/usage.md
etc/compat.yaml
etc/compat_template.yaml
etc/infra.yaml
etc/llm-infer.yaml
etc/models.yaml
etc/native.yaml
etc/ollama.yaml
etc/peft.yaml
etc/pip.conf
etc/uvicorn.yaml
etc/vllm-server.yaml
etc/vllm.yaml
examples/client/client.py
examples/client/client.yaml
examples/client/test_retry_client.py
examples/client/test_retry_server.py
llm_infer/__init__.py
llm_infer/adapter_meta.py
llm_infer/compat.py
llm_infer/context.py
llm_infer/logging_setup.py
llm_infer/py.typed
llm_infer.egg-info/PKG-INFO
llm_infer.egg-info/SOURCES.txt
llm_infer.egg-info/dependency_links.txt
llm_infer.egg-info/entry_points.txt
llm_infer.egg-info/requires.txt
llm_infer.egg-info/top_level.txt
llm_infer/api/__init__.py
llm_infer/cli/__init__.py
llm_infer/cli/cli.py
llm_infer/cli/config/__init__.py
llm_infer/cli/config/models.py
llm_infer/cli/tools/__init__.py
llm_infer/cli/tools/compat.py
llm_infer/cli/tools/metrics.py
llm_infer/cli/tools/query.py
llm_infer/cli/tools/serve.py
llm_infer/client/__init__.py
llm_infer/client/base.py
llm_infer/client/client.py
llm_infer/client/discovery.py
llm_infer/client/errors.py
llm_infer/client/factory.py
llm_infer/client/router.py
llm_infer/client/saia.py
llm_infer/client/types.py
llm_infer/client/backends/__init__.py
llm_infer/client/backends/anthropic.py
llm_infer/client/backends/base.py
llm_infer/client/backends/openai.py
llm_infer/engines/__init__.py
llm_infer/engines/ollama.py
llm_infer/engines/peft.py
llm_infer/engines/protocol.py
llm_infer/engines/vllm.py
llm_infer/engines/vllm_common.py
llm_infer/engines/vllm_server.py
llm_infer/engines/native/__init__.py
llm_infer/engines/native/config.py
llm_infer/engines/native/engine.py
llm_infer/engines/native/generation.py
llm_infer/engines/native/protocols.py
llm_infer/engines/native/scheduler.py
llm_infer/engines/native/attention/__init__.py
llm_infer/engines/native/attention/kv_ops.py
llm_infer/engines/native/attention/rope.py
llm_infer/engines/native/attention/backends/__init__.py
llm_infer/engines/native/attention/backends/flashinfer.py
llm_infer/engines/native/attention/backends/naive.py
llm_infer/engines/native/backends/__init__.py
llm_infer/engines/native/backends/linear/__init__.py
llm_infer/engines/native/backends/linear/registry.py
llm_infer/engines/native/backends/linear/formats/__init__.py
llm_infer/engines/native/backends/linear/formats/awq.py
llm_infer/engines/native/backends/linear/formats/base.py
llm_infer/engines/native/backends/linear/formats/fp8.py
llm_infer/engines/native/backends/linear/kernels/__init__.py
llm_infer/engines/native/backends/linear/kernels/awq_marlin.py
llm_infer/engines/native/backends/linear/kernels/awq_pytorch.py
llm_infer/engines/native/backends/linear/kernels/fp8_cutlass.py
llm_infer/engines/native/backends/linear/kernels/fp8_pytorch.py
llm_infer/engines/native/guards/__init__.py
llm_infer/engines/native/guards/protocol.py
llm_infer/engines/native/guards/repetition.py
llm_infer/engines/native/kv_cache/__init__.py
llm_infer/engines/native/kv_cache/pool.py
llm_infer/engines/native/kv_cache/sequence.py
llm_infer/engines/native/model/__init__.py
llm_infer/engines/native/model/architecture.py
llm_infer/engines/native/model/config.py
llm_infer/engines/native/model/layers.py
llm_infer/engines/native/model/transformer.py
llm_infer/engines/native/sampler/__init__.py
llm_infer/engines/native/sampler/sampler.py
llm_infer/engines/native/tokenizer/__init__.py
llm_infer/engines/native/tokenizer/config.py
llm_infer/engines/native/tokenizer/huggingface.py
llm_infer/models/__init__.py
llm_infer/models/config.py
llm_infer/models/metadata.py
llm_infer/models/resolver.py
llm_infer/response/__init__.py
llm_infer/response/events.py
llm_infer/response/latex.py
llm_infer/response/processor.py
llm_infer/response/protocols.py
llm_infer/response/utf8.py
llm_infer/response/parsers/__init__.py
llm_infer/response/parsers/base.py
llm_infer/response/parsers/code.py
llm_infer/response/parsers/latex.py
llm_infer/response/parsers/think.py
llm_infer/response/resolvers/__init__.py
llm_infer/response/resolvers/base.py
llm_infer/response/resolvers/terminal.py
llm_infer/schemas/__init__.py
llm_infer/schemas/openai.py
llm_infer/serving/__init__.py
llm_infer/serving/adapters.py
llm_infer/serving/api/__init__.py
llm_infer/serving/api/adapters.py
llm_infer/serving/api/errors.py
llm_infer/serving/api/routes.py
llm_infer/serving/api/schemas.py
llm_infer/serving/api/trace.py
llm_infer/serving/api/openai/__init__.py
llm_infer/serving/api/openai/errors.py
llm_infer/serving/api/openai/mappers.py
llm_infer/serving/api/openai/router.py
llm_infer/serving/api/openai/streaming.py
llm_infer/serving/api/openai/streaming_generators.py
llm_infer/serving/dispatch/README.md
llm_infer/serving/dispatch/__init__.py
llm_infer/serving/dispatch/config.py
llm_infer/serving/dispatch/config_overrides.py
llm_infer/serving/dispatch/errors.py
llm_infer/serving/dispatch/factories.py
llm_infer/serving/dispatch/handler.py
llm_infer/serving/dispatch/loop.py
llm_infer/serving/dispatch/main.py
llm_infer/serving/dispatch/metrics.py
llm_infer/serving/dispatch/processors.py
llm_infer/serving/dispatch/progress.py
llm_infer/serving/dispatch/types.py
llm_infer/serving/dispatch/warmup.py
llm_infer/serving/dispatch/handlers/__init__.py
llm_infer/serving/dispatch/handlers/batching.py
llm_infer/serving/dispatch/handlers/bounded.py
llm_infer/serving/dispatch/handlers/concurrent_http.py
llm_infer/serving/dispatch/handlers/sequential.py
tests/__init__.py
tests/test_config.py
tests/e2e/__init__.py
tests/integration/__init__.py
tests/unit/__init__.py
tests/unit/adapters_test.py
tests/unit/architecture_test.py
tests/unit/attention_backend_test.py
tests/unit/awq_linear_test.py
tests/unit/compat_test.py
tests/unit/concurrent_http_handler_test.py
tests/unit/config_overrides_test.py
tests/unit/context_test.py
tests/unit/engine_config_test.py
tests/unit/fp8_pytorch_test.py
tests/unit/generation_helpers_test.py
tests/unit/guards_test.py
tests/unit/handler_adapter_test.py
tests/unit/imports_test.py
tests/unit/kv_cache_test.py
tests/unit/layers_quantized_test.py
tests/unit/layers_rmsnorm_test.py
tests/unit/logging_setup_test.py
tests/unit/model_config_test.py
tests/unit/naive_attention_test.py
tests/unit/ollama_engine_test.py
tests/unit/openai_reasoning_compat_test.py
tests/unit/quant_format_test.py
tests/unit/registry_test.py
tests/unit/response_format_test.py
tests/unit/rope_test.py
tests/unit/sampler_test.py
tests/unit/scheduler_test.py
tests/unit/sequence_kv_cache_test.py
tests/unit/serve_model_overrides_test.py
tests/unit/think_tag_normalizer_test.py
tests/unit/tokenizer_config_test.py
tests/unit/tool_calling_test.py
tests/unit/vllm_common_test.py
tests/unit/vllm_server_engine_test.py
tests/unit/weight_formats_test.py
tests/unit/cli/__init__.py
tests/unit/cli/config/__init__.py
tests/unit/cli/config/models_test.py
tests/unit/cli/tools/__init__.py
tests/unit/cli/tools/metrics_tool_test.py
tests/unit/client/__init__.py
tests/unit/client/test_anthropic_backend.py
tests/unit/client/test_anthropic_backend_extra.py
tests/unit/client/test_client.py
tests/unit/client/test_discovery.py
tests/unit/client/test_errors.py
tests/unit/client/test_openai_backend.py
tests/unit/client/test_openai_backend_extra.py
tests/unit/client/test_router.py
tests/unit/client/test_saia_adapter.py
tests/unit/client/test_types.py
tests/unit/engines/__init__.py
tests/unit/engines/ollama_engine_test.py
tests/unit/models/__init__.py
tests/unit/models/metadata_test.py
tests/unit/models/resolver_test.py
tests/unit/response/__init__.py
tests/unit/response/base_resolver_test.py
tests/unit/response/code_parser_test.py
tests/unit/response/events_test.py
tests/unit/response/latex_transformer_test.py
tests/unit/response/processor_test.py
tests/unit/response/think_extra_test.py
tests/unit/response/think_parser_test.py
tests/unit/response/parsers/__init__.py
tests/unit/response/parsers/base_test.py
tests/unit/serving/__init__.py
tests/unit/serving/adapters_scan_test.py
tests/unit/serving/api/__init__.py
tests/unit/serving/api/adapters_api_test.py
tests/unit/serving/api/routes_test.py
tests/unit/serving/api/trace_test.py
tests/unit/serving/api/openai/__init__.py
tests/unit/serving/api/openai/errors_test.py
tests/unit/serving/api/openai/mappers_test.py
tests/unit/serving/api/openai/router_test.py
tests/unit/serving/api/openai/streaming_generators_test.py
tests/unit/serving/api/openai/streaming_test.py
tests/unit/serving/dispatch/__init__.py
tests/unit/serving/dispatch/_helpers.py
tests/unit/serving/dispatch/errors_test.py
tests/unit/serving/dispatch/factories_test.py
tests/unit/serving/dispatch/handler_test.py
tests/unit/serving/dispatch/loop_test.py
tests/unit/serving/dispatch/metrics_test.py
tests/unit/serving/dispatch/processors_test.py
tests/unit/serving/dispatch/progress_test.py
tests/unit/serving/dispatch/warmup_test.py
tests/unit/serving/dispatch/handlers/__init__.py
tests/unit/serving/dispatch/handlers/batching_test.py
tests/unit/serving/dispatch/handlers/bounded_test.py
tests/unit/serving/dispatch/handlers/sequential_test.py