.clang-format
.gitignore
.isort.cfg
.pre-commit-config.yaml
CODE_OF_CONDUCT.md
CONTRIBUTING.md
DCO
LICENSE
MAINTAINERS.md
MANIFEST.in
README.md
SECURITY.md
format.sh
pyproject.toml
setup.py
.buildkite/README.md
.buildkite/pipeline.yml
.buildkite/vllm-integration-tests.yml
.buildkite/cases/comprehensive-cases.txt
.buildkite/cases/integration-cases.txt
.buildkite/configs/dummy.yaml
.buildkite/configs/gds.yaml
.buildkite/configs/local_cpu.yaml
.buildkite/configs/local_cpu_mla.yaml
.buildkite/configs/local_disk.yaml
.buildkite/configs/pd.yaml
.buildkite/correctness/README.md
.buildkite/correctness/mmlu-test.py
.buildkite/correctness/pipeline.mmlu.yml
.buildkite/correctness/setup.sh
.buildkite/correctness/summarize-results.py
.buildkite/pipelines/clean.yml
.buildkite/pipelines/comprehensive-tests.yml
.buildkite/pipelines/end-to-end-tests.yml
.buildkite/scripts/bare-machine-cleanup.sh
.buildkite/scripts/clean.sh
.buildkite/scripts/end-to-end-test.sh
.buildkite/scripts/gpu_zombie_killer.sh
.buildkite/scripts/multi-round-qa.sh
.buildkite/scripts/pick-free-gpu.sh
.buildkite/scripts/vllm-integration-tests.sh
.github/PULL_REQUEST_TEMPLATE.md
.github/dependabot.yml
.github/ISSUE_TEMPLATE/blank_issue.md
.github/ISSUE_TEMPLATE/bug_report.md
.github/ISSUE_TEMPLATE/feature_request.md
.github/actions/free-disk-space/action.yml
.github/workflows/actionlint.dockerfile
.github/workflows/actionlint.yml
.github/workflows/build_doc.yml
.github/workflows/code_quality_checks.yml
.github/workflows/codeql.yml
.github/workflows/nightly_build.yml
.github/workflows/publish.yml
.github/workflows/scorecard.yml
.github/workflows/stale_bot.yml
.github/workflows/matchers/actionlint.json
.github/workflows/matchers/mypy.json
asset/logo.png
benchmarks/long_doc_qa/long_doc_qa.py
benchmarks/long_doc_qa/long_doc_qa_recommender.py
benchmarks/multi_doc_qa/README.md
benchmarks/multi_doc_qa/lmcache.yaml
benchmarks/multi_doc_qa/lmcache_blend.yaml
benchmarks/multi_doc_qa/multi_doc_qa.py
benchmarks/multi_round_qa/README.md
benchmarks/multi_round_qa/data_preprocessing.py
benchmarks/multi_round_qa/multi-round-qa.py
benchmarks/multi_round_qa/prepare_sharegpt_data.sh
benchmarks/multi_round_qa/requirements.txt
benchmarks/multi_round_qa/utils.py
benchmarks/rag/README.md
benchmarks/rag/launch_lmcache.sh
benchmarks/rag/launch_vllm.sh
benchmarks/rag/precompute.py
benchmarks/rag/rag.py
benchmarks/rag/requirements.txt
benchmarks/rag/utils.py
benchmarks/ttft-estimator/llama-h100-example.png
benchmarks/ttft-estimator/ttft-estimator.py
csrc/ac_dec.cu
csrc/ac_enc.cu
csrc/cachegen_kernels.cuh
csrc/cal_cdf.cu
csrc/cuda_compat.h
csrc/dispatch_utils.h
csrc/mem_alloc.cpp
csrc/mem_alloc.h
csrc/mem_kernels.cu
csrc/mem_kernels.cuh
csrc/pos_kernels.cu
csrc/pos_kernels.cuh
csrc/pybind.cpp
csrc/utils.cpp
csrc/utils.h
docker/Dockerfile
docker/example_build.sh
docker/example_run.sh
docs/Makefile
docs/README.md
docs/make.bat
docs/source/.nojekyll
docs/source/conf.py
docs/source/index.rst
docs/source/_static/custom.css
docs/source/_static/custom.js
docs/source/_static/scroll.css
docs/source/_templates/custom.html
docs/source/api_reference/configurations.rst
docs/source/api_reference/dynamic_connector.rst
docs/source/api_reference/multimodality.rst
docs/source/api_reference/storage_backends.rst
docs/source/assets/InfiniStore-usage.png
docs/source/assets/lmcache-logo.png
docs/source/assets/lmcache-logo_crop.png
docs/source/assets/mooncake-store-preview.png
docs/source/community/blogs.rst
docs/source/community/meetings.rst
docs/source/developer_guide/contributing.rst
docs/source/developer_guide/docker_file.rst
docs/source/developer_guide/plugin_framework.rst
docs/source/developer_guide/usage/index.rst
docs/source/developer_guide/usage/usage_stats_collection.rst
docs/source/disaggregated_prefill/shared_storage.rst
docs/source/disaggregated_prefill/nixl/1p1d.rst
docs/source/disaggregated_prefill/nixl/index.rst
docs/source/disaggregated_prefill/nixl/xpyd.rst
docs/source/getting_started/benchmarking.rst
docs/source/getting_started/faq.rst
docs/source/getting_started/installation.rst
docs/source/getting_started/troubleshoot.rst
docs/source/getting_started/quickstart/disaggregated_prefill.rst
docs/source/getting_started/quickstart/index.rst
docs/source/getting_started/quickstart/multimodality.rst
docs/source/getting_started/quickstart/offload_kv_cache.rst
docs/source/getting_started/quickstart/share_kv_cache.rst
docs/source/internal_api_server/internal_api_server.rst
docs/source/kv_cache/caching_policies.rst
docs/source/kv_cache/storage_backends/cpu_ram.rst
docs/source/kv_cache/storage_backends/external_backend.rst
docs/source/kv_cache/storage_backends/gds.rst
docs/source/kv_cache/storage_backends/index.rst
docs/source/kv_cache/storage_backends/infinistore.rst
docs/source/kv_cache/storage_backends/local_storage.rst
docs/source/kv_cache/storage_backends/mooncake.rst
docs/source/kv_cache/storage_backends/nixl.rst
docs/source/kv_cache/storage_backends/redis.rst
docs/source/kv_cache/storage_backends/valkey.rst
docs/source/kv_cache/storage_backends/weka.rst
docs/source/kv_cache_management/check_finish.rst
docs/source/kv_cache_management/clear.rst
docs/source/kv_cache_management/compress.rst
docs/source/kv_cache_management/controller.rst
docs/source/kv_cache_management/health.rst
docs/source/kv_cache_management/lookup.rst
docs/source/kv_cache_management/move.rst
docs/source/kv_cache_management/pin.rst
docs/source/kv_cache_optimizations/blending.rst
docs/source/kv_cache_optimizations/compression/cachegen.rst
docs/source/kv_cache_optimizations/compression/index.rst
docs/source/production/docker_deployment.rst
docs/source/production/kubernetes_deployment.rst
docs/source/production/observability/index.rst
docs/source/production/observability/internal_api_server.rst
docs/source/production/observability/vllm_endpoint.rst
examples/blend_kv/README.md
examples/blend_kv/batched_kv.py
examples/blend_kv/batched_tp_kv.py
examples/blend_kv/blend_kv.py
examples/blend_kv/chunk1.txt
examples/blend_kv/chunk2.txt
examples/blend_kv/example_blending.yaml
examples/blend_kv/online_kv.py
examples/blend_kv/tp_kv.py
examples/blend_kv_v1/README.md
examples/blend_kv_v1/blend.py
examples/cache_controller/README.md
examples/cache_controller/clear/README.md
examples/cache_controller/clear/example.yaml
examples/cache_controller/compress/README.md
examples/cache_controller/compress/example.yaml
examples/cache_controller/health/README.md
examples/cache_controller/health/example.yaml
examples/cache_controller/lookup/README.md
examples/cache_controller/lookup/example.yaml
examples/cache_controller/move/README.md
examples/cache_controller/move/instance1.yaml
examples/cache_controller/move/instance2.yaml
examples/cache_controller/pin/README.md
examples/cache_controller/pin/example.yaml
examples/cache_interface/README.md
examples/cache_interface/example.yaml
examples/cache_with_configs/README.md
examples/cache_with_configs/example.yaml
examples/disagg_prefill/README.md
examples/disagg_prefill/disagg_proxy_server.py
examples/disagg_prefill/1p1d/README.md
examples/disagg_prefill/1p1d/disagg_example_nixl.sh
examples/disagg_prefill/1p1d/disagg_proxy_server_first_token_from_decoder.py
examples/disagg_prefill/1p1d/disagg_proxy_server_first_token_from_prefiller.py
examples/disagg_prefill/1p1d/disagg_vllm_launcher.sh
examples/disagg_prefill/1p1d/configs/lmcache-decoder-config.yaml
examples/disagg_prefill/1p1d/configs/lmcache-prefiller-config.yaml
examples/disagg_prefill/1p1d_experimental/README.md
examples/disagg_prefill/1p1d_experimental/disagg_example_1p1d.sh
examples/disagg_prefill/1p1d_experimental/disagg_vllm_launcher.sh
examples/disagg_prefill/1p1d_experimental/configs/lmcache-decoder-config.yaml
examples/disagg_prefill/1p1d_experimental/configs/lmcache-prefiller-config.yaml
examples/disagg_prefill/xp1d/README.md
examples/disagg_prefill/xp1d/disagg_example_xp1d.sh
examples/disagg_prefill/xp1d/disagg_proxy_server_first_token_from_decoder.py
examples/disagg_prefill/xp1d/disagg_proxy_server_first_token_from_prefiller.py
examples/disagg_prefill/xp1d/disagg_vllm_launcher.sh
examples/disagg_prefill/xp1d/configs/lmcache-decoder-config.yaml
examples/disagg_prefill/xp1d/configs/lmcache-prefiller-config.yaml
examples/disagg_prefill/xpyd_experimental/README.md
examples/disagg_prefill/xpyd_experimental/disagg_example_xpyd.sh
examples/disagg_prefill/xpyd_experimental/disagg_vllm_launcher.sh
examples/disagg_prefill/xpyd_experimental/configs/lmcache-decoder-1-config.yaml
examples/disagg_prefill/xpyd_experimental/configs/lmcache-decoder-2-config.yaml
examples/disagg_prefill/xpyd_experimental/configs/lmcache-prefiller-config.yaml
examples/frontend/README.md
examples/frontend/chat_session.py
examples/frontend/example.yaml
examples/frontend/ffmpeg.txt
examples/frontend/frontend.py
examples/kubernetes/health_probe.py
examples/kv_cache_calculator/README.md
examples/kv_cache_calculator/generate_config.py
examples/kv_cache_calculator/kv_cache_calculator.html
examples/kv_cache_calculator/modelconfig.json
examples/kv_cache_calculator/requirement.txt
examples/kv_cache_reuse/README.md
examples/kv_cache_reuse/local_backends/README.md
examples/kv_cache_reuse/local_backends/offload.py
examples/kv_cache_reuse/remote_backends/README.md
examples/kv_cache_reuse/remote_backends/external/README.md
examples/kv_cache_reuse/remote_backends/external/backend_type.yaml
examples/kv_cache_reuse/remote_backends/infinistore/README.md
examples/kv_cache_reuse/remote_backends/infinistore/backend_type.yaml
examples/kv_cache_reuse/remote_backends/mock/README.md
examples/kv_cache_reuse/remote_backends/mock/example.yaml
examples/kv_cache_reuse/remote_backends/mooncakestore/README.md
examples/kv_cache_reuse/remote_backends/mooncakestore/mooncake.yaml
examples/kv_cache_reuse/remote_backends/s3/README.md
examples/kv_cache_reuse/remote_backends/s3/example.yaml
examples/kv_cache_reuse/share_across_instances/README.md
examples/kv_cache_reuse/share_across_instances/centralized_sharing/README.md
examples/kv_cache_reuse/share_across_instances/centralized_sharing/example.yaml
examples/kv_cache_reuse/share_across_instances/p2p_sharing/README.md
examples/kv_cache_reuse/share_across_instances/p2p_sharing/example1.yaml
examples/kv_cache_reuse/share_across_instances/p2p_sharing/example2.yaml
examples/online_session/README.md
examples/online_session/bench_ttft_sweep.sh
examples/online_session/example.yaml
examples/online_session/ffmpeg.txt
examples/online_session/openai_chat_completion_client.py
examples/plugins/README.md
examples/plugins/all_plugin.sh
examples/plugins/scheduler_foo_plugin.py
examples/plugins/worker_0_test.sh
examples/redis_lookup/README.md
examples/sgl_integration/README.md
examples/sgl_integration/lmcache_config.yaml
lmcache/__init__.py
lmcache/_version.py
lmcache/cache_engine.py
lmcache/config.py
lmcache/connections.py
lmcache/logging.py
lmcache/observability.py
lmcache/protocol.py
lmcache/usage_context.py
lmcache/utils.py
lmcache.egg-info/PKG-INFO
lmcache.egg-info/SOURCES.txt
lmcache.egg-info/dependency_links.txt
lmcache.egg-info/entry_points.txt
lmcache.egg-info/requires.txt
lmcache.egg-info/top_level.txt
lmcache/blend/__init__.py
lmcache/blend/executor.py
lmcache/blend/interfaces.py
lmcache/blend/retriever.py
lmcache/integration/__init__.py
lmcache/integration/sglang/__init__.py
lmcache/integration/sglang/sglang_adapter.py
lmcache/integration/sglang/utils.py
lmcache/integration/vllm/__init__.py
lmcache/integration/vllm/lmcache_connector_v1.py
lmcache/integration/vllm/lmcache_connector_v1_085.py
lmcache/integration/vllm/utils.py
lmcache/integration/vllm/vllm_adapter.py
lmcache/integration/vllm/vllm_v1_adapter.py
lmcache/server/__init__.py
lmcache/server/__main__.py
lmcache/server/server_storage_backend/__init__.py
lmcache/server/server_storage_backend/abstract_backend.py
lmcache/server/server_storage_backend/local_backend.py
lmcache/storage_backend/__init__.py
lmcache/storage_backend/abstract_backend.py
lmcache/storage_backend/hybrid_backend.py
lmcache/storage_backend/local_backend.py
lmcache/storage_backend/remote_backend.py
lmcache/storage_backend/connector/__init__.py
lmcache/storage_backend/connector/base_connector.py
lmcache/storage_backend/connector/lm_connector.py
lmcache/storage_backend/connector/redis_connector.py
lmcache/storage_backend/evictor/__init__.py
lmcache/storage_backend/evictor/base_evictor.py
lmcache/storage_backend/evictor/lru_evictor.py
lmcache/storage_backend/mem_pool/__init__.py
lmcache/storage_backend/mem_pool/base_pool.py
lmcache/storage_backend/mem_pool/local_pool.py
lmcache/storage_backend/serde/__init__.py
lmcache/storage_backend/serde/cachegen_basics.py
lmcache/storage_backend/serde/cachegen_decoder.py
lmcache/storage_backend/serde/cachegen_encoder.py
lmcache/storage_backend/serde/fast_serde.py
lmcache/storage_backend/serde/safe_serde.py
lmcache/storage_backend/serde/serde.py
lmcache/storage_backend/serde/torch_serde.py
lmcache/v1/__init__.py
lmcache/v1/cache_engine.py
lmcache/v1/cache_interface.py
lmcache/v1/config.py
lmcache/v1/event_manager.py
lmcache/v1/gpu_connector.py
lmcache/v1/memory_management.py
lmcache/v1/protocol.py
lmcache/v1/rpc_utils.py
lmcache/v1/system_detection.py
lmcache/v1/token_database.py
lmcache/v1/api_server/__init__.py
lmcache/v1/api_server/__main__.py
lmcache/v1/cache_controller/__init__.py
lmcache/v1/cache_controller/controller_manager.py
lmcache/v1/cache_controller/executor.py
lmcache/v1/cache_controller/message.py
lmcache/v1/cache_controller/utils.py
lmcache/v1/cache_controller/worker.py
lmcache/v1/cache_controller/controllers/__init__.py
lmcache/v1/cache_controller/controllers/kv_controller.py
lmcache/v1/cache_controller/controllers/registration_controller.py
lmcache/v1/compute/__init__.py
lmcache/v1/compute/positional_encoding.py
lmcache/v1/compute/attention/__init__.py
lmcache/v1/compute/attention/abstract.py
lmcache/v1/compute/attention/flash_attn.py
lmcache/v1/compute/attention/flash_infer_sparse.py
lmcache/v1/compute/attention/metadata.py
lmcache/v1/compute/attention/utils.py
lmcache/v1/compute/blend/__init__.py
lmcache/v1/compute/blend/blender.py
lmcache/v1/compute/blend/metadata.py
lmcache/v1/compute/blend/utils.py
lmcache/v1/compute/models/__init__.py
lmcache/v1/compute/models/llama.py
lmcache/v1/compute/models/utils.py
lmcache/v1/distributed_server/__init__.py
lmcache/v1/distributed_server/abstract_server.py
lmcache/v1/distributed_server/naive_server.py
lmcache/v1/internal_api_server/__init__.py
lmcache/v1/internal_api_server/api_registry.py
lmcache/v1/internal_api_server/api_server.py
lmcache/v1/internal_api_server/conf_api.py
lmcache/v1/internal_api_server/loglevel_api.py
lmcache/v1/internal_api_server/metrics_api.py
lmcache/v1/internal_api_server/run_script_api.py
lmcache/v1/internal_api_server/thread_api.py
lmcache/v1/internal_api_server/utils.py
lmcache/v1/internal_api_server/version_api.py
lmcache/v1/lookup_client/__init__.py
lmcache/v1/lookup_client/abstract_client.py
lmcache/v1/lookup_client/factory.py
lmcache/v1/lookup_client/lmcache_async_lookup_client.py
lmcache/v1/lookup_client/lmcache_lookup_client.py
lmcache/v1/lookup_client/mooncake_lookup_client.py
lmcache/v1/lookup_server/__init__.py
lmcache/v1/lookup_server/abstract_server.py
lmcache/v1/lookup_server/redis_server.py
lmcache/v1/offload_server/__init__.py
lmcache/v1/offload_server/abstract_server.py
lmcache/v1/offload_server/message.py
lmcache/v1/offload_server/zmq_server.py
lmcache/v1/plugin/__init__.py
lmcache/v1/plugin/plugin_launcher.py
lmcache/v1/server/__init__.py
lmcache/v1/server/__main__.py
lmcache/v1/server/utils.py
lmcache/v1/server/storage_backend/__init__.py
lmcache/v1/server/storage_backend/abstract_backend.py
lmcache/v1/server/storage_backend/local_backend.py
lmcache/v1/storage_backend/__init__.py
lmcache/v1/storage_backend/abstract_backend.py
lmcache/v1/storage_backend/audit_backend.py
lmcache/v1/storage_backend/gds_backend.py
lmcache/v1/storage_backend/local_cpu_backend.py
lmcache/v1/storage_backend/local_disk_backend.py
lmcache/v1/storage_backend/nixl_backend.py
lmcache/v1/storage_backend/nixl_backend_v3.py
lmcache/v1/storage_backend/nixl_storage_backend.py
lmcache/v1/storage_backend/remote_backend.py
lmcache/v1/storage_backend/remote_monitor.py
lmcache/v1/storage_backend/storage_backend_listener.py
lmcache/v1/storage_backend/storage_manager.py
lmcache/v1/storage_backend/weka_gds_backend.py
lmcache/v1/storage_backend/cache_policy/__init__.py
lmcache/v1/storage_backend/cache_policy/base_policy.py
lmcache/v1/storage_backend/cache_policy/fifo.py
lmcache/v1/storage_backend/cache_policy/lfu.py
lmcache/v1/storage_backend/cache_policy/lru.py
lmcache/v1/storage_backend/cache_policy/mru.py
lmcache/v1/storage_backend/connector/__init__.py
lmcache/v1/storage_backend/connector/audit_adapter.py
lmcache/v1/storage_backend/connector/audit_connector.py
lmcache/v1/storage_backend/connector/base_connector.py
lmcache/v1/storage_backend/connector/blackhole_adapter.py
lmcache/v1/storage_backend/connector/blackhole_connector.py
lmcache/v1/storage_backend/connector/external_adapter.py
lmcache/v1/storage_backend/connector/fs_adapter.py
lmcache/v1/storage_backend/connector/fs_connector.py
lmcache/v1/storage_backend/connector/infinistore_adapter.py
lmcache/v1/storage_backend/connector/infinistore_connector.py
lmcache/v1/storage_backend/connector/instrumented_connector.py
lmcache/v1/storage_backend/connector/lm_adapter.py
lmcache/v1/storage_backend/connector/lm_connector.py
lmcache/v1/storage_backend/connector/mock_adapter.py
lmcache/v1/storage_backend/connector/mock_connector.py
lmcache/v1/storage_backend/connector/mooncakestore_adapter.py
lmcache/v1/storage_backend/connector/mooncakestore_connector.py
lmcache/v1/storage_backend/connector/nixl_connector.py
lmcache/v1/storage_backend/connector/nixl_connector_v2.py
lmcache/v1/storage_backend/connector/nixl_connector_v3.py
lmcache/v1/storage_backend/connector/nixl_utils.py
lmcache/v1/storage_backend/connector/redis_adapter.py
lmcache/v1/storage_backend/connector/redis_connector.py
lmcache/v1/storage_backend/connector/s3_adapter.py
lmcache/v1/storage_backend/connector/s3_connector.py
lmcache/v1/storage_backend/job_executor/__init__.py
lmcache/v1/storage_backend/job_executor/base_executor.py
lmcache/v1/storage_backend/job_executor/pq_executor.py
lmcache/v1/storage_backend/naive_serde/__init__.py
lmcache/v1/storage_backend/naive_serde/cachegen_basics.py
lmcache/v1/storage_backend/naive_serde/cachegen_decoder.py
lmcache/v1/storage_backend/naive_serde/cachegen_encoder.py
lmcache/v1/storage_backend/naive_serde/kivi_serde.py
lmcache/v1/storage_backend/naive_serde/naive_serde.py
lmcache/v1/storage_backend/naive_serde/serde.py
requirements/bench.txt
requirements/build.txt
requirements/common.txt
requirements/cuda.txt
requirements/docs.txt
requirements/lint.txt
requirements/test.txt
tests/__init__.py
tests/conftest.py
tests/pytest.ini
tests/test_blend.py
tests/test_evictor.py
tests/test_observability.py
tests/test_protocol.py
tests/test_serde.py
tests/benchmarks/test_benchmark.py
tests/benchmarks/test_cachegen.py
tests/data/test_creation_from_file/disk.yaml
tests/data/test_creation_from_file/fail.yaml
tests/data/test_creation_from_file/hybrid.yaml
tests/data/test_creation_from_file/local.yaml
tests/data/test_creation_from_file/remote.yaml
tests/disagg/README.md
tests/disagg/test_nixl_cache_engine.py
tests/disagg/test_nixl_channel.py
tests/disagg/test_nixl_channel_v2.py
tests/disagg/test_nixl_pipe.py
tests/disagg/test_nixl_pipe_v2.py
tests/disagg/test_nixl_storage_backend.py
tests/v1/__init__.py
tests/v1/test_cache_engine.py
tests/v1/test_cache_interface.py
tests/v1/test_cache_policy.py
tests/v1/test_config.py
tests/v1/test_connector.py
tests/v1/test_gds.py
tests/v1/test_gpu_connector.py
tests/v1/test_mem_kernels.py
tests/v1/test_memory_management.py
tests/v1/test_nixl_storage.py
tests/v1/test_pos_kernels.py
tests/v1/test_remote_mla_worker_id_as0.py
tests/v1/test_token_database.py
tests/v1/test_vllm_integration.py
tests/v1/test_weka.py
tests/v1/utils.py
tests/v1/data/gds.yaml
tests/v1/data/nixl.yaml
tests/v1/data/test_config.yaml
tests/v1/data/weka.yaml
tests/v1/storage_backend/test_gds_backend.py
tests/v1/storage_backend/test_local_cpu_backend.py
tests/v1/storage_backend/test_local_disk_backend.py
tools/check_spdx_header.py