xcena-dev · seohui-XCENA · Mar 6, 2026 · Mar 6, 2026 · Mar 9, 2026 · Mar 9, 2026
diff --git a/docs/source/assets/maru-kvcache.gif b/docs/source/assets/maru-kvcache.gif
diff --git a/docs/source/kv_cache/storage_backends/index.rst b/docs/source/kv_cache/storage_backends/index.rst
@@ -15,6 +15,7 @@ Supported Backends
    gds
    infinistore
    local_storage
+   maru
    mock
    mooncake
    nixl

diff --git a/docs/source/kv_cache/storage_backends/maru.rst b/docs/source/kv_cache/storage_backends/maru.rst
@@ -0,0 +1,113 @@
+Maru
+====
+
+.. _maru-overview:
+
+Overview
+--------
+
+`Maru <https://github.com/xcena-dev/maru>`_ is a high-performance KV cache storage engine built on CXL shared memory,
+designed for LLM inference scenarios where multiple instances need to share a KV cache with minimal latency.
+
+.. image:: ../../assets/maru-kvcache.gif
+    :alt: KV Cache Sharing: Without vs With Maru
+
+For architecture details, see the `Maru documentation <https://xcena-dev.github.io/maru/>`_.
+
+Quick Start
+-----------
+
+Install Maru:
+
+.. code-block:: bash
+
+    git clone https://github.com/xcena-dev/maru.git
+    cd maru
+    ./install.sh
+
+This installs ``maru-server``, ``maru-resourced``, and the ``maru`` Python package.
+
+Deploy Model With Maru
+~~~~~~~~~~~~~~~~~~~~~~
+
+**Prerequisites:** A CXL device (``/dev/dax*``), Python 3.12+, and both vLLM and LMCache installed.
+
+**1. Start the Maru Server**
+
+.. code-block:: bash
+
+    maru-server
+
+**2. Create a configuration file** (``maru-config.yaml``):
+
+.. code-block:: yaml
+
+    chunk_size: 256
+    local_cpu: False
+    max_local_cpu_size: 0
+    save_unfull_chunk: True
+
+    # Maru backend
+    maru_path: "maru://localhost:5555"
+    maru_pool_size: 4
+
+**3. Start vLLM with Maru**
+
+.. code-block:: bash
+
+    LMCACHE_CONFIG_FILE="maru-config.yaml" \
+    vllm serve \
+        meta-llama/Llama-3.1-8B-Instruct \
+        --max-model-len 65536 \
+        --kv-transfer-config \
+        '{"kv_connector":"LMCacheConnectorV1", "kv_role":"kv_both"}'
+
+Configuration
+-------------
+
+**LMCache Parameters:**
+
+.. list-table::
+   :header-rows: 1
+   :widths: 25 15 60
+
+   * - Parameter
+     - Default
+     - Description
+   * - ``maru_path``
+     - Required
+     - Maru server URL (format: ``maru://host:port``)
+   * - ``maru_pool_size``
+     - ``4.0``
+     - CXL memory pool size per instance in GB (e.g., ``4``, ``0.5``)
+
+**Advanced Parameters (via extra_config):**
+
+.. list-table::
+   :header-rows: 1
+   :widths: 25 15 60
+
+   * - Parameter
+     - Default
+     - Description
+   * - ``maru_instance_id``
+     - Auto-generated UUID
+     - Unique client instance identifier
+   * - ``maru_timeout_ms``
+     - ``5000``
+     - ZMQ RPC socket timeout in milliseconds
+   * - ``maru_use_async_rpc``
+     - ``true``
+     - Use asynchronous DEALER-ROUTER RPC (set to ``false`` for synchronous REQ-REP)
+   * - ``maru_max_inflight``
+     - ``64``
+     - Maximum number of concurrent in-flight async RPC requests
+   * - ``maru_eager_map``
+     - ``true``
+     - Pre-map all shared regions on connect
+
+Additional Resources
+--------------------
+
+- `Maru GitHub Repository <https://github.com/xcena-dev/maru>`_
+- `Maru Documentation <https://xcena-dev.github.io/maru/>`_
diff --git a/lmcache/v1/config.py b/lmcache/v1/config.py
@@ -236,6 +236,13 @@
         "default": None,
         "env_converter": int,
     },
+    # Maru CXL shared memory backend
+    "maru_path": {"type": Optional[str], "default": None, "env_converter": str},
+    "maru_pool_size": {
+        "type": float,
+        "default": 4.0,
+        "env_converter": float,
+    },
     # Other configurations
     # (Deprecated) The url of the actual remote lmcache instance for auditing.
     # Please use extra_config['audit_actual_remote_url'] instead.

diff --git a/lmcache/v1/storage_backend/__init__.py b/lmcache/v1/storage_backend/__init__.py
@@ -218,6 +218,20 @@ def CreateStorageBackends(
         )
         storage_backends[str(gds_backend)] = gds_backend
 
+    if config.maru_path is not None and "MaruBackend" not in _skip:
+        try:
+            # First Party
+            from lmcache.v1.storage_backend.maru_backend import MaruBackend
+        except ImportError as e:
+            raise ImportError(
+                "The 'maru' and 'maru_lmcache' packages are required "
+                "to use MaruBackend. Please install them according to "
+                "the Maru setup documentation."
+            ) from e
+
+        maru_backend = MaruBackend(config, metadata, loop, dst_device)
+        storage_backends[str(maru_backend)] = maru_backend
+
     if config.remote_url is not None and "RemoteBackend" not in _skip:
         assert local_cpu_backend is not None, (
             "Remote backend requires local CPU backend as a buffer."
-Original file line number
+Diff line change
@@ Expand Up / @@ -15,6 +15,7 @@ Supported Backends @@
        gds
        infinistore
        local_storage
+       maru
        mock
        mooncake
        nixl
@@ Expand Down @@