NVIDIA · podkidyshev · May 28, 2026 · May 28, 2026 · May 28, 2026 · May 28, 2026
@@ -17,7 +17,7 @@
 name = "sglang"
 description = "sglang backend"
 test_template_name = "AIDynamo"
-extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"]
+extra_container_mounts = ["/run/udev:/run/udev"]
 
 [cmd_args]
 docker_image_url = "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.1.1"
@@ -59,23 +59,8 @@ workloads = "aiperf.sh"
       host = "0.0.0.0"
       disaggregation-transfer-backend = "nixl"
 
-  [cmd_args.lmcache]
-  controller_cmd = "lmcache_controller --host localhost --port 9000 --monitor-port 9001"
-
-    [cmd_args.lmcache.args]
-    chunk_size = 256
-    local_cpu = false
-    nixl_buffer_size = 10737418240
-    nixl_buffer_device = "cuda"
-    extra_config_enable_nixl_storage = true
-    extra_config_nixl_backend = "GDS_MT"
-    extra_config_nixl_file_pool_size = 64
-
-    enable_controller = true
-    lmcache_instance_id = "lmcache_default_instance"
-    controller_url = "localhost:9001"
-    lmcache_worker_port = 8788
-    distributed_url = "localhost:8789"
+  [cmd_args.lmcache_controller]
+  cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001"
 
   [cmd_args.genai_perf]
   cmd = "genai-perf profile"

@@ -17,7 +17,7 @@
 name = "vLLM"
 description = "vLLM backend"
 test_template_name = "AIDynamo"
-extra_container_mounts = ["/run/udev:/run/udev", "/tmp:/tmp"]
+extra_container_mounts = ["/run/udev:/run/udev"]
 
 [cmd_args]
 docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1"
@@ -53,23 +53,8 @@ workloads = "aiperf.sh"
       data-parallel-size = 1
       kv-transfer-config = '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
 
-  [cmd_args.lmcache]
-  controller_cmd = "lmcache_controller --host localhost --port 9000 --monitor-port 9001"
-
-    [cmd_args.lmcache.args]
-    chunk_size = 256
-    local_cpu = false
-    nixl_buffer_size = 10737418240
-    nixl_buffer_device = "cuda"
-    extra_config_enable_nixl_storage = true
-    extra_config_nixl_backend = "GDS_MT"
-    extra_config_nixl_file_pool_size = 64
-
-    enable_controller = true
-    lmcache_instance_id = "lmcache_default_instance"
-    controller_url = "localhost:9001"
-    lmcache_worker_port = 8788
-    distributed_url = "localhost:8789"
+  [cmd_args.lmcache_controller]
+  cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001"
 
   [cmd_args.genai_perf]
   cmd = "genai-perf profile"

@@ -0,0 +1,117 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name = "dynamo-vllm-lmcache"
+job_status_check = false
+
+[[Tests]]  # test is defined inline in this scenario, so no separate test TOML is referenced
+id = "test.disagg.lmcache-controller"
+name = "vllm-disagg-lmcache-controller"
+description = "Self-contained AIDynamo scenario wiring vLLM disaggregated inference, LMCache config propagation, and LMCache controller launch."
+test_template_name = "AIDynamo"
+time_limit = "00:10:00"  # presumably HH:MM:SS - TODO confirm
+extra_container_mounts = ["/run/udev:/run/udev"]
+dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"]  # the port list is configuration data, not a DSE sweep dimension
+
+  [Tests.cmd_args]
+  docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1"
+  # storage_cache_dir = "/lustre/.../install/tmp"  # left unset; presumably feeds the {storage_cache_dir} placeholder used by nixl_path - TODO confirm
+  workloads = "aiperf.sh"
+
+    [Tests.cmd_args.lmcache]  # structured LMCache settings
+    enable_controller = true
+    chunk_size = 256
+    local_cpu = true
+    lmcache_instance_id = "lmcache_default_instance"
+    controller_pull_url = "{frontend_node}:8300"  # {frontend_node} is a runtime placeholder; 8300 is the "pull" port in the controller cmd
+    controller_reply_url = "{frontend_node}:8400"  # 8400 is the "reply" port in the controller cmd
+    lmcache_worker_ports = [8788, 8789, 8790, 8791]  # list-valued setting; kept out of DSE by dse_excluded_args
+    max_local_cpu_size = 6.0
+    nixl_buffer_size = 2079377920  # presumably bytes - TODO confirm
+    nixl_buffer_device = "cpu"
+
+      [Tests.cmd_args.lmcache.extra_config]
+      enable_nixl_storage = false  # NOTE(review): with storage disabled the nixl_* keys below are presumably inert - confirm
+      nixl_backend = "POSIX"
+      nixl_path = "{storage_cache_dir}"  # runtime placeholder, not a literal path
+      nixl_pool_size = 2048
+
+    [Tests.cmd_args.lmcache_controller]  # command for the LMCache controller process
+    cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-ports {\"pull\":8300,\"reply\":8400}"  # \" yields literal quotes; the pull/reply ports must agree with controller_pull_url / controller_reply_url
+
+    [Tests.cmd_args.dynamo]  # vLLM backend with separate prefill and decode workers
+    backend = "vllm"
+    model = "Qwen/Qwen3-0.6B"
+
+      [Tests.cmd_args.dynamo.prefill_worker]
+      num-nodes = 1
+      cmd = 'python3 -m dynamo.vllm --is-prefill-worker'
+      worker-initialized-regex = 'VllmWorker.*has.been.initialized'  # literal string; each "." matches any single character
+      extra-args = "--no-enable-expert-parallel"
+
+        [Tests.cmd_args.dynamo.prefill_worker.args]
+        gpu-memory-utilization = 0.8
+        kv-transfer-config = '{"kv_connector":"MultiConnector","kv_role":"kv_both","kv_connector_extra_config":{"connectors":[{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"},{"kv_connector":"NixlConnector","kv_role":"kv_both"}]}}'  # MultiConnector listing LMCacheConnectorV1 and NixlConnector
+        tensor-parallel-size = 4
+        pipeline-parallel-size = 1
+        data-parallel-size = 1
+
+      [Tests.cmd_args.dynamo.decode_worker]
+      num-nodes = 1
+      cmd = 'python3 -m dynamo.vllm'
+      worker-initialized-regex = 'VllmWorker.*has.been.initialized'  # same readiness pattern as the prefill worker
+      extra-args = "--no-enable-expert-parallel"
+
+        [Tests.cmd_args.dynamo.decode_worker.args]
+        gpu-memory-utilization = 0.8
+        kv-transfer-config = '{"kv_connector":"LMCacheConnectorV1","kv_role":"kv_both"}'  # NOTE(review): decode lists only LMCacheConnectorV1 (no NixlConnector) - confirm the asymmetry with prefill is intended
+        tensor-parallel-size = 4
+        pipeline-parallel-size = 1
+        data-parallel-size = 1
+
+    [Tests.cmd_args.aiperf]  # carries no keys of its own; settings live in the args sub-table
+      [Tests.cmd_args.aiperf.args]
+      concurrency = 2
+      extra-inputs = '{"min_tokens":10}'  # JSON held in a TOML literal string, so the inner quotes need no escaping
+      output-tokens-mean = 500
+      request-count = 50
+      synthetic-input-tokens-mean = 300
+
+    [Tests.cmd_args.aiperf_accuracy]  # MMLU accuracy run (see --accuracy-benchmark in cli)
+    entrypoint = "aiperf profile"
+    setup-cmd = "python -m pip install --break-system-packages --upgrade aiperf==0.8.0"  # pins aiperf to 0.8.0
+    cli = '''
+--model {model}
+--url {url}
+--endpoint-type chat
+--streaming
+--artifact-dir {artifact_dir}
+--no-server-metrics
+--accuracy-benchmark mmlu
+--accuracy-n-shots 5
+--accuracy-tasks abstract_algebra
+--concurrency 10
+--extra-inputs '{"temperature":0,"chat_template_kwargs":{"enable_thinking":false}}'
+--num-requests 100
+'''  # multi-line literal string, one flag per line; {model}, {url}, {artifact_dir} are presumably substituted by CloudAI - TODO confirm
+
+  [Tests.extra_env_vars]  # values are strings, including the "0" flags
+  UCX_LOG_LEVEL = "warn"
+  HF_HUB_OFFLINE = "0"
+  TRANSFORMERS_OFFLINE = "0"
+  HF_DATASETS_OFFLINE = "0"
+  DYNAMO_NODELIST = "$(scontrol show hostname $SLURM_JOB_NODELIST | tr -s '\\n' ',')"  # TOML "\\n" reaches the shell as \n; tr turns the newline-separated host list into a comma-separated one (assumes the value is shell-expanded - confirm)
+  UCX_TLS = "all"
@@ -206,6 +206,31 @@ action, typically seeded by ``random_seed``.
 
 Custom agents may extend the ``BaseAgentConfig`` and offer more parameters to configure.
 
+DSE parameter exclusions
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+CloudAI builds the DSE parameter space implicitly from list-valued fields under ``cmd_args``, list-valued
+``extra_env_vars``, and list-valued ``num_nodes``. If a list-valued ``cmd_args`` field is configuration data rather than
+a sweep dimension, exclude it with ``dse_excluded_args`` in the test or scenario definition.
+
+Entries in ``dse_excluded_args`` must be dot-separated paths that start with ``cmd_args.``. Each entry excludes that
+field and any nested fields below it from DSE parameter discovery:
+
+.. code-block:: toml
+
+   [[Tests]]
+   id = "Tests.1"
+   test_name = "my_test"
+   dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"]
+
+     [Tests.cmd_args.lmcache]
+     chunk_size = [256, 512]
+     lmcache_worker_ports = [8788, 8789, 8790, 8791]
+
+In this example, ``cmd_args.lmcache.chunk_size`` is still swept, while
+``cmd_args.lmcache.lmcache_worker_ports`` is treated as a single configuration value. The exclusion mechanism currently
+applies only to ``cmd_args`` paths; it does not exclude ``extra_env_vars`` or ``num_nodes`` from DSE.
+
 Metric errors and report strategies
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 

@@ -110,6 +110,77 @@ To use genai-perf, set:
      output-tokens-mean = 500
      request-count = 50
 
+Propagating LMCache Configuration
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+AIDynamo can pass an LMCache YAML config to the worker processes by setting ``LMCACHE_CONFIG_FILE`` inside the
+container. This only propagates the LMCache configuration; the vLLM/SGLang runtime still needs to be launched with the
+appropriate LMCache or KV-transfer connector for that image/version.
+
+The preferred form is structured TOML under ``[cmd_args.lmcache]``. CloudAI converts that object to YAML in the
+run output directory, mounts that directory as ``/cloudai_run_results``, and exports the generated file path as
+``LMCACHE_CONFIG_FILE``:
+
+.. code-block:: toml
+
+   [cmd_args]
+     [cmd_args.lmcache]
+     chunk_size = 256
+     local_cpu = true
+     controller_pull_url = "{frontend_node}:8300"
+     controller_reply_url = "{frontend_node}:8400"
+     lmcache_worker_ports = [8788, 8789, 8790, 8791]
+     max_local_cpu_size = 6.0
+     nixl_buffer_size = 2079377920
+     nixl_buffer_device = "cpu"
+
+       [cmd_args.lmcache.extra_config]
+       enable_nixl_storage = false
+       nixl_backend = "POSIX"
+       nixl_path = "{storage_cache_dir}"
+       nixl_pool_size = 2048
+
+For an example that uses test-in-scenario mode, see
+``conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml``. Because the test is fully defined inside the scenario,
+``--tests-dir`` is not required when running that example:
+
+.. code-block:: bash
+
+   uv run cloudai run --system-config <slurm system toml> \
+      --test-scenario conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml
+
+The example sets ``dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"]`` because
+``lmcache_worker_ports`` is a list-valued LMCache setting, not a DSE sweep dimension. Other list-valued LMCache fields
+can still be swept unless their ``cmd_args.`` path is also excluded.
+
+Alternatively, mount your own LMCache YAML file with ``extra_container_mounts`` and set ``LMCACHE_CONFIG_FILE`` through
+``extra_env_vars``:
+
+.. code-block:: toml
+
+   extra_container_mounts = ["/host/lmcache:/lmcache"]
+   extra_env_vars = { LMCACHE_CONFIG_FILE = "/lmcache/config.yaml" }
+
+For multi-node LMCache storage tests, any path referenced by the LMCache YAML, such as ``nixl_path`` for POSIX-backed
+storage, must be visible and writable from every node that is expected to share cached data. A node-local path such as
+``/tmp`` is suitable only for single-node smoke tests or configuration propagation checks.
+
+LMCache YAML values can use the runtime placeholders ``{frontend_node}``, ``{frontend_ip}``, ``{results_dir}``, and
+``{storage_cache_dir}``. CloudAI renders them inside the Slurm job before launching workers; an unknown placeholder
+fails the run before worker processes start.
+
+If the selected LMCache mode needs a controller, CloudAI can start one on the frontend node:
+
+.. code-block:: toml
+
+   [cmd_args.lmcache_controller]
+   cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-ports {\"pull\":8300,\"reply\":8400}"
+
+This only launches the process. For disaggregated or multi-node runs, the LMCache YAML still needs controller addresses
+that resolve to the frontend node from every worker. With the monitor ports shown above (pull 8300, reply 8400), use
+``controller_pull_url = "{frontend_node}:8300"`` and ``controller_reply_url = "{frontend_node}:8400"``. The length of
+the ``lmcache_worker_ports`` list must match the number of worker ranks.
+
 Semantic Degradation With AIPerf Accuracy
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
 
@@ -215,6 +286,7 @@ Supported Backends
 The following backends are available via the ``conf/experimental/ai_dynamo/test/`` directory:
 
 - **vLLM** (``vllm.toml``) — use with ``test_scenario/vllm_slurm.toml``
+- **vLLM with LMCache config propagation** — use the self-contained scenario ``test_scenario/vllm_lmcache.toml``
 - **sglang** (``sglang.toml``) — use with ``test_scenario/sglang_slurm.toml``
 
 Both backends use ``aiperf`` as the default benchmark tool and support disaggregated prefill/decode.

@@ -147,7 +147,11 @@ def param_space(self) -> dict[str, Any]:
         extra_env_vars_dict = self.test.extra_env_vars
 
         action_space: dict[str, Any] = {
-            **{key: value for key, value in cmd_args_dict.items() if isinstance(value, list)},
+            **{
+                key: value
+                for key, value in cmd_args_dict.items()
+                if isinstance(value, list) and not self.test.is_dse_excluded_arg(key)
+            },
             **{f"extra_env_vars.{key}": value for key, value in extra_env_vars_dict.items() if isinstance(value, list)},
         }
         if isinstance(self.num_nodes, list):
@@ -183,8 +187,11 @@ def apply_params_set(self, action: dict[str, Any]) -> "TestRun":
                 attrs = key.split(".")
                 obj = tdef.cmd_args
                 for attr in attrs[:-1]:
-                    obj = getattr(obj, attr)
-                setattr(obj, attrs[-1], value)
+                    obj = obj[attr] if isinstance(obj, dict) else getattr(obj, attr)
+                if isinstance(obj, dict):
+                    obj[attrs[-1]] = value
+                else:
+                    setattr(obj, attrs[-1], value)
 
         type(tdef)(**tdef.model_dump())  # trigger validation
 

@@ -93,6 +93,7 @@ class TestRunModel(BaseModel):
     description: Optional[str] = None
     test_template_name: Optional[str] = None
     cmd_args: Optional[CmdArgs] = None
+    dse_excluded_args: Optional[list[str]] = None
     extra_env_vars: dict[str, str | list[str]] | None = None
     extra_container_mounts: Optional[list[str]] = None
     git_repos: Optional[list[GitRepo]] = None
@@ -114,6 +115,7 @@ def tdef_model_dump(self, by_alias: bool) -> dict:
             "agent_metrics": self.agent_metrics if "agent_metrics" in self.model_fields_set else None,
             "agent_reward_function": self.agent_reward_function,
             "agent_config": self.agent_config,
+            "dse_excluded_args": self.dse_excluded_args,
             "extra_container_mounts": self.extra_container_mounts,
             "extra_env_vars": self.extra_env_vars if self.extra_env_vars else None,
             "cmd_args": self.cmd_args.model_dump(by_alias=by_alias) if self.cmd_args else None,