NVIDIA · podkidyshev · May 29, 2026 · May 30, 2026
@@ -18,6 +18,7 @@ name = "sglang"
 description = "sglang backend"
 test_template_name = "AIDynamo"
 extra_container_mounts = ["/run/udev:/run/udev"]
+dse_excluded_args = ["cmd_args.aiperf_phases"]
 
 [cmd_args]
 docker_image_url = "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.1.1"
@@ -88,6 +89,20 @@ workloads = "aiperf.sh"
     request-count = 50
     synthetic-input-tokens-mean = 300
 
+  [[cmd_args.aiperf_phases]]  # phase names must be unique and match [A-Za-z0-9_.-]+
+  name = "round_1"
+
+    [cmd_args.aiperf_phases.args]
+    concurrency = 2
+    request-count = 50
+
+  [[cmd_args.aiperf_phases]]
+  name = "round_2"
+
+    [cmd_args.aiperf_phases.args]
+    concurrency = 4
+    request-count = 50
+
   [cmd_args.aiperf_accuracy]
   entrypoint = "aiperf profile"
   setup-cmd = "python -m pip install --break-system-packages --ignore-installed blinker==1.9.0 && python -m pip install --break-system-packages --upgrade aiperf==0.8.0"

@@ -18,6 +18,7 @@ name = "vLLM"
 description = "vLLM backend"
 test_template_name = "AIDynamo"
 extra_container_mounts = ["/run/udev:/run/udev"]
+dse_excluded_args = ["cmd_args.aiperf_phases"]
 
 [cmd_args]
 docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1"
@@ -38,6 +39,7 @@ workloads = "aiperf.sh"
       tensor-parallel-size = 8
       pipeline-parallel-size = 1
       data-parallel-size = 1
+      kv-transfer-config = '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
 
     [cmd_args.dynamo.decode_worker]
     num-nodes = 1
@@ -50,6 +52,7 @@ workloads = "aiperf.sh"
       tensor-parallel-size = 8
       pipeline-parallel-size = 1
       data-parallel-size = 1
+      kv-transfer-config = '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'
 
   [cmd_args.lmcache_controller]
   cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001"
@@ -78,6 +81,20 @@ workloads = "aiperf.sh"
     request-count = 50
     synthetic-input-tokens-mean = 300
 
+  [[cmd_args.aiperf_phases]]  # phase names must be unique and match [A-Za-z0-9_.-]+
+  name = "round_1"
+
+    [cmd_args.aiperf_phases.args]
+    concurrency = 2
+    request-count = 50
+
+  [[cmd_args.aiperf_phases]]
+  name = "round_2"
+
+    [cmd_args.aiperf_phases.args]
+    concurrency = 4
+    request-count = 50
+
   [cmd_args.aiperf_accuracy]
   entrypoint = "aiperf profile"
   setup-cmd = "python -m pip install --break-system-packages --upgrade aiperf==0.8.0"

@@ -24,7 +24,7 @@ description = "Self-contained AIDynamo scenario wiring vLLM disaggregated infere
 test_template_name = "AIDynamo"
 time_limit = "00:10:00"
 extra_container_mounts = ["/run/udev:/run/udev"]
-dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"]
+dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports", "cmd_args.aiperf_phases"]
 
   [Tests.cmd_args]
   docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1"
@@ -90,6 +90,20 @@ dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"]
       request-count = 50
       synthetic-input-tokens-mean = 300
 
+    [[Tests.cmd_args.aiperf_phases]]  # phase names must be unique and match [A-Za-z0-9_.-]+
+    name = "round_1"
+
+      [Tests.cmd_args.aiperf_phases.args]
+      concurrency = 2
+      request-count = 50
+
+    [[Tests.cmd_args.aiperf_phases]]
+    name = "round_2"
+
+      [Tests.cmd_args.aiperf_phases.args]
+      concurrency = 4
+      request-count = 50
+
     [Tests.cmd_args.aiperf_accuracy]
     entrypoint = "aiperf profile"
     setup-cmd = "python -m pip install --break-system-packages --upgrade aiperf==0.8.0"

@@ -15,13 +15,15 @@
 # limitations under the License.
 
 from .ai_dynamo import (
+    AIPERF_COMMANDS_FILE_NAME,
     LMCACHE_CONFIG_BACKUP_FILE_NAME,
     LMCACHE_CONFIG_FILE_NAME,
     AIDynamoArgs,
     AIDynamoCmdArgs,
     AIDynamoTestDefinition,
     AIPerf,
     AIPerfAccuracy,
+    AIPerfPhase,
     GenAIPerf,
     LMCacheController,
     WorkerBaseArgs,
@@ -32,6 +34,7 @@
 from .slurm_command_gen_strategy import AIDynamoSlurmCommandGenStrategy
 
 __all__ = [
+    "AIPERF_COMMANDS_FILE_NAME",
     "LMCACHE_CONFIG_BACKUP_FILE_NAME",
     "LMCACHE_CONFIG_FILE_NAME",
     "AIDynamoArgs",
@@ -42,6 +45,7 @@
     "AIDynamoTestDefinition",
     "AIPerf",
     "AIPerfAccuracy",
+    "AIPerfPhase",
     "GenAIPerf",
     "LMCacheController",
     "WorkerBaseArgs",

@@ -42,6 +42,7 @@
 from cloudai.systems.slurm import SlurmSystem
 
 AIPERF_ARTIFACTS_DIR = "aiperf_artifacts"
+AIPERF_COMMANDS_FILE_NAME = "aiperf_commands.json"
 AIPERF_ACCURACY_ARTIFACTS_DIR = "aiperf_accuracy_artifacts"
 AIPERF_ACCURACY_RESULTS_CSV = "accuracy_results.csv"
 LMCACHE_CONFIG_FILE_NAME = "lmcache-config.yaml"
@@ -254,6 +255,7 @@ class AIPerf(Workload):
     name: str = "aiperf"
     cmd: str = "aiperf profile"
     script: File = File(Path(__file__).parent.parent / "ai_dynamo/aiperf.sh")
+    runtime: File = Field(default=File(Path(__file__).parent.parent / "ai_dynamo/runtime/aiperf.py"), exclude=True)
     setup_cmd: str | None = Field(
         default=None,
         serialization_alias="setup-cmd",
@@ -267,7 +269,13 @@ class AIPerf(Workload):
 
     @property
     def installables(self) -> list[Installable]:
-        return [self.script]
+        return [self.script, self.runtime]
+
+
+class AIPerfPhase(AIPerf):
+    """Named AIPerf phase that overrides the base AIPerf configuration."""
+
+    name: str = Field(min_length=1, pattern=r"^[A-Za-z0-9_.-]+$")  # NOTE(review): also admits "." and ".."
 
 
 class AIPerfAccuracy(BaseModel):
@@ -324,6 +332,7 @@ class AIDynamoCmdArgs(CmdArgs):
     lmcache_controller: LMCacheController | None = None
     genai_perf: GenAIPerf = Field(default_factory=GenAIPerf)
     aiperf: AIPerf = Field(default_factory=AIPerf)
+    aiperf_phases: list[AIPerfPhase] | None = None
     aiperf_accuracy: AIPerfAccuracy | None = None
     workloads: str = "genai_perf.sh"
 
@@ -341,6 +350,23 @@ def validate_workloads(cls, v: str) -> str:
     def workloads_list(self) -> list[str]:
         return [w.strip() for w in self.workloads.split(",")]
 
+    @model_validator(mode="after")
+    def validate_aiperf_phases(self) -> "AIDynamoCmdArgs":
+        """Reject duplicate AIPerf phase names; the ValueError lists every repeated name, sorted."""
+        if not self.aiperf_phases:  # None or an empty list: nothing to validate
+            return self
+
+        seen = set()
+        duplicates = set()
+        for phase in self.aiperf_phases:
+            if phase.name in seen:
+                duplicates.add(phase.name)
+            seen.add(phase.name)
+        if duplicates:
+            raise ValueError(f"AIPerf phase names must be unique. Duplicates: {sorted(duplicates)}")
+
+        return self
+
     @property
     def installables(self) -> list[Installable]:
         return [

@@ -2,182 +2,8 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
 # Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-# aiperf.sh — aiperf profile wrapper for ai_dynamo workloads.
-#
-# Called from ai_dynamo.sh's launch_workload() with:
-#   bash aiperf.sh --result-dir <dir> --model <model> --url <url> --port <port>
-#                  [--cmd <cmd>] [--report-name <name>] [--artifact-dir-name <name>] [--extra-args <args>]
-#                  -- <aiperf-args>...
-#
-# Context flags (before --) that are recognised and used:
-#   --result-dir    Directory where artifacts and the final report are written.
-#   --model         HuggingFace model identifier (e.g. Qwen/Qwen3-0.6B).
-#   --url           Base URL of the dynamo.frontend (e.g. http://node01).
-#   --port          HTTP port the dynamo.frontend is listening on.
-#   --report-name   Output CSV name (default: aiperf_report.csv).
-#   --artifact-dir-name  Artifact directory name under --result-dir (default: aiperf_artifacts).
-#   --cmd           Full launch command including subcommand (default: "aiperf profile").
-#   --setup-cmd     Optional shell command run before launching aiperf.
-#   --extra-args    Raw string appended verbatim after all other flags.
-#
-# All unrecognised flags (--install-dir, --gpus-per-node, etc.) are silently
-# consumed so this script is forward-compatible with launch_workload additions.
-#
-# Everything after -- is passed directly to the aiperf profile invocation.
 
 set -Eeuo pipefail
 
-result_dir=""
-model=""
-url="http://localhost"
-port=8000
-report_name="aiperf_report.csv"
-artifact_dir_name="aiperf_artifacts"
-cmd="aiperf profile"
-setup_cmd=""
-declare -a extra_args=()
-declare -a profile_args=()
-
-log() {
-  echo "[$(date '+%F %T') $(hostname)]: $*"
-}
-
-_parse_aiperf_args() {
-  while [[ $# -ge 2 ]]; do
-    case "$1" in
-      --*) profile_args+=("$1" "$2"); shift 2 ;;
-      *)   shift ;;
-    esac
-  done
-  # Capture a trailing lone boolean flag if present.
-  # Use if/fi — not [[ ]] && — so set -e does not trigger on a false condition.
-  if [[ $# -eq 1 && "$1" == --* ]]; then
-    profile_args+=("$1")
-  fi
-}
-
-process_args() {
-  while [[ $# -gt 0 ]]; do
-    case "$1" in
-      --result-dir)   result_dir="$2";  shift 2 ;;
-      --model)        model="$2";       shift 2 ;;
-      --url)          url="$2";         shift 2 ;;
-      --port)         port="$2";        shift 2 ;;
-      --report-name)  report_name="$2"; shift 2 ;;
-      --artifact-dir-name) artifact_dir_name="$2"; shift 2 ;;
-      --cmd)               cmd="$2";               shift 2 ;;
-      --setup-cmd)         setup_cmd="$2";         shift 2 ;;
-      --extra-args)        read -ra extra_args <<< "$2"; shift 2 ;;
-      --)                  shift; _parse_aiperf_args "$@"; break ;;
-      --*)            if [[ -n "${2:-}" && "${2}" != -* ]]; then shift 2; else shift 1; fi ;;  # consume unknown flag; shift 2 only if next arg is a value
-      *)              shift ;;
-    esac
-  done
-
-  log "Parsed args:
-    result_dir:   $result_dir
-    model:        $model
-    url:          $url
-    port:         $port
-    report_name:  $report_name
-    artifact_dir: $artifact_dir_name
-    cmd:          $cmd
-    setup_cmd:    ${setup_cmd:-}
-    extra_args:   ${extra_args[*]:-}
-    profile_args: ${profile_args[*]:-}"
-}
-
-run_setup_cmd() {
-  if [[ -z "$setup_cmd" ]]; then
-    return
-  fi
-
-  log "Running AIPerf setup command: $setup_cmd"
-  bash -lc "$setup_cmd"
-  log "AIPerf setup command complete"
-}
-
-process_results() {
-  local artifact_dir="$result_dir/$artifact_dir_name"
-  local csv_path=""
-
-  if [[ -f "$artifact_dir/profile_export_aiperf.csv" ]]; then
-    csv_path="$artifact_dir/profile_export_aiperf.csv"
-  else
-    csv_path=$(find "$artifact_dir" -name "*aiperf*.csv" -print -quit 2>/dev/null || true)
-  fi
-
-  if [[ -n "$csv_path" ]]; then
-    cp "$csv_path" "$result_dir/$report_name"
-    log "aiperf report saved to $result_dir/$report_name"
-  else
-    log "ERROR: no CSV found in $artifact_dir — aiperf may not have completed"
-    exit 1
-  fi
-
-}
-
-run_aiperf() {
-  local full_url="$1"
-  local artifact_dir="$2"
-  local -a run_cmd=()
-  read -ra run_cmd <<< "$cmd"
-  local -a launch_cmd=(
-    "${run_cmd[@]}"
-    --model "$model"
-    --url "$full_url"
-    --endpoint-type chat
-    --streaming
-    --artifact-dir "$artifact_dir"
-    --no-server-metrics
-  )
-
-  log "Launching aiperf: ${run_cmd[*]} --model $model --url $full_url"
-
-  if [[ "${#profile_args[@]}" -gt 0 ]]; then
-    launch_cmd+=("${profile_args[@]}")
-  fi
-  if [[ "${#extra_args[@]}" -gt 0 ]]; then
-    launch_cmd+=("${extra_args[@]}")
-  fi
-
-  "${launch_cmd[@]}"
-
-  log "aiperf run complete"
-}
-
-main() {
-  process_args "$@"
-
-  if [[ -z "$result_dir" ]]; then
-    log "ERROR: --result-dir is required"; exit 1
-  fi
-  if [[ -z "$model" ]]; then
-    log "ERROR: --model is required"; exit 1
-  fi
-
-  run_setup_cmd
-
-  local full_url="${url}:${port}"
-  local artifact_dir="$result_dir/$artifact_dir_name"
-  rm -rf "$artifact_dir"
-
-  run_aiperf "$full_url" "$artifact_dir"
-  process_results
-}
-
-main "$@"
-exit 0
+SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"  # NOTE(review): aiperf.py must sit beside this script — confirm install layout
+exec python3 "${SCRIPT_DIR}/aiperf.py" "$@"  # forward every argument unchanged; exec replaces this shell