Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 15 additions & 0 deletions conf/experimental/ai_dynamo/test/sglang.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ name = "sglang"
description = "sglang backend"
test_template_name = "AIDynamo"
extra_container_mounts = ["/run/udev:/run/udev"]
dse_excluded_args = ["cmd_args.aiperf_phases"]

[cmd_args]
docker_image_url = "nvcr.io/nvidia/ai-dynamo/sglang-runtime:1.1.1"
Expand Down Expand Up @@ -88,6 +89,20 @@ workloads = "aiperf.sh"
request-count = 50
synthetic-input-tokens-mean = 300

[[cmd_args.aiperf_phases]]
name = "round_1"

[cmd_args.aiperf_phases.args]
concurrency = 2
request-count = 50

[[cmd_args.aiperf_phases]]
name = "round_2"

[cmd_args.aiperf_phases.args]
concurrency = 4
request-count = 50

[cmd_args.aiperf_accuracy]
entrypoint = "aiperf profile"
setup-cmd = "python -m pip install --break-system-packages --ignore-installed blinker==1.9.0 && python -m pip install --break-system-packages --upgrade aiperf==0.8.0"
Expand Down
17 changes: 17 additions & 0 deletions conf/experimental/ai_dynamo/test/vllm.toml
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ name = "vLLM"
description = "vLLM backend"
test_template_name = "AIDynamo"
extra_container_mounts = ["/run/udev:/run/udev"]
dse_excluded_args = ["cmd_args.aiperf_phases"]

[cmd_args]
docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1"
Expand All @@ -38,6 +39,7 @@ workloads = "aiperf.sh"
tensor-parallel-size = 8
pipeline-parallel-size = 1
data-parallel-size = 1
kv-transfer-config = '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'

[cmd_args.dynamo.decode_worker]
num-nodes = 1
Expand All @@ -50,6 +52,7 @@ workloads = "aiperf.sh"
tensor-parallel-size = 8
pipeline-parallel-size = 1
data-parallel-size = 1
kv-transfer-config = '{"kv_connector":"NixlConnector","kv_role":"kv_both"}'

[cmd_args.lmcache_controller]
cmd = "lmcache_controller --host 0.0.0.0 --port 9000 --monitor-port 9001"
Expand Down Expand Up @@ -78,6 +81,20 @@ workloads = "aiperf.sh"
request-count = 50
synthetic-input-tokens-mean = 300

[[cmd_args.aiperf_phases]]
name = "round_1"

[cmd_args.aiperf_phases.args]
concurrency = 2
request-count = 50

[[cmd_args.aiperf_phases]]
name = "round_2"

[cmd_args.aiperf_phases.args]
concurrency = 4
request-count = 50

[cmd_args.aiperf_accuracy]
entrypoint = "aiperf profile"
setup-cmd = "python -m pip install --break-system-packages --upgrade aiperf==0.8.0"
Expand Down
16 changes: 15 additions & 1 deletion conf/experimental/ai_dynamo/test_scenario/vllm_lmcache.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ description = "Self-contained AIDynamo scenario wiring vLLM disaggregated infere
test_template_name = "AIDynamo"
time_limit = "00:10:00"
extra_container_mounts = ["/run/udev:/run/udev"]
dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"]
dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports", "cmd_args.aiperf_phases"]

[Tests.cmd_args]
docker_image_url = "nvcr.io/nvidia/ai-dynamo/vllm-runtime:1.1.1"
Expand Down Expand Up @@ -90,6 +90,20 @@ dse_excluded_args = ["cmd_args.lmcache.lmcache_worker_ports"]
request-count = 50
synthetic-input-tokens-mean = 300

[[Tests.cmd_args.aiperf_phases]]
name = "round_1"

[Tests.cmd_args.aiperf_phases.args]
concurrency = 2
request-count = 50

[[Tests.cmd_args.aiperf_phases]]
name = "round_2"

[Tests.cmd_args.aiperf_phases.args]
concurrency = 4
request-count = 50

[Tests.cmd_args.aiperf_accuracy]
entrypoint = "aiperf profile"
setup-cmd = "python -m pip install --break-system-packages --upgrade aiperf==0.8.0"
Expand Down
4 changes: 4 additions & 0 deletions src/cloudai/workloads/ai_dynamo/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,15 @@
# limitations under the License.

from .ai_dynamo import (
AIPERF_COMMANDS_FILE_NAME,
LMCACHE_CONFIG_BACKUP_FILE_NAME,
LMCACHE_CONFIG_FILE_NAME,
AIDynamoArgs,
AIDynamoCmdArgs,
AIDynamoTestDefinition,
AIPerf,
AIPerfAccuracy,
AIPerfPhase,
GenAIPerf,
LMCacheController,
WorkerBaseArgs,
Expand All @@ -32,6 +34,7 @@
from .slurm_command_gen_strategy import AIDynamoSlurmCommandGenStrategy

__all__ = [
"AIPERF_COMMANDS_FILE_NAME",
"LMCACHE_CONFIG_BACKUP_FILE_NAME",
"LMCACHE_CONFIG_FILE_NAME",
"AIDynamoArgs",
Expand All @@ -42,6 +45,7 @@
"AIDynamoTestDefinition",
"AIPerf",
"AIPerfAccuracy",
"AIPerfPhase",
"GenAIPerf",
"LMCacheController",
"WorkerBaseArgs",
Expand Down
28 changes: 27 additions & 1 deletion src/cloudai/workloads/ai_dynamo/ai_dynamo.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,6 +42,7 @@
from cloudai.systems.slurm import SlurmSystem

AIPERF_ARTIFACTS_DIR = "aiperf_artifacts"
AIPERF_COMMANDS_FILE_NAME = "aiperf_commands.json"
AIPERF_ACCURACY_ARTIFACTS_DIR = "aiperf_accuracy_artifacts"
AIPERF_ACCURACY_RESULTS_CSV = "accuracy_results.csv"
LMCACHE_CONFIG_FILE_NAME = "lmcache-config.yaml"
Expand Down Expand Up @@ -254,6 +255,7 @@ class AIPerf(Workload):
name: str = "aiperf"
cmd: str = "aiperf profile"
script: File = File(Path(__file__).parent.parent / "ai_dynamo/aiperf.sh")
runtime: File = Field(default=File(Path(__file__).parent.parent / "ai_dynamo/runtime/aiperf.py"), exclude=True)
setup_cmd: str | None = Field(
default=None,
serialization_alias="setup-cmd",
Expand All @@ -267,7 +269,13 @@ class AIPerf(Workload):

@property
def installables(self) -> list[Installable]:
return [self.script]
return [self.script, self.runtime]


class AIPerfPhase(AIPerf):
    """Named AIPerf phase that overrides the base AIPerf configuration.

    Only ``name`` is redeclared here; every other field is inherited from ``AIPerf``.
    """

    # Redeclared without a default and limited to ASCII letters, digits, "_", "." and "-".
    # NOTE(review): the pattern also admits "." and ".." — confirm phase names are never
    # used verbatim as path components.
    name: str = Field(min_length=1, pattern=r"^[A-Za-z0-9_.-]+$")


class AIPerfAccuracy(BaseModel):
Expand Down Expand Up @@ -324,6 +332,7 @@ class AIDynamoCmdArgs(CmdArgs):
lmcache_controller: LMCacheController | None = None
genai_perf: GenAIPerf = Field(default_factory=GenAIPerf)
aiperf: AIPerf = Field(default_factory=AIPerf)
aiperf_phases: list[AIPerfPhase] | None = None
aiperf_accuracy: AIPerfAccuracy | None = None
workloads: str = "genai_perf.sh"

Expand All @@ -341,6 +350,23 @@ def validate_workloads(cls, v: str) -> str:
def workloads_list(self) -> list[str]:
    """Return the comma-separated ``workloads`` string as a list of whitespace-stripped entries."""
    return list(map(str.strip, self.workloads.split(",")))

@model_validator(mode="after")
def validate_aiperf_phases(self) -> "AIDynamoCmdArgs":
    """Reject configurations in which two AIPerf phases share a name.

    Returns:
        This instance, when ``aiperf_phases`` is unset, empty, or holds only unique names.

    Raises:
        ValueError: If a phase name occurs more than once; the message lists the
            repeated names in sorted order.
    """
    phases = self.aiperf_phases
    if phases:
        first_seen: set[str] = set()
        repeated: set[str] = set()
        for name in (phase.name for phase in phases):
            # A name lands in `repeated` only from its second occurrence onwards.
            bucket = repeated if name in first_seen else first_seen
            bucket.add(name)
        if repeated:
            raise ValueError(f"AIPerf phase names must be unique. Duplicates: {sorted(repeated)}")

    return self

@property
def installables(self) -> list[Installable]:
return [
Expand Down
178 changes: 2 additions & 176 deletions src/cloudai/workloads/ai_dynamo/aiperf.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,182 +2,8 @@
# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# aiperf.sh — aiperf profile wrapper for ai_dynamo workloads.
#
# Called from ai_dynamo.sh's launch_workload() with:
# bash aiperf.sh --result-dir <dir> --model <model> --url <url> --port <port>
# [--cmd <cmd>] [--setup-cmd <cmd>] [--report-name <name>] [--artifact-dir-name <name>] [--extra-args <args>]
# -- <aiperf-args>...
#
# Context flags (before --) that are recognised and used:
# --result-dir Directory where artifacts and the final report are written.
# --model HuggingFace model identifier (e.g. Qwen/Qwen3-0.6B).
# --url Base URL of the dynamo.frontend (e.g. http://node01).
# --port HTTP port the dynamo.frontend is listening on.
# --report-name Output CSV name (default: aiperf_report.csv).
# --artifact-dir-name Artifact directory name under --result-dir (default: aiperf_artifacts).
# --cmd Full launch command including subcommand (default: "aiperf profile").
# --setup-cmd Optional shell command run before launching aiperf.
# --extra-args Raw string, split on whitespace (quotes are not interpreted) and appended after all other flags.
#
# All unrecognised flags (--install-dir, --gpus-per-node, etc.) are silently
# consumed so this script is forward-compatible with launch_workload additions.
#
# Everything after -- is passed directly to the aiperf profile invocation.

# Exit on command failure (-e), on use of an unset variable (-u) and on a failure
# anywhere in a pipeline (pipefail); -E makes ERR traps apply inside functions too.
set -Eeuo pipefail

# Script-level option values. process_args overwrites them from the context flags;
# the values below are what is used when a flag is not supplied.
result_dir=""
model=""
url="http://localhost"
port=8000
report_name="aiperf_report.csv"
artifact_dir_name="aiperf_artifacts"
cmd="aiperf profile"
setup_cmd=""
# Filled from --extra-args (word-split) by process_args.
declare -a extra_args=()
# Filled from the tokens after "--" by _parse_aiperf_args.
declare -a profile_args=()

log() {
    # Print the arguments on stdout, prefixed with "[<date> <time> <hostname>]: ".
    printf '[%s %s]: %s\n' "$(date '+%F %T')" "$(hostname)" "$*"
}

_parse_aiperf_args() {
    # Collect the aiperf flags that follow "--" into the global profile_args array.
    #
    # Arguments: the tokens that came after "--" on the command line.
    #   --flag value   both tokens are appended.
    #   --flag         appended on its own when it is the last token or when the
    #                  next token is itself a --flag (i.e. a boolean switch).
    #   anything else  dropped (a bare value with no preceding flag).
    #
    # Pairing every flag blindly with the next token would glue a boolean switch to
    # the following flag name and then discard that flag's value.
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --*)
                # Use if/else — not [[ ]] && — so set -e does not trigger on a false condition.
                if [[ $# -ge 2 && "$2" != --* ]]; then
                    profile_args+=("$1" "$2")
                    shift 2
                else
                    profile_args+=("$1")
                    shift
                fi
                ;;
            *) shift ;;
        esac
    done
}

process_args() {
    # Parse the command line into the script-level option variables, then log them.
    #
    # Each recognised context flag takes exactly one value and overwrites the
    # matching global. "--" ends the context flags; everything after it is handed
    # to _parse_aiperf_args. Arm order matters: the specific flags and the bare
    # "--" must come before the generic "--*" arm, which would otherwise match them.
    while [[ $# -gt 0 ]]; do
        case "$1" in
            --result-dir) result_dir="$2"; shift 2 ;;
            --model) model="$2"; shift 2 ;;
            --url) url="$2"; shift 2 ;;
            --port) port="$2"; shift 2 ;;
            --report-name) report_name="$2"; shift 2 ;;
            --artifact-dir-name) artifact_dir_name="$2"; shift 2 ;;
            --cmd) cmd="$2"; shift 2 ;;
            --setup-cmd) setup_cmd="$2"; shift 2 ;;
            # Word-split the raw value into an array; quotes inside it are not interpreted.
            --extra-args) read -ra extra_args <<< "$2"; shift 2 ;;
            # Stop scanning: the remaining tokens are aiperf's own arguments.
            --) shift; _parse_aiperf_args "$@"; break ;;
            --*) if [[ -n "${2:-}" && "${2}" != -* ]]; then shift 2; else shift 1; fi ;; # consume unknown flag; shift 2 only if next arg is a value
            # Ignore stray positional tokens.
            *) shift ;;
        esac
    done

    # The continuation lines below are part of the logged string and are kept flush left.
    log "Parsed args:
result_dir: $result_dir
model: $model
url: $url
port: $port
report_name: $report_name
artifact_dir: $artifact_dir_name
cmd: $cmd
setup_cmd: ${setup_cmd:-}
extra_args: ${extra_args[*]:-}
profile_args: ${profile_args[*]:-}"
}

run_setup_cmd() {
    # Run the optional --setup-cmd through a login shell; do nothing when it is empty.
    [[ -n "$setup_cmd" ]] || return 0

    log "Running AIPerf setup command: $setup_cmd"
    bash -lc "$setup_cmd"
    log "AIPerf setup command complete"
}

process_results() {
    # Copy the aiperf CSV export from the artifact directory to
    # "$result_dir/$report_name". profile_export_aiperf.csv is preferred; otherwise
    # the first "*aiperf*.csv" that find reports is used. Exits the script with
    # status 1 when no CSV exists.
    local artifact_dir="$result_dir/$artifact_dir_name"
    local found="$artifact_dir/profile_export_aiperf.csv"

    if [[ ! -f "$found" ]]; then
        # "|| true" keeps set -e from aborting when find itself fails (e.g. missing directory).
        found=$(find "$artifact_dir" -name "*aiperf*.csv" -print -quit 2>/dev/null || true)
    fi

    if [[ -z "$found" ]]; then
        log "ERROR: no CSV found in $artifact_dir — aiperf may not have completed"
        exit 1
    fi

    cp "$found" "$result_dir/$report_name"
    log "aiperf report saved to $result_dir/$report_name"
}

run_aiperf() {
    # Assemble and execute the aiperf command line.
    #
    # $1  full frontend URL passed as --url
    # $2  directory passed as --artifact-dir
    #
    # The argv is: the word-split $cmd, the fixed flags below, then profile_args,
    # then extra_args.
    local full_url="$1" artifact_dir="$2"
    local -a run_cmd=()
    local -a launch_cmd=()

    read -ra run_cmd <<< "$cmd"

    launch_cmd+=("${run_cmd[@]}")
    launch_cmd+=(--model "$model" --url "$full_url")
    launch_cmd+=(--endpoint-type chat --streaming)
    launch_cmd+=(--artifact-dir "$artifact_dir" --no-server-metrics)

    log "Launching aiperf: ${run_cmd[*]} --model $model --url $full_url"

    # Only append the optional arrays when they hold something.
    if (( ${#profile_args[@]} > 0 )); then
        launch_cmd+=("${profile_args[@]}")
    fi
    if (( ${#extra_args[@]} > 0 )); then
        launch_cmd+=("${extra_args[@]}")
    fi

    "${launch_cmd[@]}"

    log "aiperf run complete"
}

main() {
    # Entry point: parse arguments, require --result-dir and --model, run the
    # optional setup command, run aiperf into a freshly removed artifact directory
    # and copy its CSV export to the report location.
    process_args "$@"

    [[ -n "$result_dir" ]] || { log "ERROR: --result-dir is required"; exit 1; }
    [[ -n "$model" ]] || { log "ERROR: --model is required"; exit 1; }

    run_setup_cmd

    local artifact_dir="$result_dir/$artifact_dir_name"
    local full_url="${url}:${port}"

    # Remove artifacts left by a previous run so process_results only sees this run's output.
    rm -rf "$artifact_dir"

    run_aiperf "$full_url" "$artifact_dir"
    process_results
}

main "$@"
exit 0
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
exec python3 "${SCRIPT_DIR}/aiperf.py" "$@"
Loading
Loading