Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/benchmark/sglang_benchmark_models.json
Original file line number Diff line number Diff line change
Expand Up @@ -298,5 +298,29 @@
"runner": "atom-mi355-8gpu-aac-runner",
"nightly_group": "B",
"env_vars": "SGLANG_DEFAULT_SERVER_ARGS=\nATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=0"
},
{
"display": "Kimi-K2-Thinking-MXFP4 TP8",
"dashboard_model": "Kimi-K2-Thinking-MXFP4-tp8",
"source_path": "amd/Kimi-K2-Thinking-MXFP4",
"path": "amd/Kimi-K2-Thinking-MXFP4",
"prefix": "kimi-k2-thinking-mxfp4-tp8",
"extra_args": "--trust-remote-code --tensor-parallel-size 8",
"bench_args": "",
"runner": "atom-mi355-8gpu-aac-runner",
"nightly_group": "B",
"env_vars": "SGLANG_USE_AITER=1"
},
{
"display": "Kimi-K2.5-MXFP4 TP8",
"dashboard_model": "Kimi-K2.5-MXFP4-tp8",
"source_path": "amd/Kimi-K2.5-MXFP4",
"path": "amd/Kimi-K2.5-MXFP4",
"prefix": "kimi-k25-mxfp4-tp8",
"extra_args": "--trust-remote-code --tensor-parallel-size 8",
"bench_args": "",
"runner": "atom-mi355-8gpu-aac-runner",
"nightly_group": "B",
"env_vars": "SGLANG_USE_AITER=1"
}
]
19 changes: 19 additions & 0 deletions .github/runner-config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -21,3 +21,22 @@ runners:
linux-atom-mi355-8:
gpu_arch: MI355
gpu_count: 8

atom-mi355-8gpu-oot-benchmark:
gpu_arch: MI355
gpu_count: 8

# mi35x pool schedules onto either MI350 or MI355 hardware.
# gpu_arch is reported as MI355 for now; confirm with the devops-dashboard
# owner whether an MI35X heterogeneous-pool value is supported before relabeling.
linux-atom-mi35x-1:
gpu_arch: MI355
gpu_count: 1

linux-atom-mi35x-4:
gpu_arch: MI355
gpu_count: 4

linux-atom-mi35x-8:
gpu_arch: MI355
gpu_count: 8
5 changes: 5 additions & 0 deletions .github/workflows/atom-sglang-benchmark.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,11 @@ on:
- "deepseek-r1-fp4-tp8-mtp1 (1024x1024/8192x1024: [4,8,16,32,64])"
- "qwen3-5-397b-a17b-fp8-tp4 (1024x1024/8192x1024: [4,8,16,32,64])"
- "qwen3-5-397b-a17b-fp8-tp8 (1024x1024/8192x1024: [4,8,16,32,64])"
- "kimi-k2-thinking-mxfp4-tp8 (1024x1024/8192x1024: [4,8,16,32,64])"
- "kimi-k25-mxfp4-tp8 (1024x1024/8192x1024: [4,8,16,32,64])"
- "all-deepseek (8 DeepSeek configs x 10 default params)"
- "all-qwen (2 Qwen configs x 10 default params)"
- "all-kimi (2 Kimi configs x 10 default params)"
- "all-oob (all SGLang-OOB configs x 10 default params)"
default: "none (do not run SGLang-OOB models)"
mesh_config_preset:
Expand Down Expand Up @@ -409,6 +412,8 @@ jobs:
return prefix.startswith("deepseek-")
if preset == "all-qwen":
return prefix.startswith("qwen")
if preset == "all-kimi":
return prefix.startswith("kimi-")
return prefix == preset

if event == "schedule":
Expand Down
18 changes: 18 additions & 0 deletions .github/workflows/atom-sglang-test.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -229,6 +229,24 @@ jobs:
ATOM_ENABLE_QK_NORM_ROPE_CACHE_QUANT_FUSION=0
accuracy_test_threshold: 0.76
runner: linux-atom-mi35x-4
- model_name: "Kimi-K2-Thinking-MXFP4 TP8"
model_path: "amd/Kimi-K2-Thinking-MXFP4"
extra_args: "--tensor-parallel-size 8 --trust-remote-code"
env_vars: |
SGLANG_USE_AITER=1
# NOTE: initial threshold mirrors vLLM-OOT Kimi-K2-Thinking baseline (0.90);
# recalibrate from the first green SGLang run.
accuracy_test_threshold: 0.90
runner: linux-atom-mi35x-8
- model_name: "Kimi-K2.5-MXFP4 TP8"
model_path: "amd/Kimi-K2.5-MXFP4"
extra_args: "--tensor-parallel-size 8 --trust-remote-code"
env_vars: |
SGLANG_USE_AITER=1
# NOTE: initial threshold is the vLLM Kimi-K2.5 baseline (0.93) less a 0.01 margin;
# recalibrate from the first green SGLang run.
accuracy_test_threshold: 0.92
runner: linux-atom-mi35x-8
runs-on: ${{ matrix.runner }}
timeout-minutes: 180
env:
Expand Down
Loading