Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions tests/integration/defs/perf/test_perf_sanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
"llama_v3.3_70b_instruct_fp4": "llama-3.3-models/Llama-3.3-70B-Instruct-FP4",
"deepseek_v3_lite_fp8": "DeepSeek-V3-Lite/fp8",
"llama_v3.1_8b_instruct": "llama-3.1-model/Llama-3.1-8B-Instruct",
"llama_v3.1_8b_instruct_fp8": "llama-3.1-model/Llama-3.1-8B-Instruct-FP8",
"glm_5_nvfp4": "GLM-5-NVFP4",
}

Expand Down
2 changes: 2 additions & 0 deletions tests/integration/test_lists/test-db/l0_h100.yml
Original file line number Diff line number Diff line change
Expand Up @@ -523,3 +523,5 @@ l0_h100:
- examples/test_ad_speculative_decoding.py::test_eagle_wrapper_forward[2]
- examples/test_ad_speculative_decoding.py::test_nemotron_mtp_model_with_weights
- examples/test_ad_guided_decoding.py::test_autodeploy_guided_decoding_main_json
# ------------- AutoDeploy Perf Sanity ---------------
- perf/test_perf_sanity.py::test_e2e[aggr_upload-llama3_1_8b_fp8_ad_hopper-llama3_1_8b_ad_ws1_1k1k] TIMEOUT (120)
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Perf-sanity config: Llama-3.1-8B-Instruct FP8 served via the AutoDeploy
# backend on a single Hopper GPU (1k input / 1k output benchmark).
# NOTE(review): indentation reconstructed from a flattened paste — the key
# hierarchy below follows the comment markers and sibling perf configs;
# confirm against the repository copy of this file.
metadata:
  model_name: llama_v3.1_8b_instruct_fp8
  supported_gpus:
    - H100
    - H200
  hardware:
    gpus_per_node: 1
server_configs:
  # 1k1k config - AutoDeploy backend, 1 GPU (Llama-3.1-8B FP8 on Hopper)
  - name: "llama3_1_8b_ad_ws1_1k1k"
    model_name: "llama_v3.1_8b_instruct_fp8"
    backend: "_autodeploy"
    extra_llm_api_config_path: "examples/auto_deploy/model_registry/configs/llama3_1_8b.yaml"
    world_size: 1
    # NOTE(review): client_configs assumed to be a sibling of the server
    # fields within this entry (matches the combined test id
    # "...llama3_1_8b_ad_ws1_1k1k" in the l0_h100 test list) — confirm.
    client_configs:
      - name: "con64_iter10_1k1k"
        concurrency: 64
        iterations: 10
        # isl/osl presumably input/output sequence lengths — verify against
        # the perf-sanity harness that consumes this config.
        isl: 1024
        osl: 1024
        backend: "openai"
Loading