Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions tests/integration/defs/perf/test_perf_sanity.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@
"llama_v3.3_70b_instruct_fp4": "llama-3.3-models/Llama-3.3-70B-Instruct-FP4",
"deepseek_v3_lite_fp8": "DeepSeek-V3-Lite/fp8",
"llama_v3.1_8b_instruct": "llama-3.1-model/Llama-3.1-8B-Instruct",
"llama_v3.1_8b_instruct_fp8": "llama-3.1-model/Llama-3.1-8B-Instruct-FP8",
"glm_5_nvfp4": "GLM-5-NVFP4",
}

Expand Down
2 changes: 2 additions & 0 deletions tests/integration/test_lists/test-db/l0_h100.yml
Original file line number Diff line number Diff line change
Expand Up @@ -523,3 +523,5 @@ l0_h100:
- examples/test_ad_speculative_decoding.py::test_eagle_wrapper_forward[2]
- examples/test_ad_speculative_decoding.py::test_nemotron_mtp_model_with_weights
- examples/test_ad_guided_decoding.py::test_autodeploy_guided_decoding_main_json
# ------------- AutoDeploy Perf Sanity ---------------
- perf/test_perf_sanity.py::test_e2e[aggr_upload-llama3_1_8b_fp8_ad_hopper-llama3_1_8b_ad_ws1_1k1k] TIMEOUT (120)
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
# Perf-sanity config: Llama-3.1-8B-Instruct FP8 served via the AutoDeploy
# backend on a single Hopper GPU (1k input / 1k output benchmark).
# NOTE(review): indentation reconstructed from a flattened paste — the key
# hierarchy below follows the comment markers and sibling perf configs;
# confirm against the repository copy of this file.
metadata:
  model_name: llama_v3.1_8b_instruct_fp8
  supported_gpus:
    - H100
    - H200
  hardware:
    gpus_per_node: 1
server_configs:
  # 1k1k config - AutoDeploy backend, 1 GPU (Llama-3.1-8B FP8 on Hopper)
  - name: "llama3_1_8b_ad_ws1_1k1k"
    model_name: "llama_v3.1_8b_instruct_fp8"
    backend: "_autodeploy"
    extra_llm_api_config_path: "examples/auto_deploy/model_registry/configs/llama3_1_8b.yaml"
    world_size: 1
    # NOTE(review): client_configs assumed to be a sibling of the server
    # fields within this entry (matches the combined test id
    # "...llama3_1_8b_ad_ws1_1k1k" in the l0_h100 test list) — confirm.
    client_configs:
      - name: "con64_iter10_1k1k"
        concurrency: 64
        iterations: 10
        # isl/osl presumably input/output sequence lengths — verify against
        # the perf-sanity harness that consumes this config.
        isl: 1024
        osl: 1024
        backend: "openai"
Loading