NVIDIA · ybenvidia · Mar 5, 2026 · Apr 15, 2026 · Apr 26, 2026 · May 13, 2026
@@ -87,6 +87,7 @@ ehthumbs.db
 Thumbs.db
 
 *.log
+slurm-*
 install/
 results/
 .*

@@ -0,0 +1,27 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+name = "deepep_test_ep_v2"  # test definition name; no scenario visible here references it
+description = "Official DeepEP V2 elastic test_ep"
+test_template_name = "DeepEP"
+
+[cmd_args]
+docker_image_url = "/your/path/to/the/container"  # placeholder: replace with the real container image
+subtest_name = "test_ep"
+elastic_tests_root = "/path/in/the/container/to/the/tests/folder"  # placeholder: in-container tests directory
+num_processes = 8
+num_sms = 0  # NOTE(review): 0 presumably means "use the default" for the three num_* = 0 keys - confirm
+num_qps = 0
+num_allocated_qps = 0
+num_tokens = 4096
+hidden = 7168
+num_topk = 8
+num_experts = 256
+do_cpu_sync = 1  # NOTE(review): these four flags are 0/1 integers while skip_* below are booleans - confirm expected types
+allow_hybrid_mode = 1
+allow_multiple_reduction = 1
+prefer_overlap_with_compute = 0
+seed = 0
+skip_check = false
+skip_perf_test = false
@@ -0,0 +1,19 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+name = "deepep_test_internode"  # referenced as test_name by the deepep-official-tests scenario
+description = "Official DeepEP V1 legacy test_internode"
+test_template_name = "DeepEP"
+
+[cmd_args]
+docker_image_url = "/your/path/to/the/container"  # placeholder: replace with the real container image
+subtest_name = "test_internode"
+legacy_tests_root = "/path/in/the/container/to/the/tests/folder"  # placeholder: in-container tests directory
+num_processes = 8
+num_tokens = 4096
+hidden = 7168
+num_topk = 8
+num_experts = 256
+pressure_test_mode = 0  # integer mode selector, unlike the boolean flag below
+test_ll_compatibility = false
@@ -0,0 +1,18 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+name = "deepep_test_intranode"  # test definition name; no scenario visible here references it
+description = "Official DeepEP V1 legacy test_intranode"
+test_template_name = "DeepEP"
+
+[cmd_args]
+docker_image_url = "/your/path/to/the/container"  # placeholder: replace with the real container image
+subtest_name = "test_intranode"
+legacy_tests_root = "/path/in/the/container/to/the/tests/folder"  # placeholder: in-container tests directory
+num_processes = 8
+num_tokens = 4096
+hidden = 7168
+num_topk = 8
+num_experts = 256
+allow_mnnvl = false
@@ -0,0 +1,22 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+name = "deepep_test_low_latency"  # test definition name; no scenario visible here references it
+description = "Official DeepEP V1 legacy test_low_latency"
+test_template_name = "DeepEP"
+
+[cmd_args]
+docker_image_url = "/your/path/to/the/container"  # placeholder: replace with the real container image
+subtest_name = "test_low_latency"
+legacy_tests_root = "/path/in/the/container/to/the/tests/folder"  # placeholder: in-container tests directory
+num_processes = 8
+num_tokens = 128  # smaller than the 4096 used by the other DeepEP test definitions in this change
+hidden = 7168
+num_topk = 8
+num_experts = 288  # the other DeepEP test definitions in this change use 256
+allow_mnnvl = false
+disable_nvlink = false
+use_logfmt = false
+pressure_test = false
+shrink_test = false
@@ -0,0 +1,32 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+name = "moe_benchmark_low_latency"  # test definition name; no scenario visible here references it
+description = "MoE Benchmark - DeepEP low-latency mode plus matrix export"
+test_template_name = "MoEBenchmark"
+
+[cmd_args]
+docker_image_url = "/your/path/to/the/container"  # placeholder: replace with the real container image
+benchmark_root = "/workspace/dp-benchmark/benchmark"  # same in-container root as moe_benchmark_standard; shares the prefix of results_dir
+mode = "low_latency"
+tokens = 128
+num_experts = 288
+num_topk = 8
+hidden_size = 7168
+data_type = "bfloat16"
+allow_nvlink_for_low_latency = false
+allow_mnnvl = false
+round_scale = false
+use_ue8m0 = false
+num_warmups = 20
+num_iterations = 50
+shuffle_columns = false
+use_kineto_profiler = false
+enable_tuning = false
+config_file_path = "/tmp/config.yaml"
+results_dir = "/workspace/dp-benchmark/results"
+
+[extra_env_vars]
+NUM_QPS_PER_RANK = "12"  # environment values are given as strings
+NUM_SMS = "24"
@@ -0,0 +1,32 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+name = "moe_benchmark_standard"  # referenced as test_name by the moe-benchmark scenario
+description = "MoE Benchmark - DeepEP standard mode plus matrix export"
+test_template_name = "MoEBenchmark"
+
+[cmd_args]
+docker_image_url = "/your/path/to/the/container"  # placeholder: replace with the real container image
+benchmark_root = "/workspace/dp-benchmark/benchmark"  # concrete in-container path; shares the prefix of results_dir
+mode = "standard"
+tokens = 4096
+num_experts = 256
+num_topk = 8
+hidden_size = 7168
+data_type = "bfloat16"
+allow_nvlink_for_low_latency = false
+allow_mnnvl = false
+round_scale = false
+use_ue8m0 = false
+num_warmups = 20
+num_iterations = 50
+shuffle_columns = false
+use_kineto_profiler = false
+enable_tuning = false
+config_file_path = "/tmp/config.yaml"
+results_dir = "/workspace/dp-benchmark/results"
+
+[extra_env_vars]
+NUM_QPS_PER_RANK = "12"  # environment values are given as strings
+NUM_SMS = "24"
@@ -1,5 +1,5 @@
 # SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
-# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
@@ -14,16 +14,24 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-name = "deepep-benchmark"
+name = "nccl_test_alltoallv"  # referenced as test_name by the moe-benchmark scenario
+description = "NCCL AlltoAllv"
+test_template_name = "NcclTest"
 
-[[Tests]]
-id = "Tests.1"
-test_name = "deepep_standard"
-num_nodes = 2
-time_limit = "00:30:00"
+[cmd_args]
+docker_image_url = "/your/path/to/the/container"  # placeholder: replace with the real container image
+subtest_name = "alltoallv_perf_mpi"
+nthreads = 1
+ngpus = 1
+minbytes = "512M"  # equal to maxbytes; NOTE(review): presumably a single message size is run, making stepfactor moot - confirm
+maxbytes = "512M"
+stepfactor = 2
+iters = 10
+warmup_iters = 1
+check = 1
+blocking = 0
+use_deepep_matrix = true  # NOTE(review): presumably consumes the matrix exported by the MoE benchmark - confirm
 
-[[Tests]]
-id = "Tests.2"
-test_name = "deepep_low_latency"
-num_nodes = 2
-time_limit = "00:30:00"
+[extra_env_vars]
+NCCL_P2P_DISABLE = "1"  # NOTE(review): presumably forces traffic off direct GPU peer-to-peer paths - confirm intent
+NCCL_SHM_DISABLE = "1"  # NOTE(review): presumably disables the shared-memory transport as well - confirm intent
@@ -0,0 +1,40 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2024-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name = "ucc_alltoallv_deepep"  # referenced as test_name by the moe-benchmark scenario
+description = "UCC AlltoAllv"
+test_template_name = "UCCTest"
+
+[cmd_args]
+docker_image_url = "/your/path/to/the/container"  # placeholder: replace with the real container image
+collective = "alltoallv"
+b = 1
+e = "8M"
+use_deepep_matrix = true  # NOTE(review): presumably consumes the matrix exported by the MoE benchmark - confirm
+
+[extra_env_vars]
+UCX_IB_GID_INDEX = "auto"
+UCX_TLS = "cuda_copy,rc"
+UCX_RNDV_THRESH = "0"
+UCX_RNDV_SCHEME = "get_zcopy"
+MELLANOX_VISIBLE_DEVICES = "0,3,4,5,6,9,10,11"  # NOTE(review): hard-coded device indices look system-specific - confirm for the target cluster
+CUDA_VISIBLE_DEVICES = "0,1,2,3,4,5,6,7"  # eight GPU indices listed; assumes 8 GPUs per node - TODO confirm
+UCC_CL_HIER_FULL_SBGP_TLS = "ucp"
+UCC_CL_HIER_NODE_SBGP_TLS = "cuda"
+UCC_TLS = "ucp,cuda"
+UCC_CL_HIER_TUNE = "alltoallv:0-inf:@node_split"
+UCC_TL_UCP_ALLTOALLV_PAIRWISE_NUM_POSTS = "8"
+UCC_CLS = "basic,hier"
@@ -0,0 +1,11 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+name = "deepep-official-tests"  # NOTE(review): only deepep_test_internode is scheduled below; the other deepep_test_* definitions are not - confirm intentional
+
+[[Tests]]
+id = "Tests.deepep_test_internode"
+test_name = "deepep_test_internode"  # matches the name key of the deepep_test_internode test definition
+num_nodes = 2
+time_limit = "00:30:00"
@@ -0,0 +1,29 @@
+# SPDX-FileCopyrightText: NVIDIA CORPORATION & AFFILIATES
+# Copyright (c) 2025-2026 NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# SPDX-License-Identifier: Apache-2.0
+
+name = "moe-benchmark"  # scenario chaining three tests: moe_benchmark -> ucc_alltoallv -> nccl_alltoallv
+
+[[Tests]]
+id = "Tests.moe_benchmark"  # first in the chain; declares no dependencies
+test_name = "moe_benchmark_standard"
+num_nodes = 2
+time_limit = "00:30:00"
+
+[[Tests]]
+id = "Tests.ucc_alltoallv"
+test_name = "ucc_alltoallv_deepep"
+num_nodes = 2
+time_limit = "00:30:00"
+  [[Tests.dependencies]]
+  type = "start_post_comp"  # NOTE(review): presumably "start after the referenced test completes" - confirm
+  id = "Tests.moe_benchmark"
+
+[[Tests]]
+id = "Tests.nccl_alltoallv"
+test_name = "nccl_test_alltoallv"
+num_nodes = 2
+time_limit = "00:30:00"
+  [[Tests.dependencies]]
+  type = "start_post_comp"
+  id = "Tests.ucc_alltoallv"  # depends on the UCC test, not directly on the MoE benchmark
-Original file line number
+Diff line change
@@ Expand Up / @@ -87,6 +87,7 @@ ehthumbs.db @@
     Thumbs.db
     *.log
+    slurm-*
     install/
     results/
     .*
@@ Expand Down @@