Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 23 additions & 0 deletions benchmarks/hardware/hygon/scripts/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
# 海光平台通信与带宽测试脚本

本目录包含基于 HYQual 和 rocm-bandwidth-test 的自动测试脚本,用于在海光平台评估:

1. CPU 与 AI 芯片间通信带宽(H2D 和 D2H)
2. AI 芯片间通信带宽
3. CUDA 代码兼容性测试
4. 内存带宽测试

## 目录结构

- `run_all_tests.sh`:一键执行所有测试
- `run_hyqual_test.expect`:自动执行 HYQual PCIe 带宽测试(菜单项 4)
- `run_hyqual_mem_test.expect`:自动执行 HYQual 内存带宽测试(菜单项 6)

## 使用说明

1. 下载 [HYQual 工具包](https://download.sourcefind.cn:65024/directlink/5/%E5%9F%BA%E7%A1%80%E5%8E%8B%E5%8A%9B%E5%B7%A5%E5%85%B7/hyqual_v3.0.3.tar.gz) 并解压至某路径,如 `/home/xxx/hyqual_v3.0.3`
2. 修改 `run_all_tests.sh` 中对应的路径变量(如 `HYQUAL_DIR`、`CUDA_TEST_DIR`、`MEM_BANDWIDTH_DIR`),使其指向实际路径
3. 运行测试脚本:
```bash
sudo bash run_all_tests.sh
```

65 changes: 65 additions & 0 deletions benchmarks/hardware/hygon/scripts/run_all_tests.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
#!/bin/bash
#
# One-shot driver for the Hygon communication / bandwidth benchmarks.
#
# Steps:
#   1. CPU <-> AI chip bandwidth (H2D / D2H): HYQual on the host.
#   2. AI chip <-> AI chip bandwidth: rocm-bandwidth-test in the container.
#   3. CUDA code compatibility: test1 / test2 in the container.
#   4. Memory bandwidth: HYQual in the container.
#
# Each step tees its output into ${LOGDIR}. Steps 2-4 require the container
# named by ${CONTAINER} to be running; when it is not, they are skipped with
# a warning instead of aborting half-way through the run.
#
# Usage: sudo bash run_all_tests.sh   (adjust the path variables below first)

set -e
set -o pipefail

# Path settings.
WORKDIR="$(pwd)"
# The *.expect helpers live next to this script, which is not necessarily
# the directory the script was started from.
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
LOGDIR="${WORKDIR}/logs"
HYQUAL_DIR="${WORKDIR}/hyqual_v3.0.3"
DTK_BIN_DIR="/opt/dtk/bin"
CUDA_TEST_DIR="/home/qiyuan"
MEM_BANDWIDTH_DIR="/home/hyqual_v2.2.7"
CONTAINER="bandwidth_test"
# Location of the memory-test expect helper inside the container.
CONTAINER_MEM_EXPECT="/tmp/run_hyqual_mem_test.expect"

#######################################
# Checks whether the benchmark container is usable for `docker exec`.
# Globals:   CONTAINER (read)
# Returns:   0 if a container named exactly ${CONTAINER} is running,
#            non-zero otherwise (including when docker itself fails).
#######################################
container_running() {
  local names
  # Capture first, then match: piping into `grep -q` under pipefail can
  # report a failure when grep closes the pipe early.
  names="$(docker ps --format '{{.Names}}')" || return 1
  grep -Fxq -- "${CONTAINER}" <<<"${names}"
}

# Create the log directory.
mkdir -p "${LOGDIR}"

##############################
# 1. CPU <-> AI chip bandwidth test
##############################
echo "[1/4] 进行 CPU 与 AI 芯片通信带宽测试..."
cd "${HYQUAL_DIR}"
chmod +x run
expect "${SCRIPT_DIR}/run_hyqual_test.expect" "${HYQUAL_DIR}" \
  | tee "${LOGDIR}/cpu_to_dcu_bandwidth.log"
echo "[INFO] 已记录至 ${LOGDIR}/cpu_to_dcu_bandwidth.log"

##############################
# 2. AI chip <-> AI chip bandwidth test
##############################
echo "[2/4] 进行 AI 芯片间通信带宽测试..."

container_ready=0
if container_running; then
  container_ready=1
  echo "[INFO] 使用 ${CONTAINER} 容器执行 rocm-bandwidth-test..."
  # Paths are passed as positional arguments rather than spliced into the
  # command string, so spaces or shell metacharacters cannot change it.
  docker exec "${CONTAINER}" bash -c 'cd "$1" && ./rocm-bandwidth-test' \
    _ "${DTK_BIN_DIR}" \
    | tee "${LOGDIR}/dcu_to_dcu_bandwidth.log"
else
  echo "[WARNING] ${CONTAINER} 容器未运行,请手动启动后再执行 rocm-bandwidth-test。"
  echo "[INFO] 启动命令:docker start ${CONTAINER}"
fi

##############################
# 3. CUDA code compatibility test
##############################
echo "[3/4] 进行 CUDA 代码兼容性测试..."

if (( container_ready )); then
  for cuda_test in test1 test2; do
    docker exec "${CONTAINER}" bash -c \
      'cd /opt/dtk && source env.sh && source cuda/env.sh && cd "$1" && "./$2"' \
      _ "${CUDA_TEST_DIR}" "${cuda_test}" \
      | tee "${LOGDIR}/cuda_${cuda_test}.log"
  done
else
  echo "[WARNING] ${CONTAINER} 容器未运行,跳过 CUDA 代码兼容性测试。"
fi

##############################
# 4. Memory bandwidth test
##############################
echo "[4/4] 进行内存带宽测试..."

if (( container_ready )); then
  # The expect helper must exist inside the container; ship the local copy
  # when there is one, otherwise rely on whatever is already in place.
  if [[ -f "${SCRIPT_DIR}/run_hyqual_mem_test.expect" ]]; then
    docker cp "${SCRIPT_DIR}/run_hyqual_mem_test.expect" \
      "${CONTAINER}:${CONTAINER_MEM_EXPECT}"
  fi
  docker exec "${CONTAINER}" bash -c \
    'cd /opt/dtk && source env.sh && source cuda/env.sh && cd "$1" && expect "$2" "$1"' \
    _ "${MEM_BANDWIDTH_DIR}" "${CONTAINER_MEM_EXPECT}" \
    | tee "${LOGDIR}/memory_bandwidth.log"
else
  echo "[WARNING] ${CONTAINER} 容器未运行,跳过内存带宽测试。"
fi

##############################
# Summary
##############################
if (( container_ready )); then
  printf '\n 所有测试完成。日志保存在:%s\n' "${LOGDIR}"
else
  printf '\n 仅完成第 1 项测试(%s 容器未运行,第 2-4 项已跳过)。日志保存在:%s\n' \
    "${CONTAINER}" "${LOGDIR}"
fi

printf '\n [1] H2D/D2H 带宽日志:%s\n' "${LOGDIR}/cpu_to_dcu_bandwidth.log"
if (( container_ready )); then
  printf ' [2] DCU 间带宽日志:%s\n' "${LOGDIR}/dcu_to_dcu_bandwidth.log"
  printf ' [3] CUDA 测试日志:%s 和 cuda_test2.log\n' "${LOGDIR}/cuda_test1.log"
  printf ' [4] 内存带宽日志:%s\n' "${LOGDIR}/memory_bandwidth.log"
fi

cd "${WORKDIR}"

31 changes: 31 additions & 0 deletions benchmarks/hardware/hygon/scripts/run_hyqual_mem_test.expect
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
#!/usr/bin/expect -f
#
# Drives the interactive HYQual menu to run the memory bandwidth test
# (menu item 6) without a human at the keyboard.
#
# Usage: run_hyqual_mem_test.expect <hyqual_path>
#   hyqual_path  directory that contains the HYQual `run` launcher
#
# Exit status: 0 when the test stage ended without a timeout,
#              1 on a usage error, when the menu never appeared,
#              or when the test timed out.

if {$argc < 1} {
    puts stderr "用法: $argv0 <hyqual_path>"
    exit 1
}

set timeout 600
set hyqual_path [lindex $argv 0]

spawn bash $hyqual_path/run

# Stage 1: wait for the menu prompt and pick item 6.
expect {
    "Key-in selection followed by <enter>:" {
        send "6\r"
    }
    timeout {
        puts "内存测试菜单未加载成功"
        exit 1
    }
    eof {
        # The launcher exited before showing the menu; continuing would
        # only fail later on a closed spawn id.
        puts "内存测试菜单未加载成功"
        exit 1
    }
}

# Stage 2: wait for bandwidth output (or the menu coming back), then quit.
set exit_code 0
set child_exited 0
expect {
    -re "Bandwidth.*" {
        sleep 2
        send "q\r"
    }
    "Key-in selection followed by <enter>:" {
        send "q\r"
    }
    timeout {
        puts "内存带宽测试超时,强制退出"
        send "\003"
        # Report the timeout to the caller instead of exiting 0.
        set exit_code 1
    }
    eof {
        set child_exited 1
    }
}

# Give the child a short grace period to act on "q" / Ctrl+C. Output it
# prints in the meantime is only echoed (and thus logged) while expect is
# reading, so leaving immediately would truncate the log.
if {!$child_exited} {
    set timeout 10
    expect {
        eof {}
        timeout {}
    }
}

exit $exit_code

35 changes: 35 additions & 0 deletions benchmarks/hardware/hygon/scripts/run_hyqual_test.expect
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
#!/usr/bin/expect -f
#
# Drives the interactive HYQual menu to run the PCIe bandwidth test
# (menu item 4) without a human at the keyboard.
#
# Usage: run_hyqual_test.expect <hyqual_path>
#   hyqual_path  directory that contains the executable HYQual `run` launcher
#
# Exit status: 0 when the test stage ended without a timeout,
#              1 on a usage error, when the menu never appeared,
#              or when the test timed out.

if {$argc < 1} {
    puts stderr "用法: $argv0 <hyqual_path>"
    exit 1
}

set timeout 600 ;# wait at most 10 minutes per stage
set hyqual_path [lindex $argv 0]

spawn $hyqual_path/run

# Stage 1: wait for the menu prompt and pick item 4.
expect {
    "Key-in selection followed by <enter>:" {
        send "4\r"
    }
    timeout {
        puts "启动失败,未出现菜单提示"
        exit 1
    }
    eof {
        # The launcher exited before showing the menu; continuing would
        # only fail later on a closed spawn id.
        puts "启动失败,未出现菜单提示"
        exit 1
    }
}

# Stage 2: wait for the test to produce its result, then leave the tool.
set exit_code 0
set child_exited 0
expect {
    -re "Bandwidth.*" {
        # Once HYQual has printed bandwidth output, send q to quit.
        sleep 2
        send "q\r"
    }
    "Key-in selection followed by <enter>:" {
        send "q\r"
    }
    timeout {
        puts "测试超时,请检查 hyqual 是否卡住"
        send "\003" ;# Ctrl+C to force the tool to stop
        # Report the timeout to the caller instead of exiting 0.
        set exit_code 1
    }
    eof {
        # The tool exited on its own: normal termination.
        set child_exited 1
    }
}

# Give the child a short grace period to act on "q" / Ctrl+C. Output it
# prints in the meantime is only echoed (and thus logged) while expect is
# reading, so leaving immediately would truncate the log.
if {!$child_exited} {
    set timeout 10
    expect {
        eof {}
        timeout {}
    }
}

exit $exit_code