Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
15 changes: 12 additions & 3 deletions .github/workflows/gitlab-manual-ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -44,19 +44,27 @@ jobs:
name: Run GitLab CI manually
runs-on: ubuntu-latest
steps:
- name: Check out target ref
- name: Check out trusted workflow ref
uses: actions/checkout@v4
with:
fetch-depth: 0
ref: ${{ inputs.target_ref }}
path: trusted

- name: Prepare GitLab repository settings
id: gitlab-repo
uses: ./.github/actions/prepare-gitlab-repo
uses: ./trusted/.github/actions/prepare-gitlab-repo
with:
gitlab-repo: ${{ secrets.GITLAB_REPO }}

# Check out the ref given by the `target_ref` workflow input into the `target`
# subdirectory, so it stays separate from the checkout under `trusted`.
- name: Check out target ref
uses: actions/checkout@v4
with:
# 0 = fetch full history rather than a shallow clone.
fetch-depth: 0
ref: ${{ inputs.target_ref }}
# The push step below runs with `working-directory: target`.
path: target

- name: Push target ref to GitLab test branch
working-directory: target
env:
GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }}
GITLAB_REPO_HOST_PATH: ${{ steps.gitlab-repo.outputs.host-path }}
Expand Down Expand Up @@ -184,6 +192,7 @@ jobs:

- name: Delete GitLab test branch
if: always()
working-directory: trusted
env:
GITLAB_TOKEN: ${{ secrets.GITLAB_TOKEN }}
GITLAB_REPO_HOST_PATH: ${{ steps.gitlab-repo.outputs.host-path }}
Expand Down
3 changes: 1 addition & 2 deletions benchpark-bridge/scripts/result_converter.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@
import os
import sys
import glob
import yaml
from datetime import datetime
from pathlib import Path

Expand Down Expand Up @@ -535,4 +534,4 @@ def main():


if __name__ == "__main__":
main()
main()
9 changes: 9 additions & 0 deletions config/queue.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
queue,submit_cmd,template
SLURM_AI4SS,sbatch,"-p ${queue_group} -t ${elapse} -N ${nodes} --ntasks-per-node=${numproc_node} --cpus-per-task=${nthreads} --gpus-per-node=${numproc_node}"
FJ,pjsub,"-L rscunit=rscunit_ft01,rscgrp=${queue_group},elapse=${elapse},node=${nodes} --mpi max-proc-per-node=${numproc_node} -x PJM_LLIO_GFSCACHE=/vol0002:/vol0003:/vol0004:/vol0005"
PJM_GENKAI,pjsub,"-L rscgrp=${queue_group},elapse=${elapse},node=${nodes} --mpi proc=${proc}"
SLURM_RC,sbatch,"-p ${queue_group} -t ${elapse} -N ${nodes} --ntasks-per-node=${numproc_node} --cpus-per-task=${nthreads}"
Expand All @@ -7,4 +8,12 @@ PBS_Grand_C,qsub,"-q ${queue_group} -l select=${nodes}:nsockets=${cpu_per_node},
PBS_Grand_G,qsub,"-q ${queue_group} -l select=${nodes}:ngpus=1,walltime=${elapse} -W group_list=d30992"
NQSV_AOBA_VE,qsub,"-Z -v http_proxy,https_proxy,HTTP_PROXY,HTTPS_PROXY -q ${queue_group} -T necmpi --venode ${proc} -l elapstim_req=${elapse}"
NQSV_AOBA_B,qsub,"-Z -v http_proxy,https_proxy,HTTP_PROXY,HTTPS_PROXY -q ${queue_group} -T intmpi -b ${nodes} -l elapstim_req=${elapse}"
PJM_WISTERIA_O,pjsub,"-g jh260034o -L rscgrp=${queue_group},elapse=${elapse},node=${nodes} --mpi proc=${proc} --omp thread=${nthreads}"
PJM_WISTERIA_A,pjsub,"-g jh260034a -L rscgrp=${queue_group},elapse=${elapse},node=${nodes} --mpi proc=${proc} --omp thread=${nthreads}"
PBS_TSUKUBA,qsub,"-q ${queue_group} -l select=${nodes}:mpiprocs=${numproc_node}:ompthreads=${nthreads} -l walltime=${elapse}"
AGE_TSUBAME4,qsub,"-l ${queue_group}=${nodes} -l h_rt=${elapse}"
SLURM_CAMPHOR3,sbatch,"-p ${queue_group} -t ${elapse} --rsc p=${proc}:t=${nthreads}:c=${nthreads}:m=1G"
NQSV_OSAKA_CPU,qsub,"-q ${queue_group} -b ${nodes} -l elapstim_req=${elapse},cpunum_job=${nthreads}"
NQSV_OSAKA_GPU,qsub,"-q ${queue_group} -b ${nodes} -l elapstim_req=${elapse},cpunum_job=${nthreads},gpunum_job=${gpu_per_node}"
NQSV_OSAKA_VE,qsub,"-q ${queue_group} --venode ${proc} -l elapstim_req=${elapse}"
none,none,none
11 changes: 11 additions & 0 deletions config/system.csv
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
system,mode,tag_build,tag_run,queue,queue_group
AI4SS,cross,ai4ss_login,ai4ss_jacamar,SLURM_AI4SS,1n1gpu
Fugaku,cross,fugaku_login1,fugaku_jacamar,FJ,small
FugakuLN,native,,fugaku_login1,none,small
FugakuCN,native,,fugaku_jacamar,FJ,small
Expand All @@ -16,4 +17,14 @@ Grand_G,cross,grand_login,grand_jacamar,PBS_Grand_G,eg
AOBA_A,cross,aoba_ab_login,aoba_ab_jacamar,NQSV_AOBA_VE,sx
AOBA_B,cross,aoba_ab_login,aoba_ab_jacamar,NQSV_AOBA_B,lx
AOBA_S,cross,aoba_s_login,aoba_s_jacamar,NQSV_AOBA_VE,sxs
Odyssey,cross,wisteria_login,wisteria-o_jacamar,PJM_WISTERIA_O,short-o
Aquarius,cross,wisteria_login,wisteria-a_jacamar,PJM_WISTERIA_A,short-a
Pegasus,cross,pegasus_login,pegasus_jacamar,PBS_TSUKUBA,regular
Sirius,cross,sirius_login,sirius_jacamar,PBS_TSUKUBA,regular
TSUBAME4,cross,tsubame4_login,tsubame4_jacamar,AGE_TSUBAME4,node_f
Camphor3,cross,camphor3_login,camphor3_jacamar,SLURM_CAMPHOR3,jha
SQUID_CPU,cross,squid_login,squid_jacamar,NQSV_OSAKA_CPU,SQUID
SQUID_GPU,cross,squid_login,squid_jacamar,NQSV_OSAKA_GPU,SQUID
SQUID_VECTOR,cross,squid_login,squid_jacamar,NQSV_OSAKA_VE,SQUID
OCTOPUS,cross,octopus_login,octopus_jacamar,NQSV_OSAKA_CPU,OCT
FNCX,native,,fncx-curl-jq,none,small
45 changes: 28 additions & 17 deletions config/system_info.csv
Original file line number Diff line number Diff line change
@@ -1,18 +1,29 @@
system,name,cpu_name,cpu_per_node,cpu_cores,gpu_name,gpu_per_node,memory,display_order
Fugaku,Fugaku,A64FX,1,48,-,-,32GB,1
FugakuCN,FugakuCN,A64FX,1,48,-,-,32GB,2
FugakuLN,FugakuLN,Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz,2,16,-,-,96GB,3
MiyabiG,MiyabiG,NVIDIA Grace CPU,1,72,NVIDIA Hopper H100 GPU,1,120GB,4
MiyabiC,MiyabiC,Intel Xeon Max 9480,2,56,-,-,128GB,5
RC_GH200,RC_GH200,NVIDIA Grace CPU,1,72,NVIDIA Hopper H100 GPU,1,120GB,6
RC_DGXSP,RC_DGXSP,ARM Cortex-X925 / Cortex-A725,1,20,NVIDIA GB10,1,128GB,7
RC_GENOA,RC_GENOA,AMD EPYC 9684X,2,96,-,-,768GB,8
RC_FX700,RC_FX700,A64FX,1,48,-,-,32GB,9
GenkaiA,GenkaiA,Intel Xeon Platinum 8490H (Sapphire Rapids),2,60,-,-,512GiB,10
GenkaiB,GenkaiB,Intel Xeon Platinum 8490H (Sapphire Rapids),2,60,NVIDIA H100 (Hopper),4,1024GiB,11
GenkaiC,GenkaiC,Intel Xeon Platinum 8480+ (Sapphire Rapids),2,56,NVIDIA H100 (Hopper),8,8TiB,12
Grand_C,Grand_C,Intel Xeon Gold 6548Y+ (Emerald Rapids),2,32,-,-,512GiB,13
Grand_G,Grand_G,Intel Xeon Gold 6548Y+ (Emerald Rapids),2,32,NVIDIA H100 (Hopper),4,512GiB,14
AOBA_A,AOBA_A,SX-Aurora TSUBASA VH,1,24,NEC SX-Aurora TSUBASA Type 20B VE,8,640GB,15
AOBA_B,AOBA_B,AMD EPYC 7702,2,64,-,-,256GB,16
AOBA_S,AOBA_S,SX-Aurora TSUBASA VH,1,64,NEC SX-Aurora TSUBASA Type 30A VE,8,256GB + 768GB,17
AI4SS,RIKEN AI4S Supercomputer,NVIDIA Grace CPU,2,72,NVIDIA B200,4,960GiB + 692.8GiB,1
Fugaku,Fugaku,A64FX,1,48,-,-,32GB,2
FugakuCN,FugakuCN,A64FX,1,48,-,-,32GB,3
FugakuLN,FugakuLN,Intel(R) Xeon(R) Gold 6242 CPU @ 2.80GHz,2,16,-,-,96GB,4
MiyabiG,MiyabiG,NVIDIA Grace CPU,1,72,NVIDIA Hopper H100 GPU,1,120GB,5
MiyabiC,MiyabiC,Intel Xeon Max 9480,2,56,-,-,128GB,6
RC_GH200,RC_GH200,NVIDIA Grace CPU,1,72,NVIDIA Hopper H100 GPU,1,120GB,7
RC_DGXSP,RC_DGXSP,ARM Cortex-X925 / Cortex-A725,1,20,NVIDIA GB10,1,128GB,8
RC_GENOA,RC_GENOA,AMD EPYC 9684X,2,96,-,-,768GB,9
RC_FX700,RC_FX700,A64FX,1,48,-,-,32GB,10
GenkaiA,GenkaiA,Intel Xeon Platinum 8490H (Sapphire Rapids),2,60,-,-,512GiB,11
GenkaiB,GenkaiB,Intel Xeon Platinum 8490H (Sapphire Rapids),2,60,NVIDIA H100 (Hopper),4,1024GiB,12
GenkaiC,GenkaiC,Intel Xeon Platinum 8480+ (Sapphire Rapids),2,56,NVIDIA H100 (Hopper),8,8TiB,13
Grand_C,Grand_C,Intel Xeon Gold 6548Y+ (Emerald Rapids),2,32,-,-,512GiB,14
Grand_G,Grand_G,Intel Xeon Gold 6548Y+ (Emerald Rapids),2,32,NVIDIA H100 (Hopper),4,512GiB,15
AOBA_A,AOBA_A,SX-Aurora TSUBASA VH,1,24,NEC SX-Aurora TSUBASA Type 20B VE,8,640GB,16
AOBA_B,AOBA_B,AMD EPYC 7702,2,64,-,-,256GB,17
AOBA_S,AOBA_S,SX-Aurora TSUBASA VH,1,64,NEC SX-Aurora TSUBASA Type 30A VE,8,256GB + 768GB,18
Odyssey,Odyssey,A64FX,1,48,-,-,32GiB,19
Aquarius,Aquarius,Intel Xeon Platinum 8360Y,2,36,NVIDIA A100,8,512GiB,20
TSUBAME4,TSUBAME4.0,AMD EPYC 9654,2,96,NVIDIA H100 SXM5 94GB HBM2e,4,768GiB,21
Camphor3,Camphor3,Intel Xeon CPU Max 9480,2,56,-,-,128GiB,22
Pegasus,Pegasus,Intel Xeon Platinum 8468,2,48,NVIDIA H100 PCIe,1,128GiB + 2TiB PMem,23
Sirius,Sirius (PACS12.0),AMD EPYC Zen 4 (MI300A APU),4,24,AMD Instinct MI300A CDNA3,4,512GB HBM3,24
SQUID_CPU,SQUID CPU,Intel Xeon Platinum 8368,2,38,-,-,256GB,25
SQUID_GPU,SQUID GPU,Intel Xeon Platinum 8368,2,38,NVIDIA A100 SXM4 40GB,8,512GB,26
SQUID_VECTOR,SQUID Vector,AMD EPYC 7402P,1,24,NEC SX-Aurora TSUBASA Type20A VE,8,128GB + 384GB,27
OCTOPUS,OCTOPUS,Intel Xeon 6980P (Granite Rapids),2,128,-,-,768GB,28
2 changes: 1 addition & 1 deletion docs/ci.md
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,7 @@ Use these examples when deciding whether to split a pull request or start GitLab
| Example change set / 変更例 | Expected checks / 期待される確認 | GitLab benchmark expectation / GitLab benchmark期待値 |
|---|---|---|
| `docs/ci.md` only / `docs/ci.md`のみ | Review the documentation diff / docs差分をreview | No benchmark run. Direct/manual GitLab pipelines should skip by rules / benchmark不要。直接/手動GitLab pipelineではrulesでskipされる想定 |
| `result_server/routes/usage.py` and `result_server/templates/*.html` / `result_server/routes/usage.py`と`result_server/templates/*.html` | `Result Server Tests` should run / `Result Server Tests`が動く | No benchmark run unless a maintainer intentionally starts one / maintainerが意図して起動しない限りbenchmark不要 |
| `result_server/routes/results_usage_routes.py` and `result_server/templates/*.html` / `result_server/routes/results_usage_routes.py`と`result_server/templates/*.html` | `Result Server Tests` should run / `Result Server Tests`が動く | No benchmark run unless a maintainer intentionally starts one / maintainerが意図して起動しない限りbenchmark不要 |
| `config/system_info.csv` only / `config/system_info.csv`のみ | `Result Server Tests` should verify public site config consistency / 公開site config整合性を`Result Server Tests`で確認 | No benchmark run because this file is portal display metadata / portal表示metadataなのでbenchmark不要 |
| `config/system.csv` or `config/queue.csv` for a public system / 公開system向けの`config/system.csv`または`config/queue.csv` | `Result Server Tests` should run the site config preflight / `Result Server Tests`でsite config preflightを実行 | Start `GitLab Manual CI` too when benchmark execution behavior needs validation / benchmark実行挙動の検証が必要なら`GitLab Manual CI`も起動 |
| `scripts/bk_functions.sh`, `scripts/result.sh`, or `scripts/result_server/**` only / `scripts/bk_functions.sh`、`scripts/result.sh`、または`scripts/result_server/**`のみ | `Result Server Tests` should run when the path filter matches / path filter対象なら`Result Server Tests`が動く | Manual GitLab CI is optional and only needed if upload behavior affects benchmark operation / upload挙動がbenchmark運用に影響する場合だけ手動GitLab CIを検討 |
Expand Down
36 changes: 36 additions & 0 deletions programs/qws/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,10 @@ case "$system" in
echo "Dummy build for FNCX Docker runner test"
echo aaaa > main
;;
# AI4SS: build QWS with the nvhpc-hpcx toolchain.
AI4SS)
# Same module and version that run.sh loads for this system.
module load nvhpc-hpcx/26.3
# omp=1 and mpi=1 are passed explicitly; rdma is passed as an empty value.
# NOTE(review): the meaning of arch=grace is defined by the QWS Makefile, which is not visible here — confirm.
make -j 8 omp=1 compiler=nvhpc-hpcx arch=grace rdma= mpi=1
;;
RC_GH200)
module load system/qc-gh200 nvhpc-hpcx/25.9
### QWSはNeoverse版やGPU版はないので汎用版としてとりあえずarch=skylakeを指定している
Expand Down Expand Up @@ -75,6 +79,38 @@ case "$system" in
# AOBA_B: build QWS with the openmpi-gnu compiler setting via the mpicc/mpic++ wrappers.
AOBA_B)
# No module is loaded in this arm, so mpicc/mpic++ are taken from the existing PATH.
# fugaku_benchmark, rdma and powerapi are passed as empty values; omp=1 and mpi=1 are set.
make -j 8 fugaku_benchmark= omp=1 compiler=openmpi-gnu arch=skylake rdma= mpi=1 powerapi= CC=mpicc CXX=mpic++
;;
# Odyssey: build QWS with the fujitsu_cross compiler setting.
Odyssey)
module load odyssey
# NOTE(review): unlike most other arms, omp=/mpi=/rdma= are not passed here,
# so the Makefile defaults apply — confirm those defaults are the intended ones.
make compiler=fujitsu_cross arch=postk -j 8
;;
# Aquarius: build QWS with the Intel compiler setting.
Aquarius)
# Drop any inherited modules before loading the Intel module.
module purge
module load intel
# Source the environment script from a site-specific path pinned to intel/2023.0.0;
# judging by the path this sets up Intel MPI. run.sh sources the same file.
source /work/opt/local/x86_64/cores/intel/2023.0.0/mpi/latest/env/vars.sh
# rdma is passed as an empty value.
# NOTE(review): omp= and mpi= are not passed, so the Makefile defaults apply — confirm.
make compiler=intel arch=skylake rdma= -j8
;;
# TSUBAME4: build QWS with the openmpi-gnu compiler setting via the mpicc/mpic++ wrappers.
TSUBAME4)
# Same make invocation as the AOBA_B arm.
# NOTE(review): no module is loaded here, so mpicc/mpic++ must already be on PATH — confirm
# on the TSUBAME4 build runner.
make -j 8 fugaku_benchmark= omp=1 compiler=openmpi-gnu arch=skylake rdma= mpi=1 powerapi= CC=mpicc CXX=mpic++
;;
# Camphor3: initialise the module system, load the Intel toolchain, then build QWS.
Camphor3)
# Remember the inherited MODULEPATH (empty string if unset).
camphor3_modulepath="${MODULEPATH:-}"
# Source the first readable module init script; if neither exists, only warn
# on stderr and continue (the build is not aborted here).
if [[ -r /etc/profile.d/modules.sh ]]; then
source /etc/profile.d/modules.sh
elif [[ -r /etc/profile.d/z00_lmod.sh ]]; then
source /etc/profile.d/z00_lmod.sh
else
echo "qws: no module init script found" >&2
fi
# Prefer the MODULEPATH that is set after initialisation, when non-empty.
if [[ -n "${MODULEPATH:-}" ]]; then
camphor3_modulepath="${MODULEPATH}"
fi
module purge
# Re-export the saved MODULEPATH after the purge.
# NOTE(review): presumably `module purge` can drop MODULEPATH on this system — confirm.
if [[ -n "${camphor3_modulepath:-}" ]]; then
export MODULEPATH="${camphor3_modulepath}"
fi
# run.sh loads the same Intel modules but not slurm/2022 or SysA/2022.
module load slurm/2022 SysA/2022 intel/2023.2 intelmpi/2023.2 PrgEnvIntel/2023
# fugaku_benchmark, rdma and powerapi are passed as empty values; omp=1 and mpi=1 are set.
make -j 8 fugaku_benchmark= omp=1 compiler=intel arch=skylake rdma= mpi=1 powerapi=
;;
*)
echo "Unknown system: $system"
exit 1
Expand Down
7 changes: 6 additions & 1 deletion programs/qws/list.csv
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
system,enable,nodes,numproc_node,nthreads,elapse
AI4SS,yes,1,1,72,0:10:00
Fugaku,yes,1,4,12,0:10:00
FugakuLN,yes,1,1,1,0:10:00
FugakuCN,no,1,4,12,0:10:00
FugakuCN,no,2,4,12,0:10:00
RC_GH200,yes,1,1,72,0:10:00
RC_DGXSP,yes,1,1,20,0:10:00
RC_GENOA,yes,1,1,96,0:10:00
RC_FX700,yes,1,4,12,0:10:00
RC_FX700,yes,1,1,12,0:10:00
MiyabiG,yes,1,1,72,0:10:00
MiyabiC,yes,1,1,112,0:10:00
GenkaiA,yes,1,1,120,0:10:00
Expand All @@ -17,4 +18,8 @@ Grand_G,yes,1,1,64,0:10:00
AOBA_A,yes,1,1,8,0:10:00
AOBA_S,yes,1,1,8,0:10:00
AOBA_B,yes,1,1,128,0:10:00
Odyssey,yes,1,1,12,0:10:00
Aquarius,yes,1,1,8,0:10:00
TSUBAME4,yes,1,1,192,0:10:00
Camphor3,yes,1,1,112,0:10:00
FNCX,yes,1,1,1,0:10:00
60 changes: 60 additions & 0 deletions programs/qws/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,14 @@ case "$system" in
echo 'dummy call for FNCX Docker runner test'
bk_emit_result --fom 99.99 --fom-version dummy --exp FNCXTest --nodes "$nodes" --numproc-node "$numproc_node" --nthreads "$nthreads" >> ../results/result
;;
# AI4SS: run QWS as a single MPI rank with 72 OpenMP threads.
AI4SS)
# Same module and version that build.sh loads for this system.
module load nvhpc-hpcx/26.3
# Thread count is hard-coded; it equals the nthreads value for AI4SS in list.csv.
export OMP_NUM_THREADS=72
export OMP_PLACES=cores
export OMP_PROC_BIND=close
# One rank, no binding applied by mpirun; program stdout is captured in CASE0.
# NOTE(review): presumably --bind-to none leaves thread placement to the OMP_* settings above — confirm.
mpirun --bind-to none -n 1 ./main 32 6 4 3 1 1 1 1 -1 -1 6 50 > CASE0
# Append the output of print_results (defined outside this view) to the shared result file.
print_results CASE0 CASE0 1 >> ../results/result
;;
RC_GH200)
module load system/qc-gh200 nvhpc-hpcx/25.9
mpirun -n 1 --bind-to core --map-by ppr:1:node:PE=72 ./main 32 6 4 3 1 1 1 1 -1 -1 6 50 > CASE0
Expand Down Expand Up @@ -141,6 +149,58 @@ case "$system" in
mpirun -np ${qws_numproc} ./main 32 6 4 3 1 1 1 1 -1 -1 6 50 > CASE0
print_results CASE0 CASE0 ${numproc_node} >> ../results/result
;;
# Odyssey: run QWS as a single MPI rank with 12 OpenMP threads.
Odyssey)
# Initialise the module command if the init script is readable; otherwise only
# warn on stderr and continue.
if [[ -r /etc/profile.d/modules.sh ]]; then
source /etc/profile.d/modules.sh
else
echo "qws: /etc/profile.d/modules.sh is not readable" >&2
fi
# Unload first, discarding stderr and ignoring failure, then load in a fixed order.
module unload fjmpi fj odyssey 2>/dev/null || true
module load odyssey fj fjmpi
# Thread count is hard-coded; it equals the nthreads value for Odyssey in list.csv.
export OMP_NUM_THREADS=12
# NOTE(review): presumably a Fujitsu MPI runtime setting controlling empty output files — confirm.
export PLE_MPI_STD_EMPTYFILE=off
# Output file is named with -ofout rather than shell redirection.
# NOTE(review): assumes -ofout writes exactly to CASE0 (no suffix), which print_results reads — confirm.
mpiexec -n 1 -ofout CASE0 ./main 32 6 4 3 1 1 1 1 -1 -1 6 50
# Append the output of print_results (defined outside this view) to the shared result file.
print_results CASE0 CASE0 1 >> ../results/result
;;
# Aquarius: run QWS as a single MPI rank with 8 OpenMP threads.
Aquarius)
# Recreate the environment used by build.sh: purge, load intel, source the same vars.sh.
module purge
module load intel
source /work/opt/local/x86_64/cores/intel/2023.0.0/mpi/latest/env/vars.sh
# Thread count is hard-coded; it equals the nthreads value for Aquarius in list.csv.
export OMP_NUM_THREADS=8
export I_MPI_PIN=1
# Program stdout is captured in CASE0.
mpiexec -n 1 ./main 32 6 4 3 1 1 1 1 -1 -1 6 50 > CASE0
# Append the output of print_results (defined outside this view) to the shared result file.
print_results CASE0 CASE0 1 >> ../results/result
;;
# TSUBAME4: run QWS with nodes * numproc_node MPI ranks.
TSUBAME4)
# Total rank count across all nodes.
qws_numproc=$((nodes * numproc_node))
# NOTE(review): this arm loads no module and does not set OMP_NUM_THREADS, although
# list.csv gives nthreads=192 for TSUBAME4 — confirm the thread count is set elsewhere.
mpirun -n ${qws_numproc} ./main 32 6 4 3 1 1 1 1 -1 -1 6 50 > CASE0
# Third argument is numproc_node here, where the single-rank arms pass a literal 1.
print_results CASE0 CASE0 ${numproc_node} >> ../results/result
;;
# Camphor3: initialise the module system, load the Intel toolchain, then run QWS
# as a single task through srun.
Camphor3)
# Remember the inherited MODULEPATH (empty string if unset).
camphor3_modulepath="${MODULEPATH:-}"
# Source the first readable module init script; if neither exists, only warn
# on stderr and continue.
if [[ -r /etc/profile.d/modules.sh ]]; then
source /etc/profile.d/modules.sh
elif [[ -r /etc/profile.d/z00_lmod.sh ]]; then
source /etc/profile.d/z00_lmod.sh
else
echo "qws: no module init script found" >&2
fi
# Prefer the MODULEPATH that is set after initialisation, when non-empty.
if [[ -n "${MODULEPATH:-}" ]]; then
camphor3_modulepath="${MODULEPATH}"
fi
module purge
# Re-export the saved MODULEPATH after the purge.
# NOTE(review): presumably `module purge` can drop MODULEPATH on this system — confirm.
if [[ -n "${camphor3_modulepath:-}" ]]; then
export MODULEPATH="${camphor3_modulepath}"
fi
# Unlike build.sh, slurm/2022 and SysA/2022 are not loaded here.
module load intel/2023.2 intelmpi/2023.2 PrgEnvIntel/2023
# Thread count comes from the nthreads variable rather than a hard-coded number.
export OMP_NUM_THREADS="${nthreads}"
export I_MPI_PIN=1
# Drop SLURM_CONF only when it points under /etc/slurm/sysA/.
# NOTE(review): presumably so srun uses the job's default Slurm configuration — confirm.
if [[ "${SLURM_CONF:-}" == /etc/slurm/sysA/* ]]; then
unset SLURM_CONF
fi
# One task with nthreads CPUs; program stdout is captured in CASE0.
srun -n 1 -c "${nthreads}" ./main 32 6 4 3 1 1 1 1 -1 -1 6 50 > CASE0
# Append the output of print_results (defined outside this view) to the shared result file.
print_results CASE0 CASE0 1 >> ../results/result
;;
*)
echo "Unknown Running system: $system"
exit 1
Expand Down
2 changes: 1 addition & 1 deletion requirements-result-server.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# result_server requires Python 3.12+ for safe tar extraction via tarfile filter="data".
# result_server is tested and deployed on Python 3.12+; archive handling uses explicit path/type validation.
Flask>=3.0,<4.0
Flask-Session>=0.8,<1.0
Flask-WTF>=1.2,<2.0
Expand Down
Loading
Loading