Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion config/queue.csv
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ NQSV_AOBA_B,qsub,"-Z -v http_proxy,https_proxy,HTTP_PROXY,HTTPS_PROXY -q ${queue
PJM_WISTERIA_O,pjsub,"-g jh260034o -L rscgrp=${queue_group},elapse=${elapse},node=${nodes} --mpi proc=${proc} --omp thread=${nthreads}"
PJM_WISTERIA_A,pjsub,"-g jh260034a -L rscgrp=${queue_group},elapse=${elapse},node=${nodes} --mpi proc=${proc} --omp thread=${nthreads}"
PBS_TSUKUBA,qsub,"-q ${queue_group} -l select=${nodes}:mpiprocs=${numproc_node}:ompthreads=${nthreads} -l walltime=${elapse}"
AGE_TSUBAME4,qsub,"-l ${queue_group}=${nodes} -l h_rt=${elapse}"
PBS_PEGASUS,qsub,"-q ${queue_group} -A CNTBENCH -l elapstim_req=${elapse} -v OMP_NUM_THREADS=${nthreads}"
PBS_SIRIUS,qsub,"-q ${queue_group} -A CNTBENCH -W group_list=CNTBENCH -l select=${nodes}:ncpus=24:mem=124gb:ngpus=1 -l walltime=${elapse}"
AGE_TSUBAME4,qsub,"-g jh260034 -l ${queue_group}=${nodes} -l h_rt=${elapse}"
SLURM_CAMPHOR3,sbatch,"-p ${queue_group} -t ${elapse} --rsc p=${proc}:t=${nthreads}:c=${nthreads}:m=1G"
NQSV_OSAKA_CPU,qsub,"-q ${queue_group} -b ${nodes} -l elapstim_req=${elapse},cpunum_job=${nthreads}"
NQSV_OSAKA_GPU,qsub,"-q ${queue_group} -b ${nodes} -l elapstim_req=${elapse},cpunum_job=${nthreads},gpunum_job=${gpu_per_node}"
Expand Down
6 changes: 3 additions & 3 deletions config/system.csv
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@ AOBA_B,cross,aoba_ab_login,aoba_ab_jacamar,NQSV_AOBA_B,lx
AOBA_S,cross,aoba_s_login,aoba_s_jacamar,NQSV_AOBA_VE,sxs
Odyssey,cross,wisteria_login,wisteria-o_jacamar,PJM_WISTERIA_O,short-o
Aquarius,cross,wisteria_login,wisteria-a_jacamar,PJM_WISTERIA_A,short-a
Pegasus,cross,pegasus_login,pegasus_jacamar,PBS_TSUKUBA,regular
Sirius,cross,sirius_login,sirius_jacamar,PBS_TSUKUBA,regular
TSUBAME4,cross,tsubame4_login,tsubame4_jacamar,AGE_TSUBAME4,node_f
Pegasus,cross,pegasus_login,pegasus_jacamar,PBS_PEGASUS,gpu
Sirius,cross,sirius_login,sirius_jacamar,PBS_SIRIUS,mcrp
TSUBAME4,cross,tsubame4_login,tsubame4_jacamar,AGE_TSUBAME4,cpu_4
Camphor3,cross,camphor3_login,camphor3_jacamar,SLURM_CAMPHOR3,jha
SQUID_CPU,cross,squid_login,squid_jacamar,NQSV_OSAKA_CPU,SQUID
SQUID_GPU,cross,squid_login,squid_jacamar,NQSV_OSAKA_GPU,SQUID
Expand Down
26 changes: 22 additions & 4 deletions docs/guides/add-site.md
Original file line number Diff line number Diff line change
Expand Up @@ -109,6 +109,8 @@ ARM64 ログインノードでは `--arch arm64` を指定します。
- `--no-systemd` / `--no-start`
- systemd user service を作らない、または作るだけで起動しない場合に使います

Jacamar-CI のビルドは、ログインノードのプロセス数・メモリ制限に当たりにくいよう、既定で `make -j1`、`GOMAXPROCS=1`、`GOFLAGS="-p=1 -gcflags=all=-dwarf=false"` を使います。余裕のある環境では `JACAMAR_BUILD_MAKE_JOBS`、`JACAMAR_BUILD_GOMAXPROCS`、`JACAMAR_BUILD_GOFLAGS` で上書きできます。

このスクリプトは `config.toml` の `environment` に `PATH=$BASE_DIR/bin:...` を登録時点で入れるため、アーティファクト保存時に `gitlab-runner` が見つからない問題も避けられます。以下の手動手順は、スクリプトが失敗した場合の切り分けや、サイト固有に調整したい場合の参照として使ってください。

---
Expand Down Expand Up @@ -144,6 +146,7 @@ $BASE_DIR/
├── custom-config.toml # Jacamar 設定ファイル
├── config.sh # カスタムランナー: config
├── prepare.sh # カスタムランナー: prepare
├── runner-env.sh # カスタムランナー: 共通環境初期化
├── run.sh # カスタムランナー: run
└── cleanup.sh # カスタムランナー: cleanup
```
Expand Down Expand Up @@ -202,8 +205,10 @@ cd jacamar-ci
export CC=gcc
export CXX=g++
export CGO_ENABLED=1
export GOMAXPROCS=1
export GOFLAGS="-p=1 -gcflags=all=-dwarf=false"

make build
make -j1 build
make install PREFIX="$BASE_DIR"

# 後片付け
Expand Down Expand Up @@ -233,7 +238,9 @@ git clone https://gitlab.com/ecp-ci/jacamar-ci.git
cp tools.go jacamar-ci/internal/executors/pbs/

cd jacamar-ci
make build
export GOMAXPROCS=1
export GOFLAGS="-p=1 -gcflags=all=-dwarf=false"
make -j1 build
make install PREFIX="$BASE_DIR"
```

Expand Down Expand Up @@ -290,7 +297,9 @@ export CPATH="${SEC_PREFIX}/include:${CPATH:-}"
git clone https://gitlab.com/ecp-ci/jacamar-ci.git
cd jacamar-ci
export CC=gcc CXX=g++ CGO_ENABLED=1
make build
export GOMAXPROCS=1
export GOFLAGS="-p=1 -gcflags=all=-dwarf=false"
make -j1 build
make install PREFIX="${WORKDIR}"

# --- 5. 後片付け ---
Expand Down Expand Up @@ -347,10 +356,19 @@ set -euo pipefail
exit 0
```

### `runner-env.sh` - 共通環境初期化

`run.sh` から source される共通環境初期化ファイルです。非対話 shell でも site の module catalog やユーザの基本環境が見えるように、`/etc/profile`、`/etc/bashrc`、module 初期化ファイル、`~/.bashrc` を順に読みます。アプリごとの `build.sh` / `run.sh` は、原則として site の shell 初期化そのものではなく、必要な `module load` と実行コマンドだけを持ちます。

### `run.sh` - ジョブ実行
```bash
#!/usr/bin/env bash
source ~/.bashrc
RUNNER_ENV="${CUSTOM_DIR:-/path/to/gitlab-runner_jacamar-ci_amd}/runner-env.sh"
if [[ -r "${RUNNER_ENV}" ]]; then
source "${RUNNER_ENV}"
elif [[ -r "${HOME}/.bashrc" ]]; then
source "${HOME}/.bashrc"
fi
set -eo pipefail
exec "$@"
```
Expand Down
14 changes: 13 additions & 1 deletion programs/qws/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -89,8 +89,20 @@ case "$system" in
source /work/opt/local/x86_64/cores/intel/2023.0.0/mpi/latest/env/vars.sh
make compiler=intel arch=skylake rdma= -j8
;;
Pegasus)
module load intel/2025.3.1 intmpi/2025.3.1
make compiler=intel arch=skylake mpi=1 omp=1 rdma=
;;
Sirius)
module load aocc/5.0.0 openmpi/5.0.10/aocc5.0.0
make -j4 compiler=aocc arch=zen4 rdma= mpi=1 omp=1 profiler=timing \
AMD_MARCH=-march=znver4 cppflags="-DARCH_AVX512" main
;;
TSUBAME4)
make -j 8 fugaku_benchmark= omp=1 compiler=openmpi-gnu arch=skylake rdma= mpi=1 powerapi= CC=mpicc CXX=mpic++
module load openmpi/5.0.10-gcc aocc/4.1.0
export OMPI_CC=clang OMPI_CXX=clang++ OMPI_FC=flang
make -j4 compiler=aocc arch=zen4 rdma= mpi=1 omp=1 profiler=timing \
AMD_MARCH=-march=znver4 cppflags="-DARCH_AVX512" main
;;
Camphor3)
camphor3_modulepath="${MODULEPATH:-}"
Expand Down
4 changes: 3 additions & 1 deletion programs/qws/list.csv
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ AOBA_S,yes,1,1,8,0:10:00
AOBA_B,yes,1,1,128,0:10:00
Odyssey,yes,1,1,12,0:10:00
Aquarius,yes,1,1,8,0:10:00
TSUBAME4,yes,1,1,192,0:10:00
Pegasus,yes,1,1,96,00:10:00
Sirius,yes,1,1,24,0:10:00
TSUBAME4,yes,1,1,4,0:10:00
Camphor3,yes,1,1,112,0:10:00
FNCX,yes,1,1,1,0:10:00
14 changes: 14 additions & 0 deletions programs/qws/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -171,8 +171,22 @@ case "$system" in
mpiexec -n 1 ./main 32 6 4 3 1 1 1 1 -1 -1 6 50 > CASE0
print_results CASE0 CASE0 1 >> ../results/result
;;
Pegasus)
qws_numproc=$((nodes * numproc_node))
module load intel/2025.3.1 intmpi/2025.3.1
mpirun -n ${qws_numproc} ./main 32 6 4 3 1 1 1 1 -1 -1 6 50 > CASE0
print_results CASE0 CASE0 ${numproc_node} >> ../results/result
;;
Sirius)
qws_numproc=$((nodes * numproc_node))
module load aocc/5.0.0 openmpi/5.0.10/aocc5.0.0
mpirun -n ${qws_numproc} ./main 32 6 4 3 1 1 1 1 -1 -1 6 50 > CASE0
print_results CASE0 CASE0 ${numproc_node} >> ../results/result
;;
TSUBAME4)
qws_numproc=$((nodes * numproc_node))
module load openmpi/5.0.10-gcc aocc/4.1.0
export OMPI_CC=clang OMPI_CXX=clang++ OMPI_FC=flang
mpirun -n ${qws_numproc} ./main 32 6 4 3 1 1 1 1 -1 -1 6 50 > CASE0
print_results CASE0 CASE0 ${numproc_node} >> ../results/result
;;
Expand Down
51 changes: 46 additions & 5 deletions scripts/setup_site_runner.sh
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,9 @@ install_systemd=1
start_service=1
libseccomp_mode="auto"
jacamar_pbs_tools=""
jacamar_make_jobs="${JACAMAR_BUILD_MAKE_JOBS:-1}"
jacamar_gomaxprocs="${JACAMAR_BUILD_GOMAXPROCS:-1}"
jacamar_goflags="${JACAMAR_BUILD_GOFLAGS:--p=1 -gcflags=all=-dwarf=false}"
unrestricted_cmd_line=false
runner_proxy=""
runner_no_proxy=""
Expand Down Expand Up @@ -65,6 +68,12 @@ Options:
--no-start Create and enable service, but do not start it.
-h, --help Show this help.

Environment overrides:
JACAMAR_BUILD_MAKE_JOBS Jacamar build make parallelism. Default: 1.
JACAMAR_BUILD_GOMAXPROCS Jacamar build Go scheduler threads. Default: 1.
JACAMAR_BUILD_GOFLAGS Jacamar build Go flags.
Default: -p=1 -gcflags=all=-dwarf=false.

Example:
curl -fsSL https://raw.githubusercontent.com/RIKEN-RCCS/benchkit/main/scripts/setup_site_runner.sh \
| bash -s -- --arch amd64 --site genkai \
Expand Down Expand Up @@ -297,7 +306,10 @@ if [[ ! -x "$jacamar_bin" ]]; then
(
cd "${work_dir}/jacamar-ci"
export CC=gcc CXX=g++ CGO_ENABLED=1
make build
export GOMAXPROCS="${GOMAXPROCS:-$jacamar_gomaxprocs}"
export GOFLAGS="${GOFLAGS:-$jacamar_goflags}"
info "Using Jacamar build limits: make -j${jacamar_make_jobs}, GOMAXPROCS=${GOMAXPROCS}, GOFLAGS=${GOFLAGS}"
make -j"$jacamar_make_jobs" build
make install PREFIX="$base_dir"
)
else
Expand Down Expand Up @@ -349,11 +361,40 @@ set -euo pipefail
exit 0
EOF

cat > "${base_dir}/run.sh" <<'EOF'
cat > "${base_dir}/runner-env.sh" <<'EOF'
#!/usr/bin/env bash
source ~/.bashrc

# Best-effort loader: source a shell fragment into the current shell.
# Arguments: $1 - path of the file to source
# Returns:   always 0. An unreadable or missing path is skipped, and a
#            non-zero status from the sourced file is deliberately ignored
#            so that one failing init file does not stop the ones after it.
source_if_readable() {
  local file="$1"
  # Nothing to load: skip quietly instead of reporting an error.
  [[ -r "$file" ]] || return 0
  # shellcheck disable=SC1090
  source "$file" || :
}

source_if_readable /etc/profile
source_if_readable /etc/bashrc

if ! type module >/dev/null 2>&1; then
source_if_readable /etc/profile.d/modules.sh
source_if_readable /etc/profile.d/z00_lmod.sh
fi

source_if_readable "${HOME}/.bashrc"

unset -f source_if_readable
EOF

cat > "${base_dir}/run.sh" <<EOF
#!/usr/bin/env bash
RUNNER_ENV="\${CUSTOM_DIR:-${base_dir}}/runner-env.sh"
if [[ -r "\${RUNNER_ENV}" ]]; then
source "\${RUNNER_ENV}"
elif [[ -r "\${HOME}/.bashrc" ]]; then
source "\${HOME}/.bashrc"
fi
set -eo pipefail
exec "$@"
exec "\$@"
EOF

cat > "${base_dir}/cleanup.sh" <<EOF
Expand All @@ -378,7 +419,7 @@ esac
echo "CLEANUP DONE at \$(date)" >> "\$LOGFILE"
EOF

chmod +x "${base_dir}/config.sh" "${base_dir}/prepare.sh" "${base_dir}/run.sh" "${base_dir}/cleanup.sh"
chmod +x "${base_dir}/config.sh" "${base_dir}/prepare.sh" "${base_dir}/runner-env.sh" "${base_dir}/run.sh" "${base_dir}/cleanup.sh"

info "Writing Jacamar config"
cat > "${base_dir}/custom-config.toml" <<EOF
Expand Down
20 changes: 13 additions & 7 deletions scripts/test_submit.sh
Original file line number Diff line number Diff line change
Expand Up @@ -142,17 +142,23 @@ case "$system" in
--mpi proc=$proc --omp thread=$nthreads \
script.sh
;;
Pegasus|Sirius)
echo qsub -q $queue_group \
-l select=${nodes}:mpiprocs=${numproc_node}:ompthreads=${nthreads} \
Pegasus)
echo qsub -q $queue_group -A CNTBENCH \
-l elapstim_req=${elapse} -v OMP_NUM_THREADS=${nthreads} script.sh
qsub -q $queue_group -A CNTBENCH \
-l elapstim_req=${elapse} -v OMP_NUM_THREADS=${nthreads} script.sh
;;
Sirius)
echo qsub -q $queue_group -A CNTBENCH -W group_list=CNTBENCH \
-l select=${nodes}:ncpus=24:mem=124gb:ngpus=1 \
-l walltime=${elapse} script.sh
qsub -q $queue_group \
-l select=${nodes}:mpiprocs=${numproc_node}:ompthreads=${nthreads} \
qsub -q $queue_group -A CNTBENCH -W group_list=CNTBENCH \
-l select=${nodes}:ncpus=24:mem=124gb:ngpus=1 \
-l walltime=${elapse} script.sh
;;
TSUBAME4)
echo qsub -l ${queue_group}=${nodes} -l h_rt=${elapse} script.sh
qsub -l ${queue_group}=${nodes} -l h_rt=${elapse} script.sh
echo qsub -g jh260034 -l ${queue_group}=${nodes} -l h_rt=${elapse} script.sh
qsub -g jh260034 -l ${queue_group}=${nodes} -l h_rt=${elapse} script.sh
;;
Camphor3)
proc=$((nodes * numproc_node))
Expand Down
Loading