Skip to content

Commit aaa86f6

Browse files
committed
Simplify multiplicity settings in async reco scripts
1 parent fef755c commit aaa86f6

File tree

3 files changed

+68
-71
lines changed

3 files changed

+68
-71
lines changed

DATA/production/configurations/asyncReco/async_pass.sh

Lines changed: 27 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -412,67 +412,46 @@ fi
412412
echo "SETTING_ROOT_OUTPUT = $SETTING_ROOT_OUTPUT"
413413

414414
# Enabling GPUs
415-
if [[ -n "$ALIEN_JDL_USEGPUS" && $ALIEN_JDL_USEGPUS != 0 ]] ; then
416-
echo "Enabling GPUS"
417-
[[ -z ${GPUTYPE:-} ]] && export GPUTYPE="HIP"
418-
[[ -z ${GPUMEMSIZE:-} ]] && export GPUMEMSIZE=$((25 << 30))
419-
if [[ "0$ASYNC_PASS_NO_OPTIMIZED_DEFAULTS" != "01" ]]; then
415+
if [[ $ASYNC_PASS_NO_OPTIMIZED_DEFAULTS != 1 ]]; then
416+
export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1
417+
if [[ $ALIEN_JDL_USEGPUS == 1 ]] ; then
418+
echo "Enabling GPUS"
419+
if [[ -z $ALIEN_JDL_SITEARCH ]]; then echo "ERROR: Must set ALIEN_JDL_SITEARCH to define GPU architecture!"; exit 1; fi
420+
if [[ $ALIEN_JDL_SITEARCH == "NERSC" ]]; then # Disable mlock / ulimit / gpu memory registration - has performance impact, but doesn't work at NERSC for now
421+
export SETENV_NO_ULIMIT=1
422+
export CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow+="GPU_proc.noGPUMemoryRegistration=1;"
423+
fi
424+
ALIEN_JDL_SITEARCH_TMP=$ALIEN_JDL_SITEARCH
425+
if [[ $ALIEN_JDL_SITEARCH == "EPN_MI100" ]]; then
426+
ALIEN_JDL_SITEARCH_TMP=EPN
427+
export EPN_NODE_MI100=1
428+
elif [[ $ALIEN_JDL_SITEARCH == "EPN_MI50" ]]; then
429+
ALIEN_JDL_SITEARCH_TMP=EPN
430+
fi
420431
if [[ "$ALIEN_JDL_USEFULLNUMADOMAIN" == 0 ]]; then
421432
if [[ $keep -eq 0 ]]; then
422433
if [[ $ALIEN_JDL_UNOPTIMIZEDGPUSETTINGS != 1 ]]; then
423-
export OPTIMIZED_PARALLEL_ASYNC=pp_1gpu # sets the multiplicities to optimized defaults for this configuration (1 job with 1 gpu on EPNs)
424-
export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1
434+
export OPTIMIZED_PARALLEL_ASYNC=pp_1gpu_${ALIEN_JDL_SITEARCH_TMP} # (16 cores, 1 gpu per job, pp)
425435
else
426-
# forcing multiplicities to be 1
427-
export MULTIPLICITY_PROCESS_tof_matcher=1
428-
export MULTIPLICITY_PROCESS_mch_cluster_finder=1
429-
export MULTIPLICITY_PROCESS_tpc_entropy_decoder=1
430-
export MULTIPLICITY_PROCESS_itstpc_track_matcher=1
431-
export MULTIPLICITY_PROCESS_its_tracker=1
432-
export OMP_NUM_THREADS=4
433-
export TIMEFRAME_RATE_LIMIT=8
434-
export SHMSIZE=30000000000
436+
export OPTIMIZED_PARALLEL_ASYNC=pp_1gpu_${ALIEN_JDL_SITEARCH_TMP}_unoptimized # (16 cores, 1 gpu per job, pp, low CPU multiplicities)
435437
fi
436438
else
437-
export TIMEFRAME_RATE_LIMIT=4
438-
export SHMSIZE=30000000000
439+
export OPTIMIZED_PARALLEL_ASYNC=keep_root
439440
fi
440441
else
441442
if [[ $BEAMTYPE == "pp" ]]; then
442-
export OPTIMIZED_PARALLEL_ASYNC=pp_4gpu # sets the multiplicities to optimized defaults for this configuration (1 Numa, pp)
443-
export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1
443+
export OPTIMIZED_PARALLEL_ASYNC=pp_4gpu_${ALIEN_JDL_SITEARCH_TMP} # (64 cores, 1 NUMA, 4 gpu per job, pp)
444444
else # PbPb
445-
export OPTIMIZED_PARALLEL_ASYNC=PbPb_4gpu # sets the multiplicities to optimized defaults for this configuration (1 Numa, PbPb)
446-
export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1
445+
export OPTIMIZED_PARALLEL_ASYNC=PbPb_4gpu_${ALIEN_JDL_SITEARCH_TMP} # (64 cores, 1 NUMA 4 gpu per job, PbPb)
447446
fi
448447
fi
449-
fi
450-
else
451-
# David, Oct 13th
452-
# the optimized settings for the 8 core GRID queue without GPU are
453-
# (overwriting the values above)
454-
#
455-
if [[ "0$ASYNC_PASS_NO_OPTIMIZED_DEFAULTS" != "01" ]]; then
456-
if [[ "$ALIEN_JDL_EPNFULLNUMACPUONLY" != 1 ]]; then
457-
if [[ $BEAMTYPE == "pp" ]]; then
458-
if (( $(echo "$RUN_IR > 800000" | bc -l) )); then
459-
export TIMEFRAME_RATE_LIMIT=1
460-
elif (( $(echo "$RUN_IR < 50000" | bc -l) )); then
461-
export TIMEFRAME_RATE_LIMIT=6
462-
else
463-
export TIMEFRAME_RATE_LIMIT=3
464-
fi
465-
export OPTIMIZED_PARALLEL_ASYNC=pp_8cpu # sets the multiplicities to optimized defaults for this configuration (grid)
466-
export SHMSIZE=16000000000
467-
else # PbPb
468-
export TIMEFRAME_RATE_LIMIT=2
469-
export OPTIMIZED_PARALLEL_ASYNC=pp_8cpu
470-
export SHMSIZE=16000000000
471-
export SVERTEX_THREADS=5
472-
fi
448+
else
449+
export SETENV_NO_ULIMIT=1
450+
export DPL_DEFAULT_PIPELINE_LENGTH=16 # to avoid memory issues - affects performance, so don't do with GPUs
451+
if [[ $ALIEN_JDL_EPNFULLNUMACPUONLY != 1 ]]; then
452+
export OPTIMIZED_PARALLEL_ASYNC=8cpu # (8 cores per job, grid)
473453
else
474-
export OPTIMIZED_PARALLEL_ASYNC=pp_64cpu # to use EPNs with full NUMA domain but without GPUs
475-
export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1
454+
export OPTIMIZED_PARALLEL_ASYNC=pp_64cpu # (64 cores per job, 1 NUMA, EPN)
476455
fi
477456
fi
478457
fi

DATA/production/configurations/asyncReco/setenv_extra.sh

Lines changed: 0 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -3,15 +3,6 @@
33

44
# process flags passed to the script
55

6-
if [[ -z "$ALIEN_JDL_USEGPUS" || $ALIEN_JDL_USEGPUS != 1 ]]; then
7-
export SETENV_NO_ULIMIT=1
8-
fi
9-
10-
# to avoid memory issues - we don't do this on the EPNs, since it can affect the performance
11-
if [[ $ALIEN_JDL_USEGPUS != 1 ]]; then
12-
export DPL_DEFAULT_PIPELINE_LENGTH=16
13-
fi
14-
156
# check if this is a production on skimmed data
167
if grep -q /skimmed/ wn.xml ; then
178
export ON_SKIMMED_DATA=1;

DATA/production/workflow-multiplicities.sh

Lines changed: 41 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -53,11 +53,23 @@ if [[ ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]]; then
5353
[[ ! -z ${TIMEFRAME_RATE_LIMIT:-} ]] && unset TIMEFRAME_RATE_LIMIT
5454
[[ ! -z ${SHMSIZE:-} ]] && unset SHMSIZE
5555
fi
56-
if [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_8cpu" ]]; then
56+
if [[ $OPTIMIZED_PARALLEL_ASYNC == "8cpu" ]]; then
5757
[[ -z ${TIMEFRAME_RATE_LIMIT:-} ]] && TIMEFRAME_RATE_LIMIT=3
5858
[[ -z ${SHMSIZE:-} ]] && SHMSIZE=16000000000
5959
NGPURECOTHREADS=5
60-
elif [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_16cpu" ]]; then
60+
if [[ $BEAMTYPE == "pp" ]]; then
61+
if (( $(echo "$RUN_IR > 800000" | bc -l) )); then
62+
TIMEFRAME_RATE_LIMIT=1
63+
elif (( $(echo "$RUN_IR < 50000" | bc -l) )); then
64+
TIMEFRAME_RATE_LIMIT=6
65+
else
66+
TIMEFRAME_RATE_LIMIT=3
67+
fi
68+
else # PbPb
69+
TIMEFRAME_RATE_LIMIT=2
70+
SVERTEX_THREADS=5
71+
fi
72+
elif [[ $OPTIMIZED_PARALLEL_ASYNC == "16cpu" ]]; then
6173
[[ -z ${TIMEFRAME_RATE_LIMIT:-} ]] && TIMEFRAME_RATE_LIMIT=8
6274
[[ -z ${SHMSIZE:-} ]] && SHMSIZE=22000000000
6375
NGPURECOTHREADS=9
@@ -78,20 +90,28 @@ if [[ ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]]; then
7890
N_MCHCL=3
7991
N_TOFMATCH=2
8092
N_TPCENTDEC=3
81-
elif [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_1gpu" ]]; then
93+
elif [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_1gpu_EPN" || $OPTIMIZED_PARALLEL_ASYNC == "pp_1gpu_EPN_unoptimized" ]]; then
8294
[[ -z ${TIMEFRAME_RATE_LIMIT:-} ]] && TIMEFRAME_RATE_LIMIT=8
8395
[[ -z ${SHMSIZE:-} ]] && SHMSIZE=30000000000
84-
N_TOFMATCH=2
85-
N_MCHCL=3
86-
N_TPCENTDEC=2
87-
N_TPCITS=3
96+
NGPUS=1
97+
GPUTYPE=HIP
98+
GPUMEMSIZE=$((25 << 30))
8899
N_MCHTRK=2
89-
N_ITSTRK=3
90-
NGPURECOTHREADS=8
100+
N_TPCTRK=$NGPUS
101+
if [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_1gpu_EPN" ]]; then
102+
N_TOFMATCH=2
103+
N_MCHCL=3
104+
N_TPCENTDEC=2
105+
N_TPCITS=3
106+
N_ITSTRK=3
107+
NGPURECOTHREADS=8
108+
else
109+
NGPURECOTHREADS=4
110+
fi
91111
NTRDTRKTHREADS=3
92112
ITSTRK_THREADS=2
93113
ITSTPC_THREADS=2
94-
elif [[ $OPTIMIZED_PARALLEL_ASYNC =~ ^pp_4gpu(_|$) ]]; then
114+
elif [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_gpu_NERSC" || $OPTIMIZED_PARALLEL_ASYNC == "pp_4gpu_EPN" ]]; then
95115
if [[ -z ${TIMEFRAME_RATE_LIMIT:-} ]]; then
96116
if [[ ! -z ${ALIEN_JDL_LPMANCHORYEAR} && ${ALIEN_JDL_LPMANCHORYEAR} -lt 2023 ]]; then
97117
TIMEFRAME_RATE_LIMIT=45
@@ -100,19 +120,21 @@ if [[ ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]]; then
100120
fi
101121
fi
102122
[[ -z ${SHMSIZE:-} ]] && SHMSIZE=100000000000
103-
if [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_4gpu_NERSC" ]]; then
123+
if [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_gpu_NERSC" ]]; then
104124
NGPUS=1
105125
GPUTYPE=CUDA
106126
else
107127
NGPUS=4
128+
GPUTYPE=HIP
108129
fi
130+
GPUMEMSIZE=$((25 << 30))
109131
NGPURECOTHREADS=8
110132
NTRDTRKTHREADS=2
111133
ITSTRK_THREADS=2
112134
ITSTPC_THREADS=2
113135
SVERTEX_THREADS=4
114136
TPCTIMESERIES_THREADS=2
115-
N_TPCTRK=4
137+
N_TPCTRK=$NGPUS
116138
N_FWDMATCH=2
117139
N_PRIMVTXMATCH=1
118140
N_PRIMVTX=1
@@ -128,16 +150,18 @@ if [[ ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]]; then
128150
N_ITSTRK=12
129151
N_ITSCL=2
130152
export DPL_SMOOTH_RATE_LIMITING=1
131-
elif [[ $OPTIMIZED_PARALLEL_ASYNC =~ ^PbPb_4gpu(_|$) ]]; then
153+
elif [[ $OPTIMIZED_PARALLEL_ASYNC == "PbPb_gpu_NERSC" || $OPTIMIZED_PARALLEL_ASYNC == "PbPb_4gpu_EPN" ]]; then
132154
[[ -z ${TIMEFRAME_RATE_LIMIT:-} ]] && TIMEFRAME_RATE_LIMIT=35
133155
[[ -z ${SHMSIZE:-} ]] && SHMSIZE=100000000000 # SHM_LIMIT 3/4
134156
[[ -z ${TIMEFRAME_SHM_LIMIT:-} ]] && TIMEFRAME_SHM_LIMIT=$(($SHMSIZE / 3))
135-
if [[ $OPTIMIZED_PARALLEL_ASYNC == "PbPb_4gpu_NERSC" ]]; then
157+
if [[ $OPTIMIZED_PARALLEL_ASYNC == "PbPb_gpu_NERSC" ]]; then
136158
NGPUS=1
137159
GPUTYPE=CUDA
138160
else
139161
NGPUS=4
162+
GPUTYPE=HIP
140163
fi
164+
GPUMEMSIZE=$((25 << 30))
141165
NGPURECOTHREADS=8
142166
NTRDTRKTHREADS=8
143167
ITSTRK_THREADS=5
@@ -172,6 +196,9 @@ if [[ ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]]; then
172196
N_MCHTRK=1
173197
N_TOFMATCH=9
174198
N_TPCTRK=6
199+
elif [[ $OPTIMIZED_PARALLEL_ASYNC == "keep_root" ]]; then
200+
TIMEFRAME_RATE_LIMIT=4
201+
SHMSIZE=30000000000
175202
else
176203
echo "Invalid optimized setting '$OPTIMIZED_PARALLEL_ASYNC'" 1>&2
177204
exit 1

0 commit comments

Comments
 (0)