Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
75 changes: 27 additions & 48 deletions DATA/production/configurations/asyncReco/async_pass.sh
Original file line number Diff line number Diff line change
Expand Up @@ -412,67 +412,46 @@ fi
echo "SETTING_ROOT_OUTPUT = $SETTING_ROOT_OUTPUT"

# Enabling GPUs
if [[ -n "$ALIEN_JDL_USEGPUS" && $ALIEN_JDL_USEGPUS != 0 ]] ; then
echo "Enabling GPUS"
[[ -z ${GPUTYPE:-} ]] && export GPUTYPE="HIP"
[[ -z ${GPUMEMSIZE:-} ]] && export GPUMEMSIZE=$((25 << 30))
if [[ "0$ASYNC_PASS_NO_OPTIMIZED_DEFAULTS" != "01" ]]; then
if [[ $ASYNC_PASS_NO_OPTIMIZED_DEFAULTS != 1 ]]; then
export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1
if [[ $ALIEN_JDL_USEGPUS == 1 ]] ; then
echo "Enabling GPUS"
if [[ -z $ALIEN_JDL_SITEARCH ]]; then echo "ERROR: Must set ALIEN_JDL_SITEARCH to define GPU architecture!"; exit 1; fi
if [[ $ALIEN_JDL_SITEARCH == "NERSC" ]]; then # Disable mlock / ulimit / gpu memory registration - has performance impact, but doesn't work at NERSC for now
export SETENV_NO_ULIMIT=1
export CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow+="GPU_proc.noGPUMemoryRegistration=1;"
fi
ALIEN_JDL_SITEARCH_TMP=$ALIEN_JDL_SITEARCH
if [[ $ALIEN_JDL_SITEARCH == "EPN_MI100" ]]; then
ALIEN_JDL_SITEARCH_TMP=EPN
export EPN_NODE_MI100=1
elif [[ $ALIEN_JDL_SITEARCH == "EPN_MI50" ]]; then
ALIEN_JDL_SITEARCH_TMP=EPN
fi
if [[ "ALIEN_JDL_USEFULLNUMADOMAIN" == 0 ]]; then
if [[ $keep -eq 0 ]]; then
if [[ $ALIEN_JDL_UNOPTIMIZEDGPUSETTINGS != 1 ]]; then
export OPTIMIZED_PARALLEL_ASYNC=pp_1gpu # sets the multiplicities to optimized defaults for this configuration (1 job with 1 gpu on EPNs)
export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1
export OPTIMIZED_PARALLEL_ASYNC=pp_1gpu_${ALIEN_JDL_SITEARCH_TMP} # (16 cores, 1 gpu per job, pp)
else
# forcing multiplicities to be 1
export MULTIPLICITY_PROCESS_tof_matcher=1
export MULTIPLICITY_PROCESS_mch_cluster_finder=1
export MULTIPLICITY_PROCESS_tpc_entropy_decoder=1
export MULTIPLICITY_PROCESS_itstpc_track_matcher=1
export MULTIPLICITY_PROCESS_its_tracker=1
export OMP_NUM_THREADS=4
export TIMEFRAME_RATE_LIMIT=8
export SHMSIZE=30000000000
export OPTIMIZED_PARALLEL_ASYNC=pp_1gpu_${ALIEN_JDL_SITEARCH_TMP}_unoptimized # (16 cores, 1 gpu per job, pp, low CPU multiplicities)
fi
else
export TIMEFRAME_RATE_LIMIT=4
export SHMSIZE=30000000000
export OPTIMIZED_PARALLEL_ASYNC=keep_root
fi
else
if [[ $BEAMTYPE == "pp" ]]; then
export OPTIMIZED_PARALLEL_ASYNC=pp_4gpu # sets the multiplicities to optimized defaults for this configuration (1 Numa, pp)
export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1
export OPTIMIZED_PARALLEL_ASYNC=pp_4gpu_${ALIEN_JDL_SITEARCH_TMP} # (64 cores, 1 NUMA, 4 gpu per job, pp)
else # PbPb
export OPTIMIZED_PARALLEL_ASYNC=PbPb_4gpu # sets the multiplicities to optimized defaults for this configuration (1 Numa, PbPb)
export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1
export OPTIMIZED_PARALLEL_ASYNC=PbPb_4gpu_${ALIEN_JDL_SITEARCH_TMP} # (64 cores, 1 NUMA 4 gpu per job, PbPb)
fi
fi
fi
else
# David, Oct 13th
# The optimized settings for the 8-core GRID queue without GPU
# are set below (they overwrite the values set above).
#
if [[ "0$ASYNC_PASS_NO_OPTIMIZED_DEFAULTS" != "01" ]]; then
if [[ "$ALIEN_JDL_EPNFULLNUMACPUONLY" != 1 ]]; then
if [[ $BEAMTYPE == "pp" ]]; then
if (( $(echo "$RUN_IR > 800000" | bc -l) )); then
export TIMEFRAME_RATE_LIMIT=1
elif (( $(echo "$RUN_IR < 50000" | bc -l) )); then
export TIMEFRAME_RATE_LIMIT=6
else
export TIMEFRAME_RATE_LIMIT=3
fi
export OPTIMIZED_PARALLEL_ASYNC=pp_8cpu # sets the multiplicities to optimized defaults for this configuration (grid)
export SHMSIZE=16000000000
else # PbPb
export TIMEFRAME_RATE_LIMIT=2
export OPTIMIZED_PARALLEL_ASYNC=pp_8cpu
export SHMSIZE=16000000000
export SVERTEX_THREADS=5
fi
else
export SETENV_NO_ULIMIT=1
export DPL_DEFAULT_PIPELINE_LENGTH=16 # to avoid memory issues - affects performance, so don't do with GPUs
if [[ $ALIEN_JDL_EPNFULLNUMACPUONLY != 1 ]]; then
export OPTIMIZED_PARALLEL_ASYNC=8cpu # (8 cores per job, grid)
else
export OPTIMIZED_PARALLEL_ASYNC=pp_64cpu # to use EPNs with full NUMA domain but without GPUs
export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1
export OPTIMIZED_PARALLEL_ASYNC=pp_64cpu # (64 cores per job, 1 NUMA, EPN)
fi
fi
fi
Expand Down
9 changes: 0 additions & 9 deletions DATA/production/configurations/asyncReco/setenv_extra.sh
Original file line number Diff line number Diff line change
Expand Up @@ -3,15 +3,6 @@

# process flags passed to the script

if [[ -z "$ALIEN_JDL_USEGPUS" || $ALIEN_JDL_USEGPUS != 1 ]]; then
export SETENV_NO_ULIMIT=1
fi

# To avoid memory issues. We don't do this on the EPNs, since it can affect performance.
if [[ $ALIEN_JDL_USEGPUS != 1 ]]; then
export DPL_DEFAULT_PIPELINE_LENGTH=16
fi

# check if this is a production on skimmed data
if grep -q /skimmed/ wn.xml ; then
export ON_SKIMMED_DATA=1;
Expand Down
55 changes: 41 additions & 14 deletions DATA/production/workflow-multiplicities.sh
Original file line number Diff line number Diff line change
Expand Up @@ -53,11 +53,23 @@ if [[ ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]]; then
[[ ! -z ${TIMEFRAME_RATE_LIMIT:-} ]] && unset TIMEFRAME_RATE_LIMIT
[[ ! -z ${SHMSIZE:-} ]] && unset SHMSIZE
fi
if [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_8cpu" ]]; then
if [[ $OPTIMIZED_PARALLEL_ASYNC == "8cpu" ]]; then
[[ -z ${TIMEFRAME_RATE_LIMIT:-} ]] && TIMEFRAME_RATE_LIMIT=3
[[ -z ${SHMSIZE:-} ]] && SHMSIZE=16000000000
NGPURECOTHREADS=5
elif [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_16cpu" ]]; then
if [[ $BEAMTYPE == "pp" ]]; then
if (( $(echo "$RUN_IR > 800000" | bc -l) )); then
TIMEFRAME_RATE_LIMIT=1
elif (( $(echo "$RUN_IR < 50000" | bc -l) )); then
TIMEFRAME_RATE_LIMIT=6
else
TIMEFRAME_RATE_LIMIT=3
fi
else # PbPb
TIMEFRAME_RATE_LIMIT=2
SVERTEX_THREADS=5
fi
elif [[ $OPTIMIZED_PARALLEL_ASYNC == "16cpu" ]]; then
[[ -z ${TIMEFRAME_RATE_LIMIT:-} ]] && TIMEFRAME_RATE_LIMIT=8
[[ -z ${SHMSIZE:-} ]] && SHMSIZE=22000000000
NGPURECOTHREADS=9
Expand All @@ -78,20 +90,28 @@ if [[ ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]]; then
N_MCHCL=3
N_TOFMATCH=2
N_TPCENTDEC=3
elif [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_1gpu" ]]; then
elif [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_1gpu_EPN" || $OPTIMIZED_PARALLEL_ASYNC == "pp_1gpu_EPN_unoptimized" ]]; then
[[ -z ${TIMEFRAME_RATE_LIMIT:-} ]] && TIMEFRAME_RATE_LIMIT=8
[[ -z ${SHMSIZE:-} ]] && SHMSIZE=30000000000
N_TOFMATCH=2
N_MCHCL=3
N_TPCENTDEC=2
N_TPCITS=3
NGPUS=1
GPUTYPE=HIP
GPUMEMSIZE=$((25 << 30))
N_MCHTRK=2
N_ITSTRK=3
NGPURECOTHREADS=8
N_TPCTRK=$NGPUS
if [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_1gpu_EPN" ]]; then
N_TOFMATCH=2
N_MCHCL=3
N_TPCENTDEC=2
N_TPCITS=3
N_ITSTRK=3
NGPURECOTHREADS=8
else
NGPURECOTHREADS=4
fi
NTRDTRKTHREADS=3
ITSTRK_THREADS=2
ITSTPC_THREADS=2
elif [[ $OPTIMIZED_PARALLEL_ASYNC =~ ^pp_4gpu(_|$) ]]; then
elif [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_gpu_NERSC" || $OPTIMIZED_PARALLEL_ASYNC == "pp_4gpu_EPN" ]]; then
if [[ -z ${TIMEFRAME_RATE_LIMIT:-} ]]; then
if [[ ! -z ${ALIEN_JDL_LPMANCHORYEAR} && ${ALIEN_JDL_LPMANCHORYEAR} -lt 2023 ]]; then
TIMEFRAME_RATE_LIMIT=45
Expand All @@ -100,19 +120,21 @@ if [[ ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]]; then
fi
fi
[[ -z ${SHMSIZE:-} ]] && SHMSIZE=100000000000
if [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_4gpu_NERSC" ]]; then
if [[ $OPTIMIZED_PARALLEL_ASYNC == "pp_gpu_NERSC" ]]; then
NGPUS=1
GPUTYPE=CUDA
else
NGPUS=4
GPUTYPE=HIP
fi
GPUMEMSIZE=$((25 << 30))
NGPURECOTHREADS=8
NTRDTRKTHREADS=2
ITSTRK_THREADS=2
ITSTPC_THREADS=2
SVERTEX_THREADS=4
TPCTIMESERIES_THREADS=2
N_TPCTRK=4
N_TPCTRK=$NGPUS
N_FWDMATCH=2
N_PRIMVTXMATCH=1
N_PRIMVTX=1
Expand All @@ -128,16 +150,18 @@ if [[ ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]]; then
N_ITSTRK=12
N_ITSCL=2
export DPL_SMOOTH_RATE_LIMITING=1
elif [[ $OPTIMIZED_PARALLEL_ASYNC =~ ^PbPb_4gpu(_|$) ]]; then
elif [[ $OPTIMIZED_PARALLEL_ASYNC == "PbPb_gpu_NERSC" || $OPTIMIZED_PARALLEL_ASYNC == "PbPb_4gpu_EPN" ]]; then
[[ -z ${TIMEFRAME_RATE_LIMIT:-} ]] && TIMEFRAME_RATE_LIMIT=35
[[ -z ${SHMSIZE:-} ]] && SHMSIZE=100000000000 # SHM_LIMIT 3/4
[[ -z ${TIMEFRAME_SHM_LIMIT:-} ]] && TIMEFRAME_SHM_LIMIT=$(($SHMSIZE / 3))
if [[ $OPTIMIZED_PARALLEL_ASYNC == "PbPb_4gpu_NERSC" ]]; then
if [[ $OPTIMIZED_PARALLEL_ASYNC == "PbPb_gpu_NERSC" ]]; then
NGPUS=1
GPUTYPE=CUDA
else
NGPUS=4
GPUTYPE=HIP
fi
GPUMEMSIZE=$((25 << 30))
NGPURECOTHREADS=8
NTRDTRKTHREADS=8
ITSTRK_THREADS=5
Expand Down Expand Up @@ -172,6 +196,9 @@ if [[ ! -z ${OPTIMIZED_PARALLEL_ASYNC:-} ]]; then
N_MCHTRK=1
N_TOFMATCH=9
N_TPCTRK=6
elif [[ $OPTIMIZED_PARALLEL_ASYNC == "keep_root" ]]; then
TIMEFRAME_RATE_LIMIT=4
SHMSIZE=30000000000
else
echo "Invalid optimized setting '$OPTIMIZED_PARALLEL_ASYNC'" 1>&2
exit 1
Expand Down