@@ -411,68 +411,49 @@ if [[ $keep -eq 1 ]]; then
411411fi
412412echo " SETTING_ROOT_OUTPUT = $SETTING_ROOT_OUTPUT "
413413
414+ if [[ -z " $ALIEN_JDL_USEGPUS " || $ALIEN_JDL_USEGPUS != 1 ]]; then
415+ export
416+ fi
417+
418+
419+ if [[ $ALIEN_JDL_USEGPUS != 1 ]]; then
420+
421+ fi
422+
414423# Enabling GPUs
415- if [[ -n " $ALIEN_JDL_USEGPUS " && $ALIEN_JDL_USEGPUS != 0 ]] ; then
416- echo " Enabling GPUS"
417- [[ -z ${GPUTYPE:- } ]] && export GPUTYPE=" HIP"
418- [[ -z ${GPUMEMSIZE:- } ]] && export GPUMEMSIZE=$(( 25 << 30 ))
419- if [[ " 0$ASYNC_PASS_NO_OPTIMIZED_DEFAULTS " != " 01" ]]; then
424+ if [[ $ASYNC_PASS_NO_OPTIMIZED_DEFAULTS != 1 ]]; then
425+ export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1
426+ ALIEN_JDL_SITE=EPN
427+ if [[ $ALIEN_JDL_USEGPUS == 1 ]] ; then
428+ echo " Enabling GPUS"
429+ if [[ $ALIEN_JDL_SITE == " NERSC" ]]; then # Disable mlock / ulimit / gpu memory registration - has performance impact, but doesn't work at NERSC for now
430+ SETENV_NO_ULIMIT=1
431+ CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow+=" GPU_proc.noGPUMemoryRegistration=1;"
432+ fi
420433 if [[ " ALIEN_JDL_USEFULLNUMADOMAIN" == 0 ]]; then
421434 if [[ $keep -eq 0 ]]; then
422435 if [[ $ALIEN_JDL_UNOPTIMIZEDGPUSETTINGS != 1 ]]; then
423- export OPTIMIZED_PARALLEL_ASYNC=pp_1gpu # sets the multiplicities to optimized defaults for this configuration (1 job with 1 gpu on EPNs)
424- export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1
436+ export OPTIMIZED_PARALLEL_ASYNC=pp_1gpu_${ALIEN_JDL_SITE} # (16 cores, 1 gpu per job, pp)
425437 else
426- # forcing multiplicities to be 1
427- export MULTIPLICITY_PROCESS_tof_matcher=1
428- export MULTIPLICITY_PROCESS_mch_cluster_finder=1
429- export MULTIPLICITY_PROCESS_tpc_entropy_decoder=1
430- export MULTIPLICITY_PROCESS_itstpc_track_matcher=1
431- export MULTIPLICITY_PROCESS_its_tracker=1
432- export OMP_NUM_THREADS=4
433- export TIMEFRAME_RATE_LIMIT=8
434- export SHMSIZE=30000000000
438+ export OPTIMIZED_PARALLEL_ASYNC=pp_1gpu_${ALIEN_JDL_SITE} _unoptimized # (16 cores, 1 gpu per job, pp, low CPU multiplicities)
435439 fi
436440 else
437- export TIMEFRAME_RATE_LIMIT=4
438- export SHMSIZE=30000000000
441+ export OPTIMIZED_PARALLEL_ASYNC=keep_root
439442 fi
440443 else
441444 if [[ $BEAMTYPE == " pp" ]]; then
442- export OPTIMIZED_PARALLEL_ASYNC=pp_4gpu # sets the multiplicities to optimized defaults for this configuration (1 Numa, pp)
443- export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1
445+ export OPTIMIZED_PARALLEL_ASYNC=pp_4gpu_${ALIEN_JDL_SITE} # (64 cores, 1 NUMA, 4 gpu per job, pp)
444446 else # PbPb
445- export OPTIMIZED_PARALLEL_ASYNC=PbPb_4gpu # sets the multiplicities to optimized defaults for this configuration (1 Numa, PbPb)
446- export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1
447+ export OPTIMIZED_PARALLEL_ASYNC=PbPb_4gpu_${ALIEN_JDL_SITE} # (64 cores, 1 NUMA 4 gpu per job, PbPb)
447448 fi
448449 fi
449- fi
450- else
451- # David, Oct 13th
452- # the optimized settings for the 8 core GRID queue without GPU are
453- # (overwriting the values above)
454- #
455- if [[ " 0$ASYNC_PASS_NO_OPTIMIZED_DEFAULTS " != " 01" ]]; then
456- if [[ " $ALIEN_JDL_EPNFULLNUMACPUONLY " != 1 ]]; then
457- if [[ $BEAMTYPE == " pp" ]]; then
458- if (( $(echo "$RUN_IR > 800000 " | bc - l) )) ; then
459- export TIMEFRAME_RATE_LIMIT=1
460- elif (( $(echo "$RUN_IR < 50000 " | bc - l) )) ; then
461- export TIMEFRAME_RATE_LIMIT=6
462- else
463- export TIMEFRAME_RATE_LIMIT=3
464- fi
465- export OPTIMIZED_PARALLEL_ASYNC=pp_8cpu # sets the multiplicities to optimized defaults for this configuration (grid)
466- export SHMSIZE=16000000000
467- else # PbPb
468- export TIMEFRAME_RATE_LIMIT=2
469- export OPTIMIZED_PARALLEL_ASYNC=pp_8cpu
470- export SHMSIZE=16000000000
471- export SVERTEX_THREADS=5
472- fi
450+ else
451+ SETENV_NO_ULIMIT=1
452+ DPL_DEFAULT_PIPELINE_LENGTH=16 # to avoid memory issues - affects performance, so don't do with GPUs
453+ if [[ $ALIEN_JDL_EPNFULLNUMACPUONLY != 1 ]]; then
454+ export OPTIMIZED_PARALLEL_ASYNC=8cpu # (8 cores per job, grid)
473455 else
474- export OPTIMIZED_PARALLEL_ASYNC=pp_64cpu # to use EPNs with full NUMA domain but without GPUs
475- export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1
456+ export OPTIMIZED_PARALLEL_ASYNC=pp_64cpu # (64 cores per job, 1 NUMA, EPN)
476457 fi
477458 fi
478459fi
0 commit comments