|
412 | 412 | echo "SETTING_ROOT_OUTPUT = $SETTING_ROOT_OUTPUT" |
413 | 413 |
|
414 | 414 | # Enabling GPUs |
415 | | -if [[ -n "$ALIEN_JDL_USEGPUS" && $ALIEN_JDL_USEGPUS != 0 ]] ; then |
416 | | - echo "Enabling GPUS" |
417 | | - [[ -z ${GPUTYPE:-} ]] && export GPUTYPE="HIP" |
418 | | - [[ -z ${GPUMEMSIZE:-} ]] && export GPUMEMSIZE=$((25 << 30)) |
419 | | - if [[ "0$ASYNC_PASS_NO_OPTIMIZED_DEFAULTS" != "01" ]]; then |
| 415 | +if [[ $ASYNC_PASS_NO_OPTIMIZED_DEFAULTS != 1 ]]; then |
| 416 | + export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1 |
| 417 | + if [[ $ALIEN_JDL_USEGPUS == 1 ]] ; then |
| 418 | + echo "Enabling GPUS" |
| 419 | + if [[ -z $ALIEN_JDL_SITEARCH ]]; then echo "ERROR: Must set ALIEN_JDL_SITEARCH to define GPU architecture!"; exit 1; fi |
| 420 | + if [[ $ALIEN_JDL_SITEARCH == "NERSC" ]]; then # Disable mlock / ulimit / gpu memory registration - has performance impact, but doesn't work at NERSC for now |
| 421 | + export SETENV_NO_ULIMIT=1 |
| 422 | + export CONFIG_EXTRA_PROCESS_o2_gpu_reco_workflow+="GPU_proc.noGPUMemoryRegistration=1;" |
| 423 | + fi |
| 424 | + ALIEN_JDL_SITEARCH_TMP=$ALIEN_JDL_SITEARCH |
| 425 | + if [[ $ALIEN_JDL_SITEARCH == "EPN_MI100" ]]; then |
| 426 | + ALIEN_JDL_SITEARCH_TMP=EPN |
| 427 | + export EPN_NODE_MI100=1 |
| 428 | + elif [[ $ALIEN_JDL_SITEARCH == "EPN_MI50" ]]; then |
| 429 | + ALIEN_JDL_SITEARCH_TMP=EPN |
| 430 | + fi |
420 | 431 | if [[ "ALIEN_JDL_USEFULLNUMADOMAIN" == 0 ]]; then |
421 | 432 | if [[ $keep -eq 0 ]]; then |
422 | 433 | if [[ $ALIEN_JDL_UNOPTIMIZEDGPUSETTINGS != 1 ]]; then |
423 | | - export OPTIMIZED_PARALLEL_ASYNC=pp_1gpu # sets the multiplicities to optimized defaults for this configuration (1 job with 1 gpu on EPNs) |
424 | | - export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1 |
| 434 | + export OPTIMIZED_PARALLEL_ASYNC=pp_1gpu_${ALIEN_JDL_SITEARCH_TMP} # (16 cores, 1 gpu per job, pp) |
425 | 435 | else |
426 | | - # forcing multiplicities to be 1 |
427 | | - export MULTIPLICITY_PROCESS_tof_matcher=1 |
428 | | - export MULTIPLICITY_PROCESS_mch_cluster_finder=1 |
429 | | - export MULTIPLICITY_PROCESS_tpc_entropy_decoder=1 |
430 | | - export MULTIPLICITY_PROCESS_itstpc_track_matcher=1 |
431 | | - export MULTIPLICITY_PROCESS_its_tracker=1 |
432 | | - export OMP_NUM_THREADS=4 |
433 | | - export TIMEFRAME_RATE_LIMIT=8 |
434 | | - export SHMSIZE=30000000000 |
| 436 | + export OPTIMIZED_PARALLEL_ASYNC=pp_1gpu_${ALIEN_JDL_SITEARCH_TMP}_unoptimized # (16 cores, 1 gpu per job, pp, low CPU multiplicities) |
435 | 437 | fi |
436 | 438 | else |
437 | | - export TIMEFRAME_RATE_LIMIT=4 |
438 | | - export SHMSIZE=30000000000 |
| 439 | + export OPTIMIZED_PARALLEL_ASYNC=keep_root |
439 | 440 | fi |
440 | 441 | else |
441 | 442 | if [[ $BEAMTYPE == "pp" ]]; then |
442 | | - export OPTIMIZED_PARALLEL_ASYNC=pp_4gpu # sets the multiplicities to optimized defaults for this configuration (1 Numa, pp) |
443 | | - export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1 |
| 443 | + export OPTIMIZED_PARALLEL_ASYNC=pp_4gpu_${ALIEN_JDL_SITEARCH_TMP} # (64 cores, 1 NUMA, 4 gpu per job, pp) |
444 | 444 | else # PbPb |
445 | | - export OPTIMIZED_PARALLEL_ASYNC=PbPb_4gpu # sets the multiplicities to optimized defaults for this configuration (1 Numa, PbPb) |
446 | | - export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1 |
| 445 | + export OPTIMIZED_PARALLEL_ASYNC=PbPb_4gpu_${ALIEN_JDL_SITEARCH_TMP} # (64 cores, 1 NUMA 4 gpu per job, PbPb) |
447 | 446 | fi |
448 | 447 | fi |
449 | | - fi |
450 | | -else |
451 | | - # David, Oct 13th |
452 | | - # the optimized settings for the 8 core GRID queue without GPU are |
453 | | - # (overwriting the values above) |
454 | | - # |
455 | | - if [[ "0$ASYNC_PASS_NO_OPTIMIZED_DEFAULTS" != "01" ]]; then |
456 | | - if [[ "$ALIEN_JDL_EPNFULLNUMACPUONLY" != 1 ]]; then |
457 | | - if [[ $BEAMTYPE == "pp" ]]; then |
458 | | - if (( $(echo "$RUN_IR > 800000" | bc -l) )); then |
459 | | - export TIMEFRAME_RATE_LIMIT=1 |
460 | | - elif (( $(echo "$RUN_IR < 50000" | bc -l) )); then |
461 | | - export TIMEFRAME_RATE_LIMIT=6 |
462 | | - else |
463 | | - export TIMEFRAME_RATE_LIMIT=3 |
464 | | - fi |
465 | | - export OPTIMIZED_PARALLEL_ASYNC=pp_8cpu # sets the multiplicities to optimized defaults for this configuration (grid) |
466 | | - export SHMSIZE=16000000000 |
467 | | - else # PbPb |
468 | | - export TIMEFRAME_RATE_LIMIT=2 |
469 | | - export OPTIMIZED_PARALLEL_ASYNC=pp_8cpu |
470 | | - export SHMSIZE=16000000000 |
471 | | - export SVERTEX_THREADS=5 |
472 | | - fi |
| 448 | + else |
| 449 | + export SETENV_NO_ULIMIT=1 |
| 450 | + export DPL_DEFAULT_PIPELINE_LENGTH=16 # to avoid memory issues - affects performance, so don't do with GPUs |
| 451 | + if [[ $ALIEN_JDL_EPNFULLNUMACPUONLY != 1 ]]; then |
| 452 | + export OPTIMIZED_PARALLEL_ASYNC=8cpu # (8 cores per job, grid) |
473 | 453 | else |
474 | | - export OPTIMIZED_PARALLEL_ASYNC=pp_64cpu # to use EPNs with full NUMA domain but without GPUs |
475 | | - export OPTIMIZED_PARALLEL_ASYNC_AUTO_SHM_LIMIT=1 |
| 454 | + export OPTIMIZED_PARALLEL_ASYNC=pp_64cpu # (64 cores per job, 1 NUMA, EPN) |
476 | 455 | fi |
477 | 456 | fi |
478 | 457 | fi |
|
0 commit comments