Skip to content

Commit fd6d4eb

Browse files
committed
dpl-workflow: the simple serialization mitigation does not seem to work on MI100 in async mode; use the full serialization workaround instead
1 parent de1a1d6 commit fd6d4eb

File tree

1 file changed

+2
-1
lines changed

1 file changed

+2
-1
lines changed

prodtests/full-system-test/dpl-workflow.sh

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -272,7 +272,8 @@ if [[ $GPUTYPE == "HIP" ]]; then
272272
GPU_CONFIG+=" --environment \"ROCR_VISIBLE_DEVICES={timeslice${TIMESLICEOFFSET}}\""
273273
fi
274274
# Serialization workaround for MI100 nodes: remove it again once the problem is fixed in ROCm, and then also remove the DISABLE_MI100_SERIALIZATION flag in the O2DPG parse script
275-
[[ $EPNSYNCMODE == 1 || -n ${OPTIMIZED_PARALLEL_ASYNC:-} ]] && [[ ${EPN_NODE_MI100:-} == "1" ]] && [[ ${DISABLE_MI100_SERIALIZATION:-0} != 1 ]] && GPU_CONFIG_KEY+="GPU_proc.amdMI100SerializationWorkaround=1;"
275+
[[ $EPNSYNCMODE == 1 ]] && [[ ${EPN_NODE_MI100:-} == "1" ]] && [[ ${DISABLE_MI100_SERIALIZATION:-0} != 1 ]] && GPU_CONFIG_KEY+="GPU_proc.amdMI100SerializationWorkaround=1;"
276+
[[ -n ${OPTIMIZED_PARALLEL_ASYNC:-} ]] && [[ ${EPN_NODE_MI100:-} == "1" ]] && [[ ${DISABLE_MI100_SERIALIZATION:-0} != 1 ]] && GPU_CONFIG_KEY+="GPU_proc.serializeGPU=3;"
276277
#export HSA_TOOLS_LIB=/opt/rocm/lib/librocm-debug-agent.so.2
277278
else
278279
GPU_CONFIG_KEY+="GPU_proc.deviceNum=-2;"

0 commit comments

Comments
 (0)