Skip to content

Commit 09d91d2

Browse files
committed
GPU: Fix Warp race condition in CUDA with Turing and later GPUs
1 parent 22371e6 commit 09d91d2

File tree

1 file changed

+3
-0
lines changed

1 file changed

+3
-0
lines changed

GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -481,16 +481,19 @@ GPUdi() void GPUTPCCompressionGatherKernels::compressorMemcpyBuffered(V* buf, T*
481481
compressorMemcpyBasic(bufT + shmPos, src + srcOffset + srcPos, size, nLanes, iLane);
482482
srcPos += size;
483483
shmPos += size;
484+
GPUbarrierWarp();
484485

485486
if (shmPos >= bufTSize) {
486487
compressorMemcpyBasic(dstAligned + dstOffset, buf, bufSize, nLanes, iLane);
487488
dstOffset += bufSize;
488489
shmPos = 0;
490+
GPUbarrierWarp();
489491
}
490492
}
491493
}
492494

493495
compressorMemcpyBasic(reinterpret_cast<T*>(dstAligned + dstOffset), bufT, shmPos, nLanes, iLane);
496+
GPUbarrierWarp();
494497
}
495498

496499
template <typename T>

0 commit comments

Comments
 (0)