Skip to content

Commit f178994

Browse files
committed
GPU TPC Compression: Minor optimization to get rid of one synchronization point
1 parent 335d820 commit f178994

File tree

1 file changed

+4
-4
lines changed

1 file changed

+4
-4
lines changed

GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx

Lines changed: 4 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -242,17 +242,17 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1un
242242
storeCluster = 1;
243243
} while (false);
244244

245+
const uint32_t currentCount = smem.nCount;
245246
GPUbarrier();
246247
int32_t myIndex = work_group_scan_inclusive_add(storeCluster);
247248
int32_t storeLater = -1;
248249
if (storeCluster) {
249-
if (smem.nCount + myIndex <= GPUCA_TPC_COMP_CHUNK_SIZE) {
250-
sortBuffer[smem.nCount + myIndex - 1] = i;
250+
if (currentCount + myIndex <= GPUCA_TPC_COMP_CHUNK_SIZE) {
251+
sortBuffer[currentCount + myIndex - 1] = i;
251252
} else {
252-
storeLater = smem.nCount + myIndex - 1 - GPUCA_TPC_COMP_CHUNK_SIZE;
253+
storeLater = currentCount + myIndex - 1 - GPUCA_TPC_COMP_CHUNK_SIZE;
253254
}
254255
}
255-
GPUbarrier();
256256
if (iThread == nThreads - 1) {
257257
smem.nCount += myIndex;
258258
}

0 commit comments

Comments
 (0)