@@ -283,21 +283,29 @@ GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end, const S& comp)
283283#ifndef GPUCA_GPUCODE
284284 GPUCommonAlgorithm::sort (begin, end, comp);
285285#else
286- int32_t n = end - begin;
287- for (int32_t i = 0 ; i < n; i++) {
288- for (int32_t tIdx = get_local_id (0 ); tIdx < n; tIdx += get_local_size (0 )) {
289- int32_t offset = i % 2 ;
290- int32_t curPos = 2 * tIdx + offset;
291- int32_t nextPos = curPos + 1 ;
292-
293- if (nextPos < n) {
294- if (!comp (begin[curPos], begin[nextPos])) {
295- IterSwap (&begin[curPos], &begin[nextPos]);
286+ GPUCA_DETERMINISTIC_CODE ( // clang-format off
287+ GPUbarrier ();
288+ if (get_local_id (0 ) == 0 ) {
289+ GPUCommonAlgorithm::sort (begin, end, comp);
290+ }
291+ GPUbarrier ();
292+ , // !GPUCA_DETERMINISTIC_CODE
293+ int32_t n = end - begin;
294+ for (int32_t i = 0 ; i < n; i++) {
295+ for (int32_t tIdx = get_local_id (0 ); tIdx < n; tIdx += get_local_size (0 )) {
296+ int32_t offset = i % 2 ;
297+ int32_t curPos = 2 * tIdx + offset;
298+ int32_t nextPos = curPos + 1 ;
299+
300+ if (nextPos < n) {
301+ if (!comp (begin[curPos], begin[nextPos])) {
302+ IterSwap (&begin[curPos], &begin[nextPos]);
303+ }
296304 }
297305 }
306+ GPUbarrier ();
298307 }
299- GPUbarrier ();
300- }
308+ ) // clang-format on
301309#endif
302310}
303311
0 commit comments