Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 17 additions & 20 deletions GPU/Common/GPUCommonAlgorithm.h
Original file line number Diff line number Diff line change
Expand Up @@ -282,30 +282,27 @@ GPUdi() void GPUCommonAlgorithm::sortInBlock(T* begin, T* end, const S& comp)
{
#ifndef GPUCA_GPUCODE
GPUCommonAlgorithm::sort(begin, end, comp);
#elif defined(GPUCA_DETERMINISTIC_MODE) // Not using GPUCA_DETERMINISTIC_CODE, which is enforced in TPC compression
if (get_local_id(0) == 0) {
GPUCommonAlgorithm::sort(begin, end, comp);
}
GPUbarrier();
#else
GPUCA_DETERMINISTIC_CODE( // clang-format off
GPUbarrier();
if (get_local_id(0) == 0) {
GPUCommonAlgorithm::sort(begin, end, comp);
}
GPUbarrier();
, // !GPUCA_DETERMINISTIC_CODE
int32_t n = end - begin;
for (int32_t i = 0; i < n; i++) {
for (int32_t tIdx = get_local_id(0); tIdx < n; tIdx += get_local_size(0)) {
int32_t offset = i % 2;
int32_t curPos = 2 * tIdx + offset;
int32_t nextPos = curPos + 1;

if (nextPos < n) {
if (!comp(begin[curPos], begin[nextPos])) {
IterSwap(&begin[curPos], &begin[nextPos]);
}
int32_t n = end - begin;
for (int32_t i = 0; i < n; i++) {
for (int32_t tIdx = get_local_id(0); tIdx < n; tIdx += get_local_size(0)) {
int32_t offset = i % 2;
int32_t curPos = 2 * tIdx + offset;
int32_t nextPos = curPos + 1;

if (nextPos < n) {
if (!comp(begin[curPos], begin[nextPos])) {
IterSwap(&begin[curPos], &begin[nextPos]);
}
}
GPUbarrier();
}
) // clang-format on
GPUbarrier();
}
#endif
}

Expand Down
5 changes: 4 additions & 1 deletion GPU/GPUTracking/Base/cuda/GPUReconstructionCUDArtc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -14,9 +14,12 @@

#define GPUCA_GPUCODE_GENRTC
#define GPUCA_GPUCODE_COMPILEKERNELS

// Keep some preprocessor calls unprocessed
#define GPUCA_RTC_SPECIAL_CODE(...) GPUCA_RTC_SPECIAL_CODE(__VA_ARGS__)
#define GPUCA_DETERMINISTIC_CODE(...) GPUCA_DETERMINISTIC_CODE(__VA_ARGS__)
// GPUReconstructionCUDAIncludesSystem.h prependended without preprocessor running

// GPUReconstructionCUDAIncludesSystem.h prependended by CMakewithout preprocessor running
#include "GPUReconstructionCUDADef.h"
#include "GPUReconstructionIncludesDeviceAll.h"

Expand Down
24 changes: 12 additions & 12 deletions GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -273,19 +273,19 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1un
#ifdef GPUCA_GPUCODE
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE);
#endif
GPUCA_DETERMINISTIC_CODE( // clang-format off
#ifdef GPUCA_DETERMINISTIC_MODE // Not using GPUCA_DETERMINISTIC_CODE, which is enforced in TPC compression
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortZPadTime>(clusters->clusters[iSector][iRow]));
#else // GPUCA_DETERMINISTIC_MODE
if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) {
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortZPadTime>(clusters->clusters[iSector][iRow]));
, // !GPUCA_DETERMINISTIC_CODE
if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) {
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortZPadTime>(clusters->clusters[iSector][iRow]));
} else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) {
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortZTimePad>(clusters->clusters[iSector][iRow]));
} else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortPad) {
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortPad>(clusters->clusters[iSector][iRow]));
} else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) {
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortTime>(clusters->clusters[iSector][iRow]));
}
) // clang-format on
} else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) {
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortZTimePad>(clusters->clusters[iSector][iRow]));
} else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortPad) {
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortPad>(clusters->clusters[iSector][iRow]));
} else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) {
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortTime>(clusters->clusters[iSector][iRow]));
}
#endif // GPUCA_DETERMINISTIC_MODE
GPUbarrier();
}

Expand Down