Skip to content

Commit fff75cf

Browse files
committed
GPU: Use total sorting in deterministic mode for unattached clusters
1 parent 64dd944 commit fff75cf

File tree

1 file changed

+28
-12
lines changed

1 file changed

+28
-12
lines changed

GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx

Lines changed: 28 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -148,19 +148,19 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step0at
148148
}
149149

150150
// Comparator specialization for the SortTime order.
// a and b are used as indices into mClsPtr; returns true when the entry at index a
// has a smaller getTimePacked() value than the entry at index b.
// This diff replaces the literal template argument 0 with GPUSettings::SortTime.
template <>
151-
GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<0>::operator()(uint32_t a, uint32_t b) const
151+
GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<GPUSettings::SortTime>::operator()(uint32_t a, uint32_t b) const
152152
{
153153
return mClsPtr[a].getTimePacked() < mClsPtr[b].getTimePacked();
154154
}
155155

156156
// Comparator specialization for the SortPad order.
// a and b are used as indices into mClsPtr; returns true when the entry at index a
// has a smaller padPacked value than the entry at index b.
// This diff replaces the literal template argument 1 with GPUSettings::SortPad.
template <>
157-
GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<1>::operator()(uint32_t a, uint32_t b) const
157+
GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<GPUSettings::SortPad>::operator()(uint32_t a, uint32_t b) const
158158
{
159159
return mClsPtr[a].padPacked < mClsPtr[b].padPacked;
160160
}
161161

162162
template <>
163-
GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<2>::operator()(uint32_t a, uint32_t b) const
163+
GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<GPUSettings::SortZTimePad>::operator()(uint32_t a, uint32_t b) const
164164
{
165165
if (mClsPtr[a].getTimePacked() >> 3 == mClsPtr[b].getTimePacked() >> 3) {
166166
return mClsPtr[a].padPacked < mClsPtr[b].padPacked;
@@ -169,14 +169,26 @@ GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<2>::opera
169169
}
170170

171171
// Comparator specialization for the SortZPadTime order.
// a and b are used as indices into mClsPtr. Entries whose padPacked values agree after
// discarding the low 3 bits are ordered by getTimePacked(); all other pairs are ordered
// by the full padPacked value.
// This diff replaces the literal template argument 3 with GPUSettings::SortZPadTime.
template <>
172-
GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<3>::operator()(uint32_t a, uint32_t b) const
172+
GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<GPUSettings::SortZPadTime>::operator()(uint32_t a, uint32_t b) const
173173
{
174174
// Same pad bucket (padPacked >> 3): fall back to ordering by packed time.
if (mClsPtr[a].padPacked >> 3 == mClsPtr[b].padPacked >> 3) {
175175
return mClsPtr[a].getTimePacked() < mClsPtr[b].getTimePacked();
176176
}
177177
return mClsPtr[a].padPacked < mClsPtr[b].padPacked;
178178
}
179179

180+
// Comparator specialization added by this diff for deterministic sorting.
// a and b are used as indices into mClsPtr. Entries are ordered lexicographically by
// getTimePacked(), then padPacked, then qTot.
// NOTE(review): two entries that agree in all three fields compare as equivalent
// (neither sorts before the other), so their relative order is left to the sort
// implementation — confirm whether such ties can occur and need a further tie-break.
// NOTE(review): the template argument is the literal 4, while the sibling
// specializations use GPUSettings::Sort* names — confirm whether a named value exists.
template <> // Deterministic comparison
181+
GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<4>::operator()(uint32_t a, uint32_t b) const
182+
{
183+
// Primary key: packed time.
if (mClsPtr[a].getTimePacked() != mClsPtr[b].getTimePacked()) {
184+
return mClsPtr[a].getTimePacked() < mClsPtr[b].getTimePacked();
185+
}
186+
// Secondary key: packed pad.
if (mClsPtr[a].padPacked != mClsPtr[b].padPacked) {
187+
return mClsPtr[a].padPacked < mClsPtr[b].padPacked;
188+
}
189+
// Final key: qTot.
return mClsPtr[a].qTot < mClsPtr[b].qTot;
190+
}
191+
180192
template <>
181193
GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1unattached>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors)
182194
{
@@ -261,15 +273,19 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1un
261273
#ifdef GPUCA_GPUCODE
262274
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE);
263275
#endif
264-
if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) {
276+
// Sort the buffered cluster indices of this row.
// First macro argument (presumably the deterministic-mode build, per the
// "!GPUCA_DETERMINISTIC_CODE" marker on the second argument): always sort with the
// <4> comparator, which orders by packed time, then packed pad, then qTot, independent
// of compressionSortOrder. Previously this branch instantiated the SortZPadTime
// comparator, leaving the deterministic comparator unused.
// Second macro argument: pick the comparator matching param.rec.tpc.compressionSortOrder;
// no sort is performed if the value matches none of the four handled orders.
GPUCA_DETERMINISTIC_CODE( // clang-format off
265277
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<4>(clusters->clusters[iSector][iRow]));
266-
} else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) {
267-
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortZTimePad>(clusters->clusters[iSector][iRow]));
268-
} else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortPad) {
269-
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortPad>(clusters->clusters[iSector][iRow]));
270-
} else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) {
271-
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortTime>(clusters->clusters[iSector][iRow]));
272-
}
278+
, // !GPUCA_DETERMINISTIC_CODE
279+
if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) {
280+
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortZPadTime>(clusters->clusters[iSector][iRow]));
281+
} else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) {
282+
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortZTimePad>(clusters->clusters[iSector][iRow]));
283+
} else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortPad) {
284+
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortPad>(clusters->clusters[iSector][iRow]));
285+
} else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortTime) {
286+
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortTime>(clusters->clusters[iSector][iRow]));
287+
}
288+
) // clang-format on
273289
GPUbarrier();
274290
}
275291

0 commit comments

Comments
 (0)