Skip to content

Commit 62d02b2

Browse files
Gabriele Cimadordavidrohr
authored andcommitted
GPU: TPC Decoding: added variadic templates to decompressTrack() and decompressHits() in GPU code
1 parent bcec346 commit 62d02b2

File tree

2 files changed

+20
-8
lines changed

2 files changed

+20
-8
lines changed

GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx

Lines changed: 13 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -31,11 +31,13 @@ GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::ste
3131
const uint32_t maxTime = (param.continuousMaxTimeBin + 1) * ClusterNative::scaleTimePacked - 1;
3232

3333
for (int32_t i = trackStart + get_global_id(0); i < trackEnd; i += get_global_size(0)) {
34-
decompressTrack(cmprClusters, param, maxTime, i, decompressor.mAttachedClustersOffsets[i], decompressor);
34+
uint32_t offset = decompressor.mAttachedClustersOffsets[i];
35+
decompressTrack(cmprClusters, param, maxTime, i, offset, decompressor);
3536
}
3637
}
3738

38-
GPUdii() void GPUTPCDecompressionKernels::decompressTrack(CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t trackIndex, uint32_t clusterOffset, GPUTPCDecompression& decompressor)
39+
template <typename... Args>
40+
GPUdii() void GPUTPCDecompressionKernels::decompressTrack(CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t trackIndex, uint32_t& clusterOffset, Args&... args)
3941
{
4042
float zOffset = 0;
4143
uint32_t slice = cmprClusters.sliceA[trackIndex];
@@ -96,7 +98,7 @@ GPUdii() void GPUTPCDecompressionKernels::decompressTrack(CompressedClusters& cm
9698
time = cmprClusters.timeA[trackIndex];
9799
pad = cmprClusters.padA[trackIndex];
98100
}
99-
const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, decompressor);
101+
const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, args...);
100102
float y = param.tpcGeometry.LinearPad2Y(slice, row, cluster.getPad());
101103
float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime());
102104
if (clusterIndex == 0) {
@@ -111,7 +113,7 @@ GPUdii() void GPUTPCDecompressionKernels::decompressTrack(CompressedClusters& cm
111113
clusterOffset += cmprClusters.nTrackClusters[trackIndex] - clusterIndex;
112114
}
113115

114-
GPUdii() ClusterNative GPUTPCDecompressionKernels::decompressTrackStore(const o2::tpc::CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor)
116+
GPUdii() ClusterNative GPUTPCDecompressionKernels::decompressTrackStore(const CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor)
115117
{
116118
uint32_t tmpBufferIndex = computeLinearTmpBufferIndex(slice, row, decompressor.mMaxNativeClustersPerBuffer);
117119
uint32_t currentClusterIndex = CAMath::AtomicAdd(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), 1u);
@@ -161,7 +163,8 @@ GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::ste
161163
}
162164
}
163165

164-
GPUdii() void GPUTPCDecompressionKernels::decompressHits(const o2::tpc::CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, ClusterNative* clusterNativeBuffer)
166+
template <typename... Args>
167+
GPUdii() void GPUTPCDecompressionKernels::decompressHits(const CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, Args&... args)
165168
{
166169
uint32_t time = 0;
167170
uint16_t pad = 0;
@@ -177,10 +180,14 @@ GPUdii() void GPUTPCDecompressionKernels::decompressHits(const o2::tpc::Compress
177180
time = cmprClusters.timeDiffU[k];
178181
pad = cmprClusters.padDiffU[k];
179182
}
180-
*(clusterNativeBuffer++) = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]);
183+
decompressHitsStore(cmprClusters, k, time, pad, args...);
181184
}
182185
}
183186

187+
GPUdii() void GPUTPCDecompressionKernels::decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, ClusterNative*& clusterNativeBuffer){
188+
*(clusterNativeBuffer++) = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]);
189+
}
190+
184191
template <typename T>
185192
GPUdi() void GPUTPCDecompressionKernels::decompressorMemcpyBasic(T* GPUrestrict() dst, const T* GPUrestrict() src, uint32_t size)
186193
{

GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,9 +45,14 @@ class GPUTPCDecompressionKernels : public GPUKernelTemplate
4545

4646
template <int32_t iKernel = defaultKernel, typename... Args>
4747
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors, Args... args);
48-
GPUd() static void decompressTrack(o2::tpc::CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t trackIndex, uint32_t clusterOffset, GPUTPCDecompression& decompressor);
48+
49+
template <typename... Args>
50+
GPUd() static void decompressTrack(o2::tpc::CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t trackIndex, uint32_t& clusterOffset, Args&... args);
4951
GPUdi() static o2::tpc::ClusterNative decompressTrackStore(const o2::tpc::CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor);
50-
GPUdi() static void decompressHits(const o2::tpc::CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, o2::tpc::ClusterNative* clusterNativeBuffer);
52+
53+
template <typename... Args>
54+
GPUdi() static void decompressHits(const o2::tpc::CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, Args&... args);
55+
GPUdi() static void decompressHitsStore(const o2::tpc::CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, o2::tpc::ClusterNative*& clusterNativeBuffer);
5156

5257
GPUd() static uint32_t computeLinearTmpBufferIndex(uint32_t slice, uint32_t row, uint32_t maxClustersPerBuffer)
5358
{

0 commit comments

Comments
 (0)