// Copyright 2024-2025 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file TPCClusterDecompressionCore.inc
/// \author Gabriele Cimador

#ifndef TPCCLUSTERDECOMPRESSOR_INC
#define TPCCLUSTERDECOMPRESSOR_INC

#include "GPUTPCDecompression.h"
#include "GPUConstantMem.h"
#include "GPUTPCCompressionTrackModel.h"
#include "GPUCommonAlgorithm.h"
#include "GPUO2DataTypes.h"

using namespace o2::tpc;

namespace GPUCA_NAMESPACE::gpu
{

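// Shared core of the TPC cluster decompression, used by both the host and the GPU
// code paths. The decompressTrackStore / decompressHitsStore overloads select where
// the decompressed clusters are written.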
class TPCClusterDecompressionCore {
 public:

#ifndef GPUCA_GPUCODE
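  // Host-only overload: builds the ClusterNative for an attached cluster from the
  // compressed arrays at the given offset and hands it to the caller-provided callback.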
  GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::function<void(const ClusterNative&, uint32_t)> func)
  {
    const auto cluster = ClusterNative(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]);
    func(cluster, offset);
    return cluster;
  }

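  // Host-only overload: appends the decompressed cluster to the given vector and
  // returns a reference to the stored element.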
  GPUhi() static const auto& decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector<ClusterNative>& clusterVector)
  {
    clusterVector.emplace_back(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]);
    return clusterVector.back();
  }

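  // Host-only overload for concurrent decompression: spins on the per-(slice, row)
  // atomic_flag before appending to the matching cluster vector.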
  GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector<ClusterNative> (&clusters)[GPUCA_NSLICES][GPUCA_ROW_COUNT], std::atomic_flag (&locks)[GPUCA_NSLICES][GPUCA_ROW_COUNT])
  {
    std::vector<ClusterNative>& clusterVector = clusters[slice][row];
    auto& lock = locks[slice][row];
    while (lock.test_and_set(std::memory_order_acquire)) {
    }
    ClusterNative retVal = decompressTrackStore(clustersCompressed, offset, slice, row, pad, time, clusterVector);
    lock.clear(std::memory_order_release);
    return retVal;
  }
#endif

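  // GPU variant: atomically reserves a slot in the decompressor's temporary per-row
  // buffer and stores the cluster there, raising an error on buffer overflow.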
  GPUdii() static ClusterNative decompressTrackStore(const CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor)
  {
    uint32_t tmpBufferIndex = slice * (GPUCA_ROW_COUNT * decompressor.mMaxNativeClustersPerBuffer) + row * decompressor.mMaxNativeClustersPerBuffer;
    uint32_t currentClusterIndex = CAMath::AtomicAdd(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), 1u);
    const ClusterNative c(time, cmprClusters.flagsA[clusterOffset], pad, cmprClusters.sigmaTimeA[clusterOffset], cmprClusters.sigmaPadA[clusterOffset], cmprClusters.qMaxA[clusterOffset], cmprClusters.qTotA[clusterOffset]);
    if (currentClusterIndex < decompressor.mMaxNativeClustersPerBuffer) {
      decompressor.mTmpNativeClusters[tmpBufferIndex + currentClusterIndex] = c;
    } else {
      decompressor.raiseError(GPUErrors::ERROR_DECOMPRESSION_ATTACHED_CLUSTER_OVERFLOW, slice * 1000 + row, currentClusterIndex, decompressor.mMaxNativeClustersPerBuffer);
      CAMath::AtomicExch(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), decompressor.mMaxNativeClustersPerBuffer);
    }
    return c;
  }

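  // Decompresses all clusters attached to one track. The first cluster carries absolute
  // coordinates; subsequent clusters are reconstructed from residuals against the
  // extrapolated track model. The trailing args are forwarded to one of the
  // decompressTrackStore overloads above, which selects the storage backend.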
  template <typename... Args>
  GPUhdi() static void decompressTrack(const CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t trackIndex, uint32_t& clusterOffset, Args&... args)
  {
    float zOffset = 0;
    uint32_t slice = cmprClusters.sliceA[trackIndex];
    uint32_t row = cmprClusters.rowA[trackIndex];
    GPUTPCCompressionTrackModel track;
    uint32_t clusterIndex;
    for (clusterIndex = 0; clusterIndex < cmprClusters.nTrackClusters[trackIndex]; clusterIndex++) {
      uint32_t pad = 0, time = 0;
      if (clusterIndex != 0) {
        uint8_t tmpSlice = cmprClusters.sliceLegDiffA[clusterOffset - trackIndex - 1];
        bool changeLeg = (tmpSlice >= GPUCA_NSLICES);
        if (changeLeg) {
          tmpSlice -= GPUCA_NSLICES;
        }
        if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
          slice += tmpSlice;
          if (slice >= GPUCA_NSLICES) {
            slice -= GPUCA_NSLICES;
          }
          row += cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
          if (row >= GPUCA_ROW_COUNT) {
            row -= GPUCA_ROW_COUNT;
          }
        } else {
          slice = tmpSlice;
          row = cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
        }
        if (changeLeg && track.Mirror()) {
          break;
        }
        if (track.Propagate(param.tpcGeometry.Row2X(row), param.SliceParam[slice].Alpha)) {
          break;
        }
        uint32_t timeTmp = cmprClusters.timeResA[clusterOffset - trackIndex - 1];
        if (timeTmp & 0x800000) { // sign-extend the 24-bit time residual
          timeTmp |= 0xFF000000;
        }
        time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(slice, track.Z() + zOffset)));
        float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), param.tpcGeometry.LinearY2Pad(slice, row, track.Y())));
        pad = cmprClusters.padResA[clusterOffset - trackIndex - 1] + ClusterNative::packPad(tmpPad);
        time = time & 0xFFFFFF;
        pad = (uint16_t)pad;
        if (pad >= param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked) {
          if (pad >= 0xFFFF - 11968) { // Constant 11968 = (2^15 - MAX_PADS(138) * scalePadPacked(64)) / 2
            pad = 0;
          } else {
            pad = param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked - 1;
          }
        }
        if (param.continuousMaxTimeBin > 0 && time >= maxTime) {
          if (time >= 0xFFFFFF - 544768) { // Constant 544768 = (2^23 - LHCMAXBUNCHES(3564) * MAXORBITS(256) * scaleTimePacked(64) / BCPERTIMEBIN(8)) / 2
            time = 0;
          } else {
            time = maxTime;
          }
        }
      } else {
        time = cmprClusters.timeA[trackIndex];
        pad = cmprClusters.padA[trackIndex];
      }
      const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, args...);
      float y = param.tpcGeometry.LinearPad2Y(slice, row, cluster.getPad());
      float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime());
      if (clusterIndex == 0) {
        zOffset = z;
        track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SliceParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param);
      }
      if (clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y, z - zOffset, row)) {
        break;
      }
      clusterOffset++;
    }
    clusterOffset += cmprClusters.nTrackClusters[trackIndex] - clusterIndex;
  }

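  // Stores one unattached cluster into the output buffer and advances the buffer
  // pointer, returning a reference to the stored element.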
  GPUhdi() static const auto& decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, ClusterNative*& clusterBuffer)
  {
    return (*(clusterBuffer++) = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]));
  }

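  // Callback variant: builds the unattached cluster and hands it to the caller-provided
  // functor instead of writing it to a buffer.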
  GPUhdi() static auto decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, std::function<void(const ClusterNative&, uint32_t)> func)
  {
    const auto cluster = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]);
    func(cluster, k);
    return cluster;
  }

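  // Decompresses the unattached clusters in the index range [start, end). With
  // CompressionDifferences enabled, time and pad are delta-encoded and accumulated
  // across clusters; otherwise they are stored as absolute values.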
  template <typename... Args>
  GPUdii() static void decompressHits(const CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, Args&... args)
  {
    uint32_t time = 0;
    uint16_t pad = 0;
    for (uint32_t k = start; k < end; k++) {
      if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
        uint32_t timeTmp = cmprClusters.timeDiffU[k];
        if (timeTmp & 0x800000) { // sign-extend the 24-bit time difference
          timeTmp |= 0xFF000000;
        }
        time += timeTmp;
        pad += cmprClusters.padDiffU[k];
      } else {
        time = cmprClusters.timeDiffU[k];
        pad = cmprClusters.padDiffU[k];
      }
      decompressHitsStore(cmprClusters, k, time, pad, args...);
    }
  }
};

} // namespace GPUCA_NAMESPACE::gpu

#endif