 #include "GPUCommonAlgorithm.h"
 #include "GPUO2DataTypes.h"
 
+#ifndef GPUCA_GPUCODE
+#include <functional>
+#endif
+
 using namespace o2::tpc;
 
 namespace GPUCA_NAMESPACE::gpu
 {
 
-class TPCClusterDecompressionCore {
-public:
-
-#ifndef GPUCA_GPUCODE
-GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::function<void(const ClusterNative&, uint32_t)> func)
+class TPCClusterDecompressionCore
 {
-  const auto cluster = ClusterNative(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]);
-  func(cluster, offset);
-  return cluster;
-}
+ public:
+#ifndef GPUCA_GPUCODE
+  GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::function<void(const ClusterNative&, uint32_t)> func)
+  {
+    const auto cluster = ClusterNative(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]);
+    func(cluster, offset);
+    return cluster;
+  }
 
-GPUhi() static const auto& decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector<ClusterNative>& clusterVector)
-{
-  clusterVector.emplace_back(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]);
-  return clusterVector.back();
-}
+  GPUhi() static const auto& decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector<ClusterNative>& clusterVector)
+  {
+    clusterVector.emplace_back(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]);
+    return clusterVector.back();
+  }
 
-GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector<ClusterNative> (&clusters)[GPUCA_NSLICES][GPUCA_ROW_COUNT], std::atomic_flag (&locks)[GPUCA_NSLICES][GPUCA_ROW_COUNT])
-{
-  std::vector<ClusterNative>& clusterVector = clusters[slice][row];
-  auto& lock = locks[slice][row];
-  while (lock.test_and_set(std::memory_order_acquire)) {
+  GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector<ClusterNative> (&clusters)[GPUCA_NSLICES][GPUCA_ROW_COUNT], std::atomic_flag (&locks)[GPUCA_NSLICES][GPUCA_ROW_COUNT])
+  {
+    std::vector<ClusterNative>& clusterVector = clusters[slice][row];
+    auto& lock = locks[slice][row];
+    while (lock.test_and_set(std::memory_order_acquire)) {
+    }
+    ClusterNative retVal = decompressTrackStore(clustersCompressed, offset, slice, row, pad, time, clusterVector);
+    lock.clear(std::memory_order_release);
+    return retVal;
   }
-  ClusterNative retVal = decompressTrackStore(clustersCompressed, offset, slice, row, pad, time, clusterVector);
-  lock.clear(std::memory_order_release);
-  return retVal;
-}
 #endif
 
-GPUdi() static ClusterNative decompressTrackStore(const CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor)
-{
-  uint32_t tmpBufferIndex = slice * (GPUCA_ROW_COUNT * decompressor.mMaxNativeClustersPerBuffer) + row * decompressor.mMaxNativeClustersPerBuffer;
-  uint32_t currentClusterIndex = CAMath::AtomicAdd(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), 1u);
-  const ClusterNative c(time, cmprClusters.flagsA[clusterOffset], pad, cmprClusters.sigmaTimeA[clusterOffset], cmprClusters.sigmaPadA[clusterOffset], cmprClusters.qMaxA[clusterOffset], cmprClusters.qTotA[clusterOffset]);
-  if (currentClusterIndex < decompressor.mMaxNativeClustersPerBuffer) {
-    decompressor.mTmpNativeClusters[tmpBufferIndex + currentClusterIndex] = c;
-  } else {
-    decompressor.raiseError(GPUErrors::ERROR_DECOMPRESSION_ATTACHED_CLUSTER_OVERFLOW, slice * 1000 + row, currentClusterIndex, decompressor.mMaxNativeClustersPerBuffer);
-    CAMath::AtomicExch(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), decompressor.mMaxNativeClustersPerBuffer);
+  GPUdi() static ClusterNative decompressTrackStore(const CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor)
+  {
+    uint32_t tmpBufferIndex = slice * (GPUCA_ROW_COUNT * decompressor.mMaxNativeClustersPerBuffer) + row * decompressor.mMaxNativeClustersPerBuffer;
+    uint32_t currentClusterIndex = CAMath::AtomicAdd(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), 1u);
+    const ClusterNative c(time, cmprClusters.flagsA[clusterOffset], pad, cmprClusters.sigmaTimeA[clusterOffset], cmprClusters.sigmaPadA[clusterOffset], cmprClusters.qMaxA[clusterOffset], cmprClusters.qTotA[clusterOffset]);
+    if (currentClusterIndex < decompressor.mMaxNativeClustersPerBuffer) {
+      decompressor.mTmpNativeClusters[tmpBufferIndex + currentClusterIndex] = c;
+    } else {
+      decompressor.raiseError(GPUErrors::ERROR_DECOMPRESSION_ATTACHED_CLUSTER_OVERFLOW, slice * 1000 + row, currentClusterIndex, decompressor.mMaxNativeClustersPerBuffer);
+      CAMath::AtomicExch(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), decompressor.mMaxNativeClustersPerBuffer);
+    }
+    return c;
   }
-  return c;
-}
 
-template <typename... Args>
-GPUhdi() static void decompressTrack(const CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t trackIndex, uint32_t& clusterOffset, Args&... args)
-{
-  float zOffset = 0;
-  uint32_t slice = cmprClusters.sliceA[trackIndex];
-  uint32_t row = cmprClusters.rowA[trackIndex];
-  GPUTPCCompressionTrackModel track;
-  uint32_t clusterIndex;
-  for (clusterIndex = 0; clusterIndex < cmprClusters.nTrackClusters[trackIndex]; clusterIndex++) {
-    uint32_t pad = 0, time = 0;
-    if (clusterIndex != 0) {
-      uint8_t tmpSlice = cmprClusters.sliceLegDiffA[clusterOffset - trackIndex - 1];
-      bool changeLeg = (tmpSlice >= GPUCA_NSLICES);
-      if (changeLeg) {
-        tmpSlice -= GPUCA_NSLICES;
-      }
-      if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
-        slice += tmpSlice;
-        if (slice >= GPUCA_NSLICES) {
-          slice -= GPUCA_NSLICES;
+  template <typename... Args>
+  GPUdi() static void decompressTrack(const CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t& trackIndex, uint32_t& clusterOffset, Args&... args)
+  {
+    float zOffset = 0;
+    uint32_t slice = cmprClusters.sliceA[trackIndex];
+    uint32_t row = cmprClusters.rowA[trackIndex];
+    GPUTPCCompressionTrackModel track;
+    uint32_t clusterIndex;
+    for (clusterIndex = 0; clusterIndex < cmprClusters.nTrackClusters[trackIndex]; clusterIndex++) {
+      uint32_t pad = 0, time = 0;
+      if (clusterIndex != 0) {
+        uint8_t tmpSlice = cmprClusters.sliceLegDiffA[clusterOffset - trackIndex - 1];
+        bool changeLeg = (tmpSlice >= GPUCA_NSLICES);
+        if (changeLeg) {
+          tmpSlice -= GPUCA_NSLICES;
+        }
+        if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
+          slice += tmpSlice;
+          if (slice >= GPUCA_NSLICES) {
+            slice -= GPUCA_NSLICES;
+          }
+          row += cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
+          if (row >= GPUCA_ROW_COUNT) {
+            row -= GPUCA_ROW_COUNT;
+          }
+        } else {
+          slice = tmpSlice;
+          row = cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
+        }
+        if (changeLeg && track.Mirror()) {
+          break;
+        }
+        if (track.Propagate(param.tpcGeometry.Row2X(row), param.SliceParam[slice].Alpha)) {
+          break;
+        }
+        uint32_t timeTmp = cmprClusters.timeResA[clusterOffset - trackIndex - 1];
+        if (timeTmp & 800000) {
+          timeTmp |= 0xFF000000;
+        }
+        time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(slice, track.Z() + zOffset)));
+        float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), param.tpcGeometry.LinearY2Pad(slice, row, track.Y())));
+        pad = cmprClusters.padResA[clusterOffset - trackIndex - 1] + ClusterNative::packPad(tmpPad);
+        time = time & 0xFFFFFF;
+        pad = (uint16_t)pad;
+        if (pad >= param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked) {
+          if (pad >= 0xFFFF - 11968) { // Constant 11968 = (2^15 - MAX_PADS(138) * scalePadPacked(64)) / 2
+            pad = 0;
+          } else {
+            pad = param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked - 1;
+          }
         }
-        row += cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
-        if (row >= GPUCA_ROW_COUNT) {
-          row -= GPUCA_ROW_COUNT;
+        if (param.continuousMaxTimeBin > 0 && time >= maxTime) {
+          if (time >= 0xFFFFFF - 544768) { // Constant 544768 = (2^23 - LHCMAXBUNCHES(3564) * MAXORBITS(256) * scaleTimePacked(64) / BCPERTIMEBIN(8)) / 2)
+            time = 0;
+          } else {
+            time = maxTime;
+          }
         }
       } else {
-        slice = tmpSlice;
-        row = cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
+        time = cmprClusters.timeA[trackIndex];
+        pad = cmprClusters.padA[trackIndex];
       }
-      if (changeLeg && track.Mirror()) {
-        break;
+      const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, args...);
+      float y = param.tpcGeometry.LinearPad2Y(slice, row, cluster.getPad());
+      float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime());
+      if (clusterIndex == 0) {
+        zOffset = z;
+        track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SliceParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param);
       }
-      if (track.Propagate(param.tpcGeometry.Row2X(row), param.SliceParam[slice].Alpha)) {
+      if (clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y, z - zOffset, row)) {
         break;
       }
-      uint32_t timeTmp = cmprClusters.timeResA[clusterOffset - trackIndex - 1];
-      if (timeTmp & 800000) {
-        timeTmp |= 0xFF000000;
-      }
-      time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(slice, track.Z() + zOffset)));
-      float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), param.tpcGeometry.LinearY2Pad(slice, row, track.Y())));
-      pad = cmprClusters.padResA[clusterOffset - trackIndex - 1] + ClusterNative::packPad(tmpPad);
-      time = time & 0xFFFFFF;
-      pad = (uint16_t)pad;
-      if (pad >= param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked) {
-        if (pad >= 0xFFFF - 11968) { // Constant 11968 = (2^15 - MAX_PADS(138) * scalePadPacked(64)) / 2
-          pad = 0;
-        } else {
-          pad = param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked - 1;
-        }
-      }
-      if (param.continuousMaxTimeBin > 0 && time >= maxTime) {
-        if (time >= 0xFFFFFF - 544768) { // Constant 544768 = (2^23 - LHCMAXBUNCHES(3564) * MAXORBITS(256) * scaleTimePacked(64) / BCPERTIMEBIN(8)) / 2)
-          time = 0;
-        } else {
-          time = maxTime;
-        }
-      }
-    } else {
-      time = cmprClusters.timeA[trackIndex];
-      pad = cmprClusters.padA[trackIndex];
+      clusterOffset++;
     }
-    const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, args...);
-    float y = param.tpcGeometry.LinearPad2Y(slice, row, cluster.getPad());
-    float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime());
-    if (clusterIndex == 0) {
-      zOffset = z;
-      track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SliceParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param);
-    }
-    if (clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y, z - zOffset, row)) {
-      break;
-    }
-    clusterOffset++;
+    clusterOffset += cmprClusters.nTrackClusters[trackIndex] - clusterIndex;
   }
-  clusterOffset += cmprClusters.nTrackClusters[trackIndex] - clusterIndex;
-}
 
-GPUhdi() static const auto& decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, ClusterNative*& clusterBuffer)
-{
-  return ((*(clusterBuffer++) = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k])));
-}
+  GPUdi() static const auto& decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, ClusterNative*& clusterBuffer)
+  {
+    return ((*(clusterBuffer++) = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k])));
+  }
 
-GPUhdi() static auto decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, std::function<void(const ClusterNative&, uint32_t)> func)
-{
-  const auto cluster = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]);
-  func(cluster, k);
-  return cluster;
-}
+#ifndef GPUCA_GPUCODE
+  GPUhi() static auto decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, std::function<void(const ClusterNative&, uint32_t)> func)
+  {
+    const auto cluster = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]);
+    func(cluster, k);
+    return cluster;
+  }
+#endif
 
-template <typename... Args>
-GPUdi() static void decompressHits(const CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, Args&... args)
-{
-  uint32_t time = 0;
-  uint16_t pad = 0;
-  for (uint32_t k = start; k < end; k++) {
-    if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
-      uint32_t timeTmp = cmprClusters.timeDiffU[k];
-      if (timeTmp & 800000) {
-        timeTmp |= 0xFF000000;
+  template <typename... Args>
+  GPUdi() static void decompressHits(const CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, Args&... args)
+  {
+    uint32_t time = 0;
+    uint16_t pad = 0;
+    for (uint32_t k = start; k < end; k++) {
+      if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
+        uint32_t timeTmp = cmprClusters.timeDiffU[k];
+        if (timeTmp & 800000) {
+          timeTmp |= 0xFF000000;
+        }
+        time += timeTmp;
+        pad += cmprClusters.padDiffU[k];
+      } else {
+        time = cmprClusters.timeDiffU[k];
+        pad = cmprClusters.padDiffU[k];
       }
-      time += timeTmp;
-      pad += cmprClusters.padDiffU[k];
-    } else {
-      time = cmprClusters.timeDiffU[k];
-      pad = cmprClusters.padDiffU[k];
+      decompressHitsStore(cmprClusters, k, time, pad, args...);
     }
-    decompressHitsStore(cmprClusters, k, time, pad, args...);
   }
-}
-
 };
-}
+} // namespace GPUCA_NAMESPACE::gpu
 
 #endif
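
Not part of the diff: a minimal standalone sketch of the overload-dispatch pattern the header relies on, where the variadic Args pack of decompressTrack/decompressHits selects which *Store overload receives each decoded cluster (a std::function callback on the host, a vector, or a device-side buffer). The types and function names below (Cluster, store, decode) are simplified stand-ins, not the O2 classes.

// Standalone sketch, not O2 code: illustrates overload selection via a forwarded Args pack.
#include <cstdint>
#include <functional>
#include <iostream>
#include <vector>

struct Cluster {
  uint32_t time;
  uint16_t pad;
};

// Overload chosen when the caller passes a callback (analogue of the CPU-only std::function path).
static Cluster store(uint32_t time, uint16_t pad, std::function<void(const Cluster&, uint32_t)> func)
{
  const Cluster c{time, pad};
  func(c, 0u); // the real code forwards the cluster offset/index here
  return c;
}

// Overload chosen when the caller passes a vector to append to.
static const Cluster& store(uint32_t time, uint16_t pad, std::vector<Cluster>& out)
{
  out.push_back({time, pad});
  return out.back();
}

// The decode loop is written once; the Args pack decides where clusters end up,
// mirroring how decompressHits forwards args... to decompressHitsStore.
template <typename... Args>
static void decode(uint32_t n, Args&... args)
{
  for (uint32_t k = 0; k < n; k++) {
    store(k * 10u, static_cast<uint16_t>(k), args...);
  }
}

int main()
{
  std::vector<Cluster> buffer;
  decode(3u, buffer); // vector overload
  std::function<void(const Cluster&, uint32_t)> print = [](const Cluster& c, uint32_t) {
    std::cout << c.time << " " << c.pad << "\n";
  };
  decode(3u, print); // callback overload
  std::cout << "stored " << buffer.size() << " clusters\n";
}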