@@ -26,160 +26,159 @@ using namespace o2::tpc;
 namespace GPUCA_NAMESPACE::gpu
 {
 
-class TPCClusterDecompressionCore {
- public:
-
-#ifndef GPUCA_GPUCODE
-GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::function<void(const ClusterNative&, uint32_t)> func)
+class TPCClusterDecompressionCore
 {
-  const auto cluster = ClusterNative(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]);
-  func(cluster, offset);
-  return cluster;
-}
+ public:
+#ifndef GPUCA_GPUCODE
+  GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::function<void(const ClusterNative&, uint32_t)> func)
+  {
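+    // Build the ClusterNative from the attached-cluster arrays and hand it, together with its index, to the caller-provided callback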
+    const auto cluster = ClusterNative(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]);
+    func(cluster, offset);
+    return cluster;
+  }
 
-GPUhi() static const auto& decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector<ClusterNative>& clusterVector)
-{
-  clusterVector.emplace_back(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]);
-  return clusterVector.back();
-}
+  GPUhi() static const auto& decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector<ClusterNative>& clusterVector)
+  {
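+    // Append directly to the output vector; callers needing thread safety use the locking overload below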
+    clusterVector.emplace_back(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]);
+    return clusterVector.back();
+  }
 
-GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector<ClusterNative> (&clusters)[GPUCA_NSLICES][GPUCA_ROW_COUNT], std::atomic_flag (&locks)[GPUCA_NSLICES][GPUCA_ROW_COUNT])
-{
-  std::vector<ClusterNative>& clusterVector = clusters[slice][row];
-  auto& lock = locks[slice][row];
-  while (lock.test_and_set(std::memory_order_acquire)) {
+  GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector<ClusterNative> (&clusters)[GPUCA_NSLICES][GPUCA_ROW_COUNT], std::atomic_flag (&locks)[GPUCA_NSLICES][GPUCA_ROW_COUNT])
+  {
+    std::vector<ClusterNative>& clusterVector = clusters[slice][row];
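+    // Spin on the per-(slice, row) atomic_flag so concurrent writers do not race on the vector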
+    auto& lock = locks[slice][row];
+    while (lock.test_and_set(std::memory_order_acquire)) {
+    }
+    ClusterNative retVal = decompressTrackStore(clustersCompressed, offset, slice, row, pad, time, clusterVector);
+    lock.clear(std::memory_order_release);
+    return retVal;
   }
-  ClusterNative retVal = decompressTrackStore(clustersCompressed, offset, slice, row, pad, time, clusterVector);
-  lock.clear(std::memory_order_release);
-  return retVal;
-}
 #endif
 
-GPUdi() static ClusterNative decompressTrackStore(const CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor)
-{
-  uint32_t tmpBufferIndex = slice * (GPUCA_ROW_COUNT * decompressor.mMaxNativeClustersPerBuffer) + row * decompressor.mMaxNativeClustersPerBuffer;
-  uint32_t currentClusterIndex = CAMath::AtomicAdd(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), 1u);
-  const ClusterNative c(time, cmprClusters.flagsA[clusterOffset], pad, cmprClusters.sigmaTimeA[clusterOffset], cmprClusters.sigmaPadA[clusterOffset], cmprClusters.qMaxA[clusterOffset], cmprClusters.qTotA[clusterOffset]);
-  if (currentClusterIndex < decompressor.mMaxNativeClustersPerBuffer) {
-    decompressor.mTmpNativeClusters[tmpBufferIndex + currentClusterIndex] = c;
-  } else {
-    decompressor.raiseError(GPUErrors::ERROR_DECOMPRESSION_ATTACHED_CLUSTER_OVERFLOW, slice * 1000 + row, currentClusterIndex, decompressor.mMaxNativeClustersPerBuffer);
-    CAMath::AtomicExch(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), decompressor.mMaxNativeClustersPerBuffer);
+  GPUdi() static ClusterNative decompressTrackStore(const CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor)
+  {
+    uint32_t tmpBufferIndex = slice * (GPUCA_ROW_COUNT * decompressor.mMaxNativeClustersPerBuffer) + row * decompressor.mMaxNativeClustersPerBuffer;
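+    // Atomically reserve the next slot in this row's temporary buffer; on overflow, report the error and pin the counter at the buffer size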
+    uint32_t currentClusterIndex = CAMath::AtomicAdd(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), 1u);
+    const ClusterNative c(time, cmprClusters.flagsA[clusterOffset], pad, cmprClusters.sigmaTimeA[clusterOffset], cmprClusters.sigmaPadA[clusterOffset], cmprClusters.qMaxA[clusterOffset], cmprClusters.qTotA[clusterOffset]);
+    if (currentClusterIndex < decompressor.mMaxNativeClustersPerBuffer) {
+      decompressor.mTmpNativeClusters[tmpBufferIndex + currentClusterIndex] = c;
+    } else {
+      decompressor.raiseError(GPUErrors::ERROR_DECOMPRESSION_ATTACHED_CLUSTER_OVERFLOW, slice * 1000 + row, currentClusterIndex, decompressor.mMaxNativeClustersPerBuffer);
+      CAMath::AtomicExch(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), decompressor.mMaxNativeClustersPerBuffer);
+    }
+    return c;
   }
-  return c;
-}
 
-template <typename... Args>
-GPUhdi() static void decompressTrack(const CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t trackIndex, uint32_t& clusterOffset, Args&... args)
-{
-  float zOffset = 0;
-  uint32_t slice = cmprClusters.sliceA[trackIndex];
-  uint32_t row = cmprClusters.rowA[trackIndex];
-  GPUTPCCompressionTrackModel track;
-  uint32_t clusterIndex;
-  for (clusterIndex = 0; clusterIndex < cmprClusters.nTrackClusters[trackIndex]; clusterIndex++) {
-    uint32_t pad = 0, time = 0;
-    if (clusterIndex != 0) {
-      uint8_t tmpSlice = cmprClusters.sliceLegDiffA[clusterOffset - trackIndex - 1];
-      bool changeLeg = (tmpSlice >= GPUCA_NSLICES);
-      if (changeLeg) {
-        tmpSlice -= GPUCA_NSLICES;
-      }
-      if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
-        slice += tmpSlice;
-        if (slice >= GPUCA_NSLICES) {
-          slice -= GPUCA_NSLICES;
+  template <typename... Args>
+  GPUhdi() static void decompressTrack(const CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t& trackIndex, uint32_t& clusterOffset, Args&... args)
+  {
+    float zOffset = 0;
+    uint32_t slice = cmprClusters.sliceA[trackIndex];
+    uint32_t row = cmprClusters.rowA[trackIndex];
+    GPUTPCCompressionTrackModel track;
+    uint32_t clusterIndex;
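+    // Walk the track's clusters: the first is stored with absolute coordinates, the rest as residuals against the track-model extrapolation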
+    for (clusterIndex = 0; clusterIndex < cmprClusters.nTrackClusters[trackIndex]; clusterIndex++) {
+      uint32_t pad = 0, time = 0;
+      if (clusterIndex != 0) {
+        uint8_t tmpSlice = cmprClusters.sliceLegDiffA[clusterOffset - trackIndex - 1];
+        bool changeLeg = (tmpSlice >= GPUCA_NSLICES);
+        if (changeLeg) {
+          tmpSlice -= GPUCA_NSLICES;
+        }
+        if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
+          slice += tmpSlice;
+          if (slice >= GPUCA_NSLICES) {
+            slice -= GPUCA_NSLICES;
+          }
+          row += cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
+          if (row >= GPUCA_ROW_COUNT) {
+            row -= GPUCA_ROW_COUNT;
+          }
+        } else {
+          slice = tmpSlice;
+          row = cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
         }
-        row += cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
-        if (row >= GPUCA_ROW_COUNT) {
-          row -= GPUCA_ROW_COUNT;
+        if (changeLeg && track.Mirror()) {
+          break;
+        }
+        if (track.Propagate(param.tpcGeometry.Row2X(row), param.SliceParam[slice].Alpha)) {
+          break;
+        }
+        uint32_t timeTmp = cmprClusters.timeResA[clusterOffset - trackIndex - 1];
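+        // timeResA holds a signed 24-bit residual: sign-extend bit 23 into the upper byte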
+        if (timeTmp & 0x800000) {
+          timeTmp |= 0xFF000000;
+        }
+        time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(slice, track.Z() + zOffset)));
+        float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), param.tpcGeometry.LinearY2Pad(slice, row, track.Y())));
+        pad = cmprClusters.padResA[clusterOffset - trackIndex - 1] + ClusterNative::packPad(tmpPad);
+        time = time & 0xFFFFFF;
+        pad = (uint16_t)pad;
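+        // Residual arithmetic can wrap the packed ranges: values near the wrap point are clamped to 0, the rest to the row/time limits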
+        if (pad >= param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked) {
+          if (pad >= 0xFFFF - 11968) { // Constant 11968 = (2^15 - MAX_PADS(138) * scalePadPacked(64)) / 2
+            pad = 0;
+          } else {
+            pad = param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked - 1;
+          }
+        }
+        if (param.continuousMaxTimeBin > 0 && time >= maxTime) {
+          if (time >= 0xFFFFFF - 544768) { // Constant 544768 = (2^23 - LHCMAXBUNCHES(3564) * MAXORBITS(256) * scaleTimePacked(64) / BCPERTIMEBIN(8)) / 2)
+            time = 0;
+          } else {
+            time = maxTime;
+          }
+        }
         }
       } else {
-        slice = tmpSlice;
-        row = cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
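+        // First cluster of the track: time and pad are stored as absolute values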
+        time = cmprClusters.timeA[trackIndex];
+        pad = cmprClusters.padA[trackIndex];
       }
-      if (changeLeg && track.Mirror()) {
-        break;
+      const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, args...);
+      float y = param.tpcGeometry.LinearPad2Y(slice, row, cluster.getPad());
+      float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime());
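+      // Seed the track model at the first cluster; its z defines the zOffset subtracted from all later clusters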
+      if (clusterIndex == 0) {
+        zOffset = z;
+        track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SliceParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param);
       }
-      if (track.Propagate(param.tpcGeometry.Row2X(row), param.SliceParam[slice].Alpha)) {
+      if (clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y, z - zOffset, row)) {
         break;
       }
-      uint32_t timeTmp = cmprClusters.timeResA[clusterOffset - trackIndex - 1];
-      if (timeTmp & 0x800000) {
-        timeTmp |= 0xFF000000;
-      }
-      time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(slice, track.Z() + zOffset)));
-      float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), param.tpcGeometry.LinearY2Pad(slice, row, track.Y())));
-      pad = cmprClusters.padResA[clusterOffset - trackIndex - 1] + ClusterNative::packPad(tmpPad);
-      time = time & 0xFFFFFF;
-      pad = (uint16_t)pad;
-      if (pad >= param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked) {
-        if (pad >= 0xFFFF - 11968) { // Constant 11968 = (2^15 - MAX_PADS(138) * scalePadPacked(64)) / 2
-          pad = 0;
-        } else {
-          pad = param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked - 1;
-        }
-      }
-      if (param.continuousMaxTimeBin > 0 && time >= maxTime) {
-        if (time >= 0xFFFFFF - 544768) { // Constant 544768 = (2^23 - LHCMAXBUNCHES(3564) * MAXORBITS(256) * scaleTimePacked(64) / BCPERTIMEBIN(8)) / 2)
-          time = 0;
-        } else {
-          time = maxTime;
-        }
-      }
-    } else {
-      time = cmprClusters.timeA[trackIndex];
-      pad = cmprClusters.padA[trackIndex];
-    }
-    const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, args...);
-    float y = param.tpcGeometry.LinearPad2Y(slice, row, cluster.getPad());
-    float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime());
-    if (clusterIndex == 0) {
-      zOffset = z;
-      track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SliceParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param);
-    }
-    if (clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y, z - zOffset, row)) {
-      break;
+      clusterOffset++;
     }
-    clusterOffset++;
+    clusterOffset += cmprClusters.nTrackClusters[trackIndex] - clusterIndex;
   }
-  clusterOffset += cmprClusters.nTrackClusters[trackIndex] - clusterIndex;
-}
 
-GPUhdi() static const auto& decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, ClusterNative*& clusterBuffer)
-{
-  return ((*(clusterBuffer++) = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k])));
-}
+  GPUhdi() static const auto& decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, ClusterNative*& clusterBuffer)
+  {
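+    // Store the cluster at the current buffer position and advance the cursor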
+    return ((*(clusterBuffer++) = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k])));
+  }
 
-GPUhdi() static auto decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, std::function<void(const ClusterNative&, uint32_t)> func)
-{
-  const auto cluster = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]);
-  func(cluster, k);
-  return cluster;
-}
+  GPUhdi() static auto decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, std::function<void(const ClusterNative&, uint32_t)> func)
+  {
+    const auto cluster = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]);
+    func(cluster, k);
+    return cluster;
+  }
 
-template <typename... Args>
-GPUdi() static void decompressHits(const CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, Args&... args)
-{
-  uint32_t time = 0;
-  uint16_t pad = 0;
-  for (uint32_t k = start; k < end; k++) {
-    if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
-      uint32_t timeTmp = cmprClusters.timeDiffU[k];
-      if (timeTmp & 0x800000) {
-        timeTmp |= 0xFF000000;
+  template <typename... Args>
+  GPUdi() static void decompressHits(const CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, Args&... args)
+  {
+    uint32_t time = 0;
+    uint16_t pad = 0;
+    for (uint32_t k = start; k < end; k++) {
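+      // In difference mode, time and pad are cumulative deltas within the row; otherwise they are stored directly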
+      if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
+        uint32_t timeTmp = cmprClusters.timeDiffU[k];
+        if (timeTmp & 0x800000) {
+          timeTmp |= 0xFF000000;
+        }
+        time += timeTmp;
+        pad += cmprClusters.padDiffU[k];
+      } else {
+        time = cmprClusters.timeDiffU[k];
+        pad = cmprClusters.padDiffU[k];
       }
-      time += timeTmp;
-      pad += cmprClusters.padDiffU[k];
-    } else {
-      time = cmprClusters.timeDiffU[k];
-      pad = cmprClusters.padDiffU[k];
+      decompressHitsStore(cmprClusters, k, time, pad, args...);
     }
-    decompressHitsStore(cmprClusters, k, time, pad, args...);
   }
-}
-
 };
-}
+} // namespace GPUCA_NAMESPACE::gpu
 
 #endif