2323#include " TPCPadGainCalib.h"
2424#include " TPCZSLinkMapping.h"
2525#include " GPUTPCGeometry.h"
26+ #include " DetectorsRaw/RDHUtils.h"
2627
2728using namespace o2 ::gpu;
2829using namespace o2 ::gpu::tpccf;
@@ -302,26 +303,6 @@ GPUd() void GPUTPCCFDecodeZSLink::DecodeTB(
302303 uint8_t myOffset = warp_scan_inclusive_add (myChannelActive) - 1 + blockOffset;
303304 blockOffset = warp_broadcast (myOffset, NTHREADS - 1 ) + 1 ;
304305
305- // Decode entire timebin at once if we have enough threads
306- // This should further improve performance, but code below is buggy...
307- // if (nAdc <= NThreads) {
308- // for (int32_t j = 1; blockOffset < nAdc; j++) {
309- // rawFECChannel = myChannelActive ? rawFECChannel : (iThread + j*NThreads - myOffset);
310-
311- // bool iAmIdle = not myChannelActive;
312-
313- // myChannelActive =
314- // rawFECChannel < zerosupp_link_based::CommonHeaderlPerTBHeader
315- // ? BitIsSet(channelMask, rawFECChannel)
316- // : false;
317-
318- // uint8_t newOffset = warp_scan_inclusive_add(static_cast<uint8_t>(myChannelActive && iAmIdle)) - 1 + blockOffset;
319- // blockOffset = warp_broadcast(newOffset, NThreads - 1) + 1;
320-
321- // myOffset = iAmIdle ? newOffset : myOffset;
322- // }
323- // }
324-
325306 if (not myChannelActive) {
326307 continue ;
327308 }
@@ -331,28 +312,16 @@ GPUd() void GPUTPCCFDecodeZSLink::DecodeTB(
331312
332313 if constexpr (TPCZSHDRV2::TIGHTLY_PACKED_V3) {
333314
334- // Try to access adcData with 4 byte reads instead of 1 byte.
335- // You'd think this would improve performance, but it's actually slower...
336- // const uint32_t* adcDataU32 = reinterpret_cast<const uint32_t*>(adcData);
337-
338315 uint32_t adcBitOffset = myOffset * DECODE_BITS;
339316 uint32_t adcByteOffset = adcBitOffset / CHAR_BIT;
340317 uint32_t adcOffsetInByte = adcBitOffset - adcByteOffset * CHAR_BIT;
341- // uint32_t adcByteOffset = adcBitOffset / 32;
342- // uint32_t adcOffsetInByte = adcBitOffset - adcByteOffset * 32;
343318
344319 uint32_t byte = 0 , bits = 0 ;
345320
346- // uint32_t byte = adcDataU32[adcByteOffset] >> adcOffsetInByte;
347- // uint32_t bits = 32 - adcOffsetInByte;
348- // adcByteOffset++;
349-
350321 while (bits < DECODE_BITS) {
351322 byte |= ((uint32_t )adcData[adcByteOffset]) << bits;
352- // byte |= adcDataU32[adcByteOffset] << bits;
353323 adcByteOffset++;
354324 bits += CHAR_BIT;
355- // bits += 32;
356325 }
357326 adc = byte >> adcOffsetInByte;
358327
0 commit comments