@@ -251,8 +251,7 @@ GPUd() size_t GPUTPCCFDecodeZSLink::DecodePage(GPUSharedMemory& smem, DecodeCtx&
251251 if (discardTimeBin) {
252252 FillWithInvalid (ctx.clusterer , ctx.iThread , ctx.nThreads , ctx.pageDigitOffset , nAdc);
253253 } else {
254- #ifdef GPUCA_GPUCODE
255- DecodeTBMultiThread (
254+ DecodeTB (
256255 smem,
257256 ctx,
258257 adcData,
@@ -261,16 +260,6 @@ GPUd() size_t GPUTPCCFDecodeZSLink::DecodePage(GPUSharedMemory& smem, DecodeCtx&
261260 timeBin,
262261 decHdr->cruID ,
263262 tbHdr->fecInPartition );
264- #else // CPU
265- DecodeTBSingleThread (
266- ctx,
267- adcData,
268- nAdc,
269- channelMask,
270- timeBin,
271- decHdr->cruID ,
272- tbHdr->fecInPartition );
273- #endif
274263 }
275264
276265 ctx.pageDigitOffset += nAdc;
@@ -290,62 +279,7 @@ GPUd() size_t GPUTPCCFDecodeZSLink::DecodePage(GPUSharedMemory& smem, DecodeCtx&
290279 return ctx.pageDigitOffset ;
291280}
292281
293- GPUd () void GPUTPCCFDecodeZSLink::DecodeTBSingleThread (
294- DecodeCtx& ctx,
295- const uint8_t * adcData,
296- uint32_t nAdc,
297- const uint32_t * channelMask,
298- int32_t timeBin,
299- int32_t cru,
300- int32_t fecInPartition)
301- {
302- const CfFragment& fragment = ctx.clusterer .mPmemory ->fragment ;
303-
304- if constexpr (TPCZSHDRV2::TIGHTLY_PACKED_V3) {
305-
306- uint32_t byte = 0 , bits = 0 , nSamplesWritten = 0 , rawFECChannel = 0 ;
307-
308- // unpack adc values, assume tightly packed data
309- while (nSamplesWritten < nAdc) {
310- byte |= adcData[0 ] << bits;
311- adcData++;
312- bits += CHAR_BIT;
313- while (bits >= DECODE_BITS) {
314-
315- // Find next channel with data
316- for (; !ChannelIsActive (channelMask, rawFECChannel); rawFECChannel++) {
317- }
318-
319- // Unpack data for cluster finder
320- o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC (ctx.clusterer , cru, rawFECChannel, fecInPartition);
321-
322- WriteCharge (ctx.clusterer , byte, padAndRow, fragment.toLocal (timeBin), ctx.pageDigitOffset + nSamplesWritten);
323-
324- byte = byte >> DECODE_BITS;
325- bits -= DECODE_BITS;
326- nSamplesWritten++;
327- rawFECChannel++; // Ensure we don't decode same channel twice
328- } // while (bits >= DECODE_BITS)
329- } // while (nSamplesWritten < nAdc)
330-
331- } else { // ! TPCZSHDRV2::TIGHTLY_PACKED_V3
332- uint32_t rawFECChannel = 0 ;
333- const uint64_t * adcData64 = (const uint64_t *)adcData;
334- for (uint32_t j = 0 ; j < nAdc; j++) {
335- for (; !ChannelIsActive (channelMask, rawFECChannel); rawFECChannel++) {
336- }
337-
338- uint32_t adc = (adcData64[j / TPCZSHDRV2::SAMPLESPER64BIT] >> ((j % TPCZSHDRV2::SAMPLESPER64BIT) * DECODE_BITS)) & DECODE_MASK;
339-
340- o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC (ctx.clusterer , cru, rawFECChannel, fecInPartition);
341- float charge = ADCToFloat (adc, DECODE_MASK, DECODE_BITS_FACTOR);
342- WriteCharge (ctx.clusterer , charge, padAndRow, fragment.toLocal (timeBin), ctx.pageDigitOffset + j);
343- rawFECChannel++;
344- }
345- }
346- }
347-
348- GPUd () void GPUTPCCFDecodeZSLink::DecodeTBMultiThread (
282+ GPUd () void GPUTPCCFDecodeZSLink::DecodeTB (
349283 GPUSharedMemory& smem,
350284 DecodeCtx& ctx,
351285 const uint8_t * adcData,
@@ -601,12 +535,6 @@ GPUd() void GPUTPCCFDecodeZSDenseLink::Thread<0>(int32_t nBlocks, int32_t nThrea
601535
602536GPUd () uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage (GPUSharedMemory& smem, DecodeCtx& ctx)
603537{
604- #ifdef GPUCA_GPUCODE
605- constexpr bool DecodeInParallel = true ;
606- #else
607- constexpr bool DecodeInParallel = false ;
608- #endif
609-
610538 const uint8_t * const pageStart = ctx.page ;
611539
612540 const auto * rawDataHeader = Peek<header::RAWDataHeader>(ctx.page );
@@ -651,13 +579,13 @@ GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, Dec
651579 }
652580
653581 if ((uint16_t )(raw::RDHUtils::getPageCounter (rawDataHeader) + 1 ) == raw::RDHUtils::getPageCounter (nextPage)) {
654- nSamplesWrittenTB = DecodeTB<DecodeInParallel, true >(smem, ctx, rawDataHeader, decHeader->cruID , nSamplesLeftInPage, payloadEnd, nextPage);
582+ nSamplesWrittenTB = DecodeTB<true >(smem, ctx, rawDataHeader, decHeader->cruID , nSamplesLeftInPage, payloadEnd, nextPage);
655583 } else {
656584 err = GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF;
657585 break ;
658586 }
659587 } else {
660- nSamplesWrittenTB = DecodeTB<DecodeInParallel, false >(smem, ctx, rawDataHeader, decHeader->cruID , nSamplesLeftInPage, payloadEnd, nextPage);
588+ nSamplesWrittenTB = DecodeTB<false >(smem, ctx, rawDataHeader, decHeader->cruID , nSamplesLeftInPage, payloadEnd, nextPage);
661589 }
662590
663591 // Abort decoding the page if an error was detected.
@@ -712,30 +640,8 @@ GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, Dec
712640 return ctx.pageDigitOffset ;
713641}
714642
715- template <bool DecodeInParallel, bool PayloadExtendsToNextPage>
716- GPUd () int16_t GPUTPCCFDecodeZSDenseLink::DecodeTB (
717- [[maybe_unused]] GPUSharedMemory& smem,
718- DecodeCtx& ctx,
719- const header::RAWDataHeader* rawDataHeader,
720- int32_t cru,
721- uint16_t nSamplesLeftInPage,
722- const uint8_t * payloadEnd,
723- const uint8_t * nextPage)
724- {
725-
726- if constexpr (DecodeInParallel) {
727- return DecodeTBMultiThread<PayloadExtendsToNextPage>(smem, ctx, rawDataHeader, cru, nSamplesLeftInPage, payloadEnd, nextPage);
728- } else {
729- int16_t nSamplesWritten = 0 ;
730- if (ctx.iThread == 0 ) {
731- nSamplesWritten = DecodeTBSingleThread<PayloadExtendsToNextPage>(ctx, rawDataHeader, cru, nSamplesLeftInPage, payloadEnd, nextPage);
732- }
733- return warp_broadcast (nSamplesWritten, 0 );
734- }
735- }
736-
737643template <bool PayloadExtendsToNextPage>
738- GPUd () int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread (
644+ GPUd () int16_t GPUTPCCFDecodeZSDenseLink::DecodeTB (
739645 GPUSharedMemory& smem,
740646 DecodeCtx& ctx,
741647 const header::RAWDataHeader* rawDataHeader,
@@ -883,123 +789,6 @@ GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(
883789#undef MAYBE_PAGE_OVERFLOW
884790}
885791
886- template <bool PayloadExtendsToNextPage>
887- GPUd () int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread (
888- DecodeCtx& ctx,
889- const header::RAWDataHeader* rawDataHeader,
890- int32_t cru,
891- uint16_t nSamplesLeftInPage,
892- const uint8_t * payloadEnd,
893- const uint8_t * nextPage)
894- {
895- #define MAYBE_PAGE_OVERFLOW (pagePtr ) \
896- if constexpr (PayloadExtendsToNextPage) { \
897- if (pagePtr >= payloadEnd && pagePtr < nextPage) { \
898- ptrdiff_t diff = pagePtr - payloadEnd; \
899- pagePtr = nextPage; \
900- ConsumeBytes (pagePtr, sizeof (header::RAWDataHeader) + diff); \
901- } \
902- } else { \
903- if (pagePtr > payloadEnd) { \
904- return -GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW; \
905- } \
906- }
907-
908- using zerosupp_link_based::ChannelPerTBHeader;
909-
910- const CfFragment& fragment = ctx.clusterer .mPmemory ->fragment ;
911-
912- uint8_t linkIds[MaxNLinksPerTimebin];
913- uint8_t channelMasks[MaxNLinksPerTimebin * 10 ] = {0 };
914- uint16_t nSamplesWritten = 0 ;
915-
916- // Read timebin block header
917- uint16_t tbbHdr = ConsumeByte (ctx.page );
918- MAYBE_PAGE_OVERFLOW (ctx.page );
919- tbbHdr |= static_cast <uint16_t >(ConsumeByte (ctx.page )) << CHAR_BIT;
920- MAYBE_PAGE_OVERFLOW (ctx.page );
921-
922- uint8_t nLinksInTimebin = tbbHdr & 0x000F ;
923- uint16_t linkBC = (tbbHdr & 0xFFF0 ) >> 4 ;
924- int32_t timeBin = (linkBC + (uint64_t )(raw::RDHUtils::getHeartBeatOrbit (*rawDataHeader) - ctx.firstHBF ) * constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;
925-
926- uint16_t nSamplesInTB = 0 ;
927-
928- // Read timebin link headers
929- for (uint8_t iLink = 0 ; iLink < nLinksInTimebin; iLink++) {
930- uint8_t timebinLinkHeaderStart = ConsumeByte (ctx.page );
931- MAYBE_PAGE_OVERFLOW (ctx.page );
932-
933- linkIds[iLink] = timebinLinkHeaderStart & 0b00011111 ;
934-
935- bool bitmaskIsFlat = timebinLinkHeaderStart & 0b00100000 ;
936-
937- uint16_t bitmaskL2 = 0x0FFF ;
938- if (not bitmaskIsFlat) {
939- bitmaskL2 = static_cast <uint16_t >(timebinLinkHeaderStart & 0b11000000 ) << 2 | static_cast <uint16_t >(ConsumeByte (ctx.page ));
940- MAYBE_PAGE_OVERFLOW (ctx.page );
941- }
942-
943- for (int32_t i = 0 ; i < 10 ; i++) {
944- if (bitmaskL2 & 1 << i) {
945- nSamplesInTB += CAMath::Popcount (*Peek (ctx.page ));
946- channelMasks[10 * iLink + i] = ConsumeByte (ctx.page );
947- MAYBE_PAGE_OVERFLOW (ctx.page );
948- }
949- }
950-
951- } // for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++)
952-
953- if (nSamplesInTB > nSamplesLeftInPage) {
954- return -GPUErrors::ERROR_TPCZS_INVALID_NADC;
955- }
956-
957- const uint8_t * adcData = ConsumeBytes (ctx.page , (nSamplesInTB * DECODE_BITS + 7 ) / 8 );
958- MAYBE_PAGE_OVERFLOW (ctx.page );
959-
960- bool discardTimeBin = not fragment.contains (timeBin);
961- discardTimeBin |= (ctx.tpcTimeBinCut > 0 && timeBin > ctx.tpcTimeBinCut );
962-
963- if (discardTimeBin) {
964- return FillWithInvalid (ctx.clusterer , 0 , 1 , ctx.pageDigitOffset , nSamplesInTB);
965- }
966-
967- // Unpack ADC
968- uint32_t byte = 0 , bits = 0 ;
969- uint16_t rawFECChannel = 0 ;
970-
971- // unpack adc values, assume tightly packed data
972- while (nSamplesWritten < nSamplesInTB) {
973- byte |= static_cast <uint32_t >(ConsumeByte (adcData)) << bits;
974- MAYBE_PAGE_OVERFLOW (adcData);
975- bits += CHAR_BIT;
976- while (bits >= DECODE_BITS) {
977-
978- // Find next channel with data
979- for (; !ChannelIsActive (channelMasks, rawFECChannel); rawFECChannel++) {
980- }
981-
982- int32_t iLink = rawFECChannel / ChannelPerTBHeader;
983- int32_t rawFECChannelLink = rawFECChannel % ChannelPerTBHeader;
984-
985- // Unpack data for cluster finder
986- o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC (ctx.clusterer , cru, rawFECChannelLink, linkIds[iLink]);
987-
988- float charge = ADCToFloat (byte, DECODE_MASK, DECODE_BITS_FACTOR);
989- WriteCharge (ctx.clusterer , charge, padAndRow, fragment.toLocal (timeBin), ctx.pageDigitOffset + nSamplesWritten);
990-
991- byte >>= DECODE_BITS;
992- bits -= DECODE_BITS;
993- nSamplesWritten++;
994- rawFECChannel++; // Ensure we don't decode same channel twice
995- } // while (bits >= DECODE_BITS)
996- } // while (nSamplesWritten < nAdc)
997-
998- return nSamplesWritten;
999-
1000- #undef MAYBE_PAGE_OVERFLOW
1001- }
1002-
1003792GPUd () bool GPUTPCCFDecodeZSDenseLink::ChannelIsActive (const uint8_t * chan, uint16_t chanIndex)
1004793{
1005794 constexpr uint8_t N_BITS_PER_ENTRY = sizeof (*chan) * CHAR_BIT;
0 commit comments