Skip to content

Commit e8ad660

Browse files
committed
GPU: Unify GPU and CPU versions of TPC ZS decoders.
1 parent 22e2e61 commit e8ad660

File tree

2 files changed

+7 -225 lines changed

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx

Lines changed: 5 additions & 216 deletions
Original file line number | Diff line number | Diff line change
@@ -251,8 +251,7 @@ GPUd() size_t GPUTPCCFDecodeZSLink::DecodePage(GPUSharedMemory& smem, DecodeCtx&
251251
if (discardTimeBin) {
252252
FillWithInvalid(ctx.clusterer, ctx.iThread, ctx.nThreads, ctx.pageDigitOffset, nAdc);
253253
} else {
254-
#ifdef GPUCA_GPUCODE
255-
DecodeTBMultiThread(
254+
DecodeTB(
256255
smem,
257256
ctx,
258257
adcData,
@@ -261,16 +260,6 @@ GPUd() size_t GPUTPCCFDecodeZSLink::DecodePage(GPUSharedMemory& smem, DecodeCtx&
261260
timeBin,
262261
decHdr->cruID,
263262
tbHdr->fecInPartition);
264-
#else // CPU
265-
DecodeTBSingleThread(
266-
ctx,
267-
adcData,
268-
nAdc,
269-
channelMask,
270-
timeBin,
271-
decHdr->cruID,
272-
tbHdr->fecInPartition);
273-
#endif
274263
}
275264

276265
ctx.pageDigitOffset += nAdc;
@@ -290,62 +279,7 @@ GPUd() size_t GPUTPCCFDecodeZSLink::DecodePage(GPUSharedMemory& smem, DecodeCtx&
290279
return ctx.pageDigitOffset;
291280
}
292281

293-
GPUd() void GPUTPCCFDecodeZSLink::DecodeTBSingleThread(
294-
DecodeCtx& ctx,
295-
const uint8_t* adcData,
296-
uint32_t nAdc,
297-
const uint32_t* channelMask,
298-
int32_t timeBin,
299-
int32_t cru,
300-
int32_t fecInPartition)
301-
{
302-
const CfFragment& fragment = ctx.clusterer.mPmemory->fragment;
303-
304-
if constexpr (TPCZSHDRV2::TIGHTLY_PACKED_V3) {
305-
306-
uint32_t byte = 0, bits = 0, nSamplesWritten = 0, rawFECChannel = 0;
307-
308-
// unpack adc values, assume tightly packed data
309-
while (nSamplesWritten < nAdc) {
310-
byte |= adcData[0] << bits;
311-
adcData++;
312-
bits += CHAR_BIT;
313-
while (bits >= DECODE_BITS) {
314-
315-
// Find next channel with data
316-
for (; !ChannelIsActive(channelMask, rawFECChannel); rawFECChannel++) {
317-
}
318-
319-
// Unpack data for cluster finder
320-
o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(ctx.clusterer, cru, rawFECChannel, fecInPartition);
321-
322-
WriteCharge(ctx.clusterer, byte, padAndRow, fragment.toLocal(timeBin), ctx.pageDigitOffset + nSamplesWritten);
323-
324-
byte = byte >> DECODE_BITS;
325-
bits -= DECODE_BITS;
326-
nSamplesWritten++;
327-
rawFECChannel++; // Ensure we don't decode same channel twice
328-
} // while (bits >= DECODE_BITS)
329-
} // while (nSamplesWritten < nAdc)
330-
331-
} else { // ! TPCZSHDRV2::TIGHTLY_PACKED_V3
332-
uint32_t rawFECChannel = 0;
333-
const uint64_t* adcData64 = (const uint64_t*)adcData;
334-
for (uint32_t j = 0; j < nAdc; j++) {
335-
for (; !ChannelIsActive(channelMask, rawFECChannel); rawFECChannel++) {
336-
}
337-
338-
uint32_t adc = (adcData64[j / TPCZSHDRV2::SAMPLESPER64BIT] >> ((j % TPCZSHDRV2::SAMPLESPER64BIT) * DECODE_BITS)) & DECODE_MASK;
339-
340-
o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(ctx.clusterer, cru, rawFECChannel, fecInPartition);
341-
float charge = ADCToFloat(adc, DECODE_MASK, DECODE_BITS_FACTOR);
342-
WriteCharge(ctx.clusterer, charge, padAndRow, fragment.toLocal(timeBin), ctx.pageDigitOffset + j);
343-
rawFECChannel++;
344-
}
345-
}
346-
}
347-
348-
GPUd() void GPUTPCCFDecodeZSLink::DecodeTBMultiThread(
282+
GPUd() void GPUTPCCFDecodeZSLink::DecodeTB(
349283
GPUSharedMemory& smem,
350284
DecodeCtx& ctx,
351285
const uint8_t* adcData,
@@ -601,12 +535,6 @@ GPUd() void GPUTPCCFDecodeZSDenseLink::Thread<0>(int32_t nBlocks, int32_t nThrea
601535

602536
GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, DecodeCtx& ctx)
603537
{
604-
#ifdef GPUCA_GPUCODE
605-
constexpr bool DecodeInParallel = true;
606-
#else
607-
constexpr bool DecodeInParallel = false;
608-
#endif
609-
610538
const uint8_t* const pageStart = ctx.page;
611539

612540
const auto* rawDataHeader = Peek<header::RAWDataHeader>(ctx.page);
@@ -651,13 +579,13 @@ GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, Dec
651579
}
652580

653581
if ((uint16_t)(raw::RDHUtils::getPageCounter(rawDataHeader) + 1) == raw::RDHUtils::getPageCounter(nextPage)) {
654-
nSamplesWrittenTB = DecodeTB<DecodeInParallel, true>(smem, ctx, rawDataHeader, decHeader->cruID, nSamplesLeftInPage, payloadEnd, nextPage);
582+
nSamplesWrittenTB = DecodeTB<true>(smem, ctx, rawDataHeader, decHeader->cruID, nSamplesLeftInPage, payloadEnd, nextPage);
655583
} else {
656584
err = GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF;
657585
break;
658586
}
659587
} else {
660-
nSamplesWrittenTB = DecodeTB<DecodeInParallel, false>(smem, ctx, rawDataHeader, decHeader->cruID, nSamplesLeftInPage, payloadEnd, nextPage);
588+
nSamplesWrittenTB = DecodeTB<false>(smem, ctx, rawDataHeader, decHeader->cruID, nSamplesLeftInPage, payloadEnd, nextPage);
661589
}
662590

663591
// Abort decoding the page if an error was detected.
@@ -712,30 +640,8 @@ GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, Dec
712640
return ctx.pageDigitOffset;
713641
}
714642

715-
template <bool DecodeInParallel, bool PayloadExtendsToNextPage>
716-
GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTB(
717-
[[maybe_unused]] GPUSharedMemory& smem,
718-
DecodeCtx& ctx,
719-
const header::RAWDataHeader* rawDataHeader,
720-
int32_t cru,
721-
uint16_t nSamplesLeftInPage,
722-
const uint8_t* payloadEnd,
723-
const uint8_t* nextPage)
724-
{
725-
726-
if constexpr (DecodeInParallel) {
727-
return DecodeTBMultiThread<PayloadExtendsToNextPage>(smem, ctx, rawDataHeader, cru, nSamplesLeftInPage, payloadEnd, nextPage);
728-
} else {
729-
int16_t nSamplesWritten = 0;
730-
if (ctx.iThread == 0) {
731-
nSamplesWritten = DecodeTBSingleThread<PayloadExtendsToNextPage>(ctx, rawDataHeader, cru, nSamplesLeftInPage, payloadEnd, nextPage);
732-
}
733-
return warp_broadcast(nSamplesWritten, 0);
734-
}
735-
}
736-
737643
template <bool PayloadExtendsToNextPage>
738-
GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(
644+
GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTB(
739645
GPUSharedMemory& smem,
740646
DecodeCtx& ctx,
741647
const header::RAWDataHeader* rawDataHeader,
@@ -883,123 +789,6 @@ GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(
883789
#undef MAYBE_PAGE_OVERFLOW
884790
}
885791

886-
template <bool PayloadExtendsToNextPage>
887-
GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread(
888-
DecodeCtx& ctx,
889-
const header::RAWDataHeader* rawDataHeader,
890-
int32_t cru,
891-
uint16_t nSamplesLeftInPage,
892-
const uint8_t* payloadEnd,
893-
const uint8_t* nextPage)
894-
{
895-
#define MAYBE_PAGE_OVERFLOW(pagePtr) \
896-
if constexpr (PayloadExtendsToNextPage) { \
897-
if (pagePtr >= payloadEnd && pagePtr < nextPage) { \
898-
ptrdiff_t diff = pagePtr - payloadEnd; \
899-
pagePtr = nextPage; \
900-
ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff); \
901-
} \
902-
} else { \
903-
if (pagePtr > payloadEnd) { \
904-
return -GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW; \
905-
} \
906-
}
907-
908-
using zerosupp_link_based::ChannelPerTBHeader;
909-
910-
const CfFragment& fragment = ctx.clusterer.mPmemory->fragment;
911-
912-
uint8_t linkIds[MaxNLinksPerTimebin];
913-
uint8_t channelMasks[MaxNLinksPerTimebin * 10] = {0};
914-
uint16_t nSamplesWritten = 0;
915-
916-
// Read timebin block header
917-
uint16_t tbbHdr = ConsumeByte(ctx.page);
918-
MAYBE_PAGE_OVERFLOW(ctx.page);
919-
tbbHdr |= static_cast<uint16_t>(ConsumeByte(ctx.page)) << CHAR_BIT;
920-
MAYBE_PAGE_OVERFLOW(ctx.page);
921-
922-
uint8_t nLinksInTimebin = tbbHdr & 0x000F;
923-
uint16_t linkBC = (tbbHdr & 0xFFF0) >> 4;
924-
int32_t timeBin = (linkBC + (uint64_t)(raw::RDHUtils::getHeartBeatOrbit(*rawDataHeader) - ctx.firstHBF) * constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;
925-
926-
uint16_t nSamplesInTB = 0;
927-
928-
// Read timebin link headers
929-
for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) {
930-
uint8_t timebinLinkHeaderStart = ConsumeByte(ctx.page);
931-
MAYBE_PAGE_OVERFLOW(ctx.page);
932-
933-
linkIds[iLink] = timebinLinkHeaderStart & 0b00011111;
934-
935-
bool bitmaskIsFlat = timebinLinkHeaderStart & 0b00100000;
936-
937-
uint16_t bitmaskL2 = 0x0FFF;
938-
if (not bitmaskIsFlat) {
939-
bitmaskL2 = static_cast<uint16_t>(timebinLinkHeaderStart & 0b11000000) << 2 | static_cast<uint16_t>(ConsumeByte(ctx.page));
940-
MAYBE_PAGE_OVERFLOW(ctx.page);
941-
}
942-
943-
for (int32_t i = 0; i < 10; i++) {
944-
if (bitmaskL2 & 1 << i) {
945-
nSamplesInTB += CAMath::Popcount(*Peek(ctx.page));
946-
channelMasks[10 * iLink + i] = ConsumeByte(ctx.page);
947-
MAYBE_PAGE_OVERFLOW(ctx.page);
948-
}
949-
}
950-
951-
} // for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++)
952-
953-
if (nSamplesInTB > nSamplesLeftInPage) {
954-
return -GPUErrors::ERROR_TPCZS_INVALID_NADC;
955-
}
956-
957-
const uint8_t* adcData = ConsumeBytes(ctx.page, (nSamplesInTB * DECODE_BITS + 7) / 8);
958-
MAYBE_PAGE_OVERFLOW(ctx.page);
959-
960-
bool discardTimeBin = not fragment.contains(timeBin);
961-
discardTimeBin |= (ctx.tpcTimeBinCut > 0 && timeBin > ctx.tpcTimeBinCut);
962-
963-
if (discardTimeBin) {
964-
return FillWithInvalid(ctx.clusterer, 0, 1, ctx.pageDigitOffset, nSamplesInTB);
965-
}
966-
967-
// Unpack ADC
968-
uint32_t byte = 0, bits = 0;
969-
uint16_t rawFECChannel = 0;
970-
971-
// unpack adc values, assume tightly packed data
972-
while (nSamplesWritten < nSamplesInTB) {
973-
byte |= static_cast<uint32_t>(ConsumeByte(adcData)) << bits;
974-
MAYBE_PAGE_OVERFLOW(adcData);
975-
bits += CHAR_BIT;
976-
while (bits >= DECODE_BITS) {
977-
978-
// Find next channel with data
979-
for (; !ChannelIsActive(channelMasks, rawFECChannel); rawFECChannel++) {
980-
}
981-
982-
int32_t iLink = rawFECChannel / ChannelPerTBHeader;
983-
int32_t rawFECChannelLink = rawFECChannel % ChannelPerTBHeader;
984-
985-
// Unpack data for cluster finder
986-
o2::tpc::PadPos padAndRow = GetPadAndRowFromFEC(ctx.clusterer, cru, rawFECChannelLink, linkIds[iLink]);
987-
988-
float charge = ADCToFloat(byte, DECODE_MASK, DECODE_BITS_FACTOR);
989-
WriteCharge(ctx.clusterer, charge, padAndRow, fragment.toLocal(timeBin), ctx.pageDigitOffset + nSamplesWritten);
990-
991-
byte >>= DECODE_BITS;
992-
bits -= DECODE_BITS;
993-
nSamplesWritten++;
994-
rawFECChannel++; // Ensure we don't decode same channel twice
995-
} // while (bits >= DECODE_BITS)
996-
} // while (nSamplesWritten < nAdc)
997-
998-
return nSamplesWritten;
999-
1000-
#undef MAYBE_PAGE_OVERFLOW
1001-
}
1002-
1003792
GPUd() bool GPUTPCCFDecodeZSDenseLink::ChannelIsActive(const uint8_t* chan, uint16_t chanIndex)
1004793
{
1005794
constexpr uint8_t N_BITS_PER_ENTRY = sizeof(*chan) * CHAR_BIT;

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h

Lines changed: 2 additions & 9 deletions
Original file line number | Diff line number | Diff line change
@@ -148,8 +148,7 @@ class GPUTPCCFDecodeZSLink : public GPUTPCCFDecodeZSLinkBase
148148
GPUd() static void GetChannelBitmask(const tpc::zerosupp_link_based::CommonHeader& tbHdr, uint32_t* chan);
149149
GPUd() static bool ChannelIsActive(const uint32_t* chan, uint8_t chanIndex);
150150

151-
GPUd() static void DecodeTBSingleThread(DecodeCtx& ctx, const uint8_t* adcData, uint32_t nAdc, const uint32_t* channelMask, int32_t timeBin, int32_t cru, int32_t fecInPartition);
152-
GPUd() static void DecodeTBMultiThread(GPUSharedMemory& smem, DecodeCtx& ctx, const uint8_t* adcData, uint32_t nAdc, const uint32_t* channelMask, int32_t timeBin, int32_t cru, int32_t fecInPartition);
151+
GPUd() static void DecodeTB(GPUSharedMemory& smem, DecodeCtx& ctx, const uint8_t* adcData, uint32_t nAdc, const uint32_t* channelMask, int32_t timeBin, int32_t cru, int32_t fecInPartition);
153152
};
154153

155154
class GPUTPCCFDecodeZSDenseLink : public GPUTPCCFDecodeZSLinkBase
@@ -179,14 +178,8 @@ class GPUTPCCFDecodeZSDenseLink : public GPUTPCCFDecodeZSLinkBase
179178
// Decode a single timebin within an 8kb page.
180179
// Returns the number of samples decoded from the page
181180
// or negative value to indicate an error (no samples are written in this case)
182-
template <bool DecodeInParallel, bool PayloadExtendsToNextPage>
183-
GPUd() static int16_t DecodeTB(GPUSharedMemory& smem, DecodeCtx& ctx, const header::RAWDataHeader* rawDataHeader, int32_t cru, uint16_t nSamplesLeftInPage, const uint8_t* payloadEnd, const uint8_t* nextPage);
184-
185181
template <bool PayloadExtendsToNextPage>
186-
GPUd() static int16_t DecodeTBSingleThread(DecodeCtx& ctx, const header::RAWDataHeader* rawDataHeader, int32_t cru, uint16_t nSamplesLeftInPage, const uint8_t* payloadEnd, const uint8_t* nextPage);
187-
188-
template <bool PayloadExtendsToNextPage>
189-
GPUd() static int16_t DecodeTBMultiThread(GPUSharedMemory& smem, DecodeCtx& ctx, const header::RAWDataHeader* rawDataHeader, int32_t cru, uint16_t nSamplesLeftInPage, const uint8_t* payloadEnd, const uint8_t* nextPage);
182+
GPUd() static int16_t DecodeTB(GPUSharedMemory& smem, DecodeCtx& ctx, const header::RAWDataHeader* rawDataHeader, int32_t cru, uint16_t nSamplesLeftInPage, const uint8_t* payloadEnd, const uint8_t* nextPage);
190183
};
191184

192185
} // namespace o2::gpu

0 commit comments

Comments (0)