Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 5 additions & 1 deletion GPU/GPUTracking/Global/GPUErrorCodes.h
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,10 @@ GPUCA_ERROR_CODE(26, ERROR_TPCZS_INVALID_ROW, SectorRow)
GPUCA_ERROR_CODE(27, ERROR_TPCZS_INVALID_NADC, SectorCRU, SamplesInPage, SamplesWritten) // Invalid number of ADC samples in header, existing samples were decoded
GPUCA_ERROR_CODE(28, ERROR_TPCZS_INCOMPLETE_HBF, SectorCRU, PacketCount, NextPacketCount) // Part of HBF is missing, decoding incomplete
GPUCA_ERROR_CODE(29, ERROR_TPCZS_INVALID_OFFSET, SectorEndpoint, Value, Expected) // Raw page is skipped since it contains invalid payload offset
GPUCA_ERROR_CODE(29, MAX_GPUCA_ERROR_NUMBER)
GPUCA_ERROR_CODE(30, ERROR_TPCZS_INVALID_MAGIC_WORD, Value) // ZS header contains wrong magic word
GPUCA_ERROR_CODE(31, ERROR_TPCZS_PAGE_OVERFLOW, Position, PageEnd) // Ran out of page to decode
GPUCA_ERROR_CODE(32, ERROR_TPCZS_VERSION_MISMATCH, Value, Expected) // ZS decoder received page with wrong version
GPUCA_ERROR_CODE(33, ERROR_TPCZS_UNKNOWN, ErrorCode) // Unkown or invalid error code raised in decoder
GPUCA_ERROR_CODE(33, MAX_GPUCA_ERROR_NUMBER)

// #define GPUCA_CHECK_TPCZS_CORRUPTION
156 changes: 104 additions & 52 deletions GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -604,61 +604,107 @@ GPUd() uint32_t GPUTPCCFDecodeZSDenseLink::DecodePage(GPUSharedMemory& smem, pro
const auto* decHeader = Peek<TPCZSHDRV2>(page, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2));
ConsumeHeader<header::RAWDataHeader>(page);

assert(decHeader->version >= ZSVersionDenseLinkBased);
assert(decHeader->magicWord == tpc::zerosupp_link_based::CommonHeader::MagicWordLinkZSMetaHeader);

uint16_t nSamplesWritten = 0;
const uint16_t nSamplesInPage = decHeader->nADCsamples;

const auto* payloadEnd = Peek(pageStart, raw::RDHUtils::getMemorySize(*rawDataHeader) - sizeof(TPCZSHDRV2) - ((decHeader->flags & TPCZSHDRV2::ZSFlags::TriggerWordPresent) ? TPCZSHDRV2::TRIGGER_WORD_SIZE : 0));
const auto* nextPage = Peek(pageStart, TPCZSHDR::TPC_ZS_PAGE_SIZE);

const bool extendsToNextPage = decHeader->flags & TPCZSHDRV2::ZSFlags::payloadExtendsToNextPage;

ConsumeBytes(page, decHeader->firstZSDataOffset - sizeof(o2::header::RAWDataHeader));

for (uint16_t i = 0; i < decHeader->nTimebinHeaders; i++) {
int err = GPUErrors::ERROR_NONE;

[[maybe_unused]] ptrdiff_t sizeLeftInPage = payloadEnd - page;
assert(sizeLeftInPage > 0);
if (decHeader->version < ZSVersionDenseLinkBased) {
err = GPUErrors::ERROR_TPCZS_VERSION_MISMATCH;
}

uint16_t nSamplesWrittenTB = 0;
if (decHeader->magicWord != zerosupp_link_based::CommonHeader::MagicWordLinkZSMetaHeader) {
err = GPUErrors::ERROR_TPCZS_INVALID_MAGIC_WORD;
}

for (uint16_t i = 0; i < decHeader->nTimebinHeaders && !err; i++) {

ptrdiff_t sizeLeftInPage = payloadEnd - page;
if (sizeLeftInPage <= 0) {
err = GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW;
break;
}

int16_t nSamplesWrittenTB = 0;
uint16_t nSamplesLeftInPage = nSamplesInPage - nSamplesWritten;

if (i == decHeader->nTimebinHeaders - 1 && extendsToNextPage) {
if (raw::RDHUtils::getMemorySize(*rawDataHeader) != TPCZSHDR::TPC_ZS_PAGE_SIZE) {
err = GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW;
break;
}

if (i == decHeader->nTimebinHeaders - 1 && decHeader->flags & o2::tpc::TPCZSHDRV2::ZSFlags::payloadExtendsToNextPage) {
assert(o2::raw::RDHUtils::getMemorySize(*rawDataHeader) == TPCZSHDR::TPC_ZS_PAGE_SIZE);
if ((uint16_t)(raw::RDHUtils::getPageCounter(rawDataHeader) + 1) == raw::RDHUtils::getPageCounter(nextPage)) {
nSamplesWrittenTB = DecodeTB<DecodeInParallel, true>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, payloadEnd, nextPage);
nSamplesWrittenTB = DecodeTB<DecodeInParallel, true>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, nSamplesLeftInPage, payloadEnd, nextPage);
} else {
nSamplesWrittenTB = FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nSamplesInPage - nSamplesWritten);
#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
if (iThread == 0) {
clusterer.raiseError(GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF, clusterer.mISector * 1000 + decHeader->cruID, raw::RDHUtils::getPageCounter(rawDataHeader), raw::RDHUtils::getPageCounter(nextPage));
}
#endif
err = GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF;
break;
}
} else {
nSamplesWrittenTB = DecodeTB<DecodeInParallel, false>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, payloadEnd, nextPage);
nSamplesWrittenTB = DecodeTB<DecodeInParallel, false>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, decHeader->cruID, nSamplesLeftInPage, payloadEnd, nextPage);
}

// Abort decoding the page if an error was detected.
if (nSamplesWrittenTB < 0) {
err = -nSamplesWrittenTB;
break;
}

assert(nSamplesWritten <= nSamplesInPage);
nSamplesWritten += nSamplesWrittenTB;
pageDigitOffset += nSamplesWrittenTB;
} // for (uint16_t i = 0; i < decHeader->nTimebinHeaders; i++)

#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
if (iThread == 0 && nSamplesWritten != nSamplesInPage) {
clusterer.raiseError(GPUErrors::ERROR_TPCZS_INVALID_NADC, clusterer.mISector * 1000 + decHeader->cruID, nSamplesInPage, nSamplesWritten);
/*#ifndef GPUCA_GPUCODE
FILE* foo = fopen("dump.bin", "w+b");
fwrite(pageSrc, 1, o2::raw::RDHUtils::getMemorySize(*rdHdr), foo);
fclose(foo);
#endif*/
if (nSamplesWritten != nSamplesInPage) {
if (nSamplesWritten < nSamplesInPage) {
pageDigitOffset += FillWithInvalid(clusterer, iThread, nThreads, pageDigitOffset, nSamplesInPage - nSamplesWritten);
}
err = !err ? GPUErrors::ERROR_TPCZS_INVALID_NADC : err; // Ensure we don't overwrite any previous error
}

if (iThread == 0 && err) {
[[maybe_unused]] bool dumpPage = false;

if (err == GPUErrors::ERROR_TPCZS_VERSION_MISMATCH) {
clusterer.raiseError(err, decHeader->version, ZSVersionDenseLinkBased);
} else if (err == GPUErrors::ERROR_TPCZS_INVALID_MAGIC_WORD) {
clusterer.raiseError(err, decHeader->magicWord);
} else if (err == GPUErrors::ERROR_TPCZS_INCOMPLETE_HBF) {
clusterer.raiseError(err, clusterer.mISector * 1000 + decHeader->cruID, raw::RDHUtils::getPageCounter(rawDataHeader), raw::RDHUtils::getPageCounter(nextPage));
} else if (err == GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW) {
clusterer.raiseError(err, extendsToNextPage);
dumpPage = true;
} else if (err == GPUErrors::ERROR_TPCZS_INVALID_NADC) {
clusterer.raiseError(err, nSamplesInPage, nSamplesWritten, extendsToNextPage);
dumpPage = true;
} else {
clusterer.raiseError(GPUErrors::ERROR_TPCZS_UNKNOWN, err);
}

#ifdef GPUCA_CHECK_TPCZS_CORRUPTION
#ifndef GPUCA_GPUCODE
if (dumpPage) {
// allocate more space on the stack for fname, so it can be overwritten by hand in a debugger.
const char fname[64] = "dump00.bin";
FILE* foo = fopen(fname, "w+b");
fwrite(pageStart, 1, TPCZSHDR::TPC_ZS_PAGE_SIZE, foo);
fclose(foo);
}
#endif
#endif
}

return pageDigitOffset;
}

template <bool DecodeInParallel, bool PayloadExtendsToNextPage>
GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTB(
GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTB(
processorType& clusterer,
[[maybe_unused]] GPUSharedMemory& smem,
int32_t iThread,
Expand All @@ -667,23 +713,24 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTB(
const header::RAWDataHeader* rawDataHeader,
int32_t firstHBF,
int32_t cru,
[[maybe_unused]] const uint8_t* payloadEnd,
[[maybe_unused]] const uint8_t* nextPage)
uint16_t nSamplesLeftInPage,
const uint8_t* payloadEnd,
const uint8_t* nextPage)
{

if constexpr (DecodeInParallel) {
return DecodeTBMultiThread<PayloadExtendsToNextPage>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, cru, payloadEnd, nextPage);
return DecodeTBMultiThread<PayloadExtendsToNextPage>(clusterer, smem, iThread, page, pageDigitOffset, rawDataHeader, firstHBF, cru, nSamplesLeftInPage, payloadEnd, nextPage);
} else {
uint16_t nSamplesWritten = 0;
int16_t nSamplesWritten = 0;
if (iThread == 0) {
nSamplesWritten = DecodeTBSingleThread<PayloadExtendsToNextPage>(clusterer, page, pageDigitOffset, rawDataHeader, firstHBF, cru, payloadEnd, nextPage);
nSamplesWritten = DecodeTBSingleThread<PayloadExtendsToNextPage>(clusterer, page, pageDigitOffset, rawDataHeader, firstHBF, cru, nSamplesLeftInPage, payloadEnd, nextPage);
}
return warp_broadcast(nSamplesWritten, 0);
}
}

template <bool PayloadExtendsToNextPage>
GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(
GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(
processorType& clusterer,
GPUSharedMemory& smem,
const int32_t iThread,
Expand All @@ -692,8 +739,9 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(
const header::RAWDataHeader* rawDataHeader,
int32_t firstHBF,
int32_t cru,
[[maybe_unused]] const uint8_t* payloadEnd,
[[maybe_unused]] const uint8_t* nextPage)
uint16_t nSamplesLeftInPage,
const uint8_t* payloadEnd,
const uint8_t* nextPage)
{
#define MAYBE_PAGE_OVERFLOW(pagePtr) \
if constexpr (PayloadExtendsToNextPage) { \
Expand All @@ -703,7 +751,9 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(
ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff); \
} \
} else { \
assert(pagePtr <= payloadEnd); \
if (pagePtr > payloadEnd) { \
return -GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW; \
} \
}

#define PEEK_OVERFLOW(pagePtr, offset) \
Expand All @@ -728,7 +778,7 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(
uint16_t linkBC = (tbbHdr & 0xFFF0) >> 4;
int32_t timeBin = (linkBC + (uint64_t)(raw::RDHUtils::getHeartBeatOrbit(*rawDataHeader) - firstHBF) * constants::lhc::LHCMaxBunches) / LHCBCPERTIMEBIN;

uint16_t nSamplesInTB = 0;
int16_t nSamplesInTB = 0;

// Read timebin link headers
for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++) {
Expand All @@ -747,7 +797,6 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(
}

int32_t nBytesBitmask = CAMath::Popcount(bitmaskL2);
assert(nBytesBitmask <= 10);

for (int32_t chan = iThread; chan < CAMath::nextMultipleOf<NTHREADS>(80); chan += NTHREADS) {
int32_t chanL2Idx = chan / 8;
Expand All @@ -756,7 +805,6 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(
int32_t chanByteOffset = nBytesBitmask - 1 - CAMath::Popcount(bitmaskL2 >> (chanL2Idx + 1));

uint8_t myChannelHasData = (chan < 80 && l2 ? TEST_BIT(PEEK_OVERFLOW(page, chanByteOffset), chan % 8) : 0);
assert(myChannelHasData == 0 || myChannelHasData == 1);

int32_t nSamplesStep;
int32_t threadSampleOffset = CfUtils::warpPredicateScan(myChannelHasData, &nSamplesStep);
Expand All @@ -779,13 +827,17 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(

GPUbarrierWarp(); // Ensure all writes to shared memory are finished, before reading it

const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
MAYBE_PAGE_OVERFLOW(page); // TODO: We don't need this check?
if (nSamplesInTB > nSamplesLeftInPage) {
return -GPUErrors::ERROR_TPCZS_INVALID_NADC;
}

if (not fragment.contains(timeBin)) {
return FillWithInvalid(clusterer, iThread, NTHREADS, pageDigitOffset, nSamplesInTB);
}

const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
MAYBE_PAGE_OVERFLOW(page);

// Unpack ADC
int32_t iLink = 0;
for (uint16_t sample = iThread; sample < nSamplesInTB; sample += NTHREADS) {
Expand Down Expand Up @@ -821,9 +873,6 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(

GPUbarrierWarp(); // Ensure all reads to shared memory are finished, before decoding next header into shmem

assert(PayloadExtendsToNextPage || adcData <= page);
assert(PayloadExtendsToNextPage || page <= payloadEnd);

return nSamplesInTB;

#undef TEST_BIT
Expand All @@ -832,13 +881,14 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBMultiThread(
}

template <bool PayloadExtendsToNextPage>
GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread(
GPUd() int16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread(
processorType& clusterer,
const uint8_t*& page,
uint32_t pageDigitOffset,
const header::RAWDataHeader* rawDataHeader,
int32_t firstHBF,
int32_t cru,
uint16_t nSamplesLeftInPage,
[[maybe_unused]] const uint8_t* payloadEnd,
[[maybe_unused]] const uint8_t* nextPage)
{
Expand All @@ -850,7 +900,9 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread(
ConsumeBytes(pagePtr, sizeof(header::RAWDataHeader) + diff); \
} \
} else { \
assert(pagePtr <= payloadEnd); \
if (pagePtr > payloadEnd) { \
return -GPUErrors::ERROR_TPCZS_PAGE_OVERFLOW; \
} \
}

using zerosupp_link_based::ChannelPerTBHeader;
Expand Down Expand Up @@ -898,14 +950,18 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread(

} // for (uint8_t iLink = 0; iLink < nLinksInTimebin; iLink++)

const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
MAYBE_PAGE_OVERFLOW(page);
if (nSamplesInTB > nSamplesLeftInPage) {
return -GPUErrors::ERROR_TPCZS_INVALID_NADC;
}

if (not fragment.contains(timeBin)) {
FillWithInvalid(clusterer, 0, 1, pageDigitOffset, nSamplesInTB);
return nSamplesInTB;
}

const uint8_t* adcData = ConsumeBytes(page, (nSamplesInTB * DECODE_BITS + 7) / 8);
MAYBE_PAGE_OVERFLOW(page);

// Unpack ADC
uint32_t byte = 0, bits = 0;
uint16_t rawFECChannel = 0;
Expand Down Expand Up @@ -937,10 +993,6 @@ GPUd() uint16_t GPUTPCCFDecodeZSDenseLink::DecodeTBSingleThread(
} // while (bits >= DECODE_BITS)
} // while (nSamplesWritten < nAdc)

assert(PayloadExtendsToNextPage || adcData <= page);
assert(PayloadExtendsToNextPage || page <= payloadEnd);
assert(nSamplesWritten == nSamplesInTB);

return nSamplesWritten;

#undef MAYBE_PAGE_OVERFLOW
Expand Down
9 changes: 6 additions & 3 deletions GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDecodeZS.h
Original file line number Diff line number Diff line change
Expand Up @@ -167,14 +167,17 @@ class GPUTPCCFDecodeZSDenseLink : public GPUTPCCFDecodeZSLinkBase

GPUd() static bool ChannelIsActive(const uint8_t* chan, uint16_t chanIndex);

// Decode a single timebin within an 8kb page.
// Returns the number of samples decoded from the page
// or negative value to indicate an error (no samples are written in this case)
template <bool DecodeInParallel, bool PayloadExtendsToNextPage>
GPUd() static uint16_t DecodeTB(processorType& clusterer, GPUSharedMemory& smem, int32_t iThread, const uint8_t*& page, uint32_t pageDigitOffset, const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, const uint8_t* payloadEnd, const uint8_t* nextPage);
GPUd() static int16_t DecodeTB(processorType& clusterer, GPUSharedMemory& smem, int32_t iThread, const uint8_t*& page, uint32_t pageDigitOffset, const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, uint16_t nSamplesLeftInPage, const uint8_t* payloadEnd, const uint8_t* nextPage);

template <bool PayloadExtendsToNextPage>
GPUd() static uint16_t DecodeTBSingleThread(processorType& clusterer, const uint8_t*& page, uint32_t pageDigitOffset, const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, const uint8_t* payloadEnd, const uint8_t* nextPage);
GPUd() static int16_t DecodeTBSingleThread(processorType& clusterer, const uint8_t*& page, uint32_t pageDigitOffset, const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, uint16_t nSamplesLeftInPage, const uint8_t* payloadEnd, const uint8_t* nextPage);

template <bool PayloadExtendsToNextPage>
GPUd() static uint16_t DecodeTBMultiThread(processorType& clusterer, GPUSharedMemory& smem, const int32_t iThread, const uint8_t*& page, uint32_t pageDigitOffset, const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, const uint8_t* payloadEnd, const uint8_t* nextPage);
GPUd() static int16_t DecodeTBMultiThread(processorType& clusterer, GPUSharedMemory& smem, const int32_t iThread, const uint8_t*& page, uint32_t pageDigitOffset, const header::RAWDataHeader* rawDataHeader, int32_t firstHBF, int32_t cru, uint16_t nSamplesLeftInPage, const uint8_t* payloadEnd, const uint8_t* nextPage);
};

} // namespace o2::gpu
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/kernels.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ o2_gpu_add_kernel("GPUTPCCFStreamCompaction, scanDown" "= TPC
o2_gpu_add_kernel("GPUTPCCFStreamCompaction, compactDigits" "= TPCCLUSTERFINDER" LB int32_t iBuf int32_t stage CfChargePos* in CfChargePos* out)
o2_gpu_add_kernel("GPUTPCCFDecodeZS" "= TPCCLUSTERFINDER" LB int32_t firstHBF)
o2_gpu_add_kernel("GPUTPCCFDecodeZSLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF)
o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFDecodeZS" LB int32_t firstHBF)
o2_gpu_add_kernel("GPUTPCCFDecodeZSDenseLink" "GPUTPCCFDecodeZS ERRORS" LB int32_t firstHBF)
o2_gpu_add_kernel("GPUTPCCFGather" "=" LB o2::tpc::ClusterNative* dest)
o2_gpu_add_kernel("GPUTrackingRefitKernel, mode0asGPU" "= GLOBALREFIT " LB)
o2_gpu_add_kernel("GPUTrackingRefitKernel, mode1asTrackParCov" "= GLOBALREFIT " LB)
Expand Down