Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions Detectors/TPC/workflow/src/EntropyEncoderSpec.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@
#include "GPUO2InterfaceUtils.h"
#include "GPUParam.h"
#include "DataFormatsTPC/ClusterNative.h"
#include "TPCClusterDecompressor.inc"
#include "TPCClusterDecompressionCore.inc"
#include "GPUTPCCompressionKernels.inc"
#include "TPCCalibration/VDriftHelper.h"
#include "DetectorsBase/GRPGeomHelper.h"
Expand Down Expand Up @@ -183,7 +183,7 @@ void EntropyEncoderSpec::run(ProcessingContext& pc)
offset += clusters.nTrackClusters[lasti++];
}
lasti++;
o2::gpu::TPCClusterDecompressor::decompressTrack(&clusters, *mParam, maxTime, i, offset, checker);
o2::gpu::TPCClusterDecompressionCore::decompressTrack(clusters, *mParam, maxTime, i, offset, checker);
const float tMin = o2::tpc::ClusterNative::unpackTime(tMinP), tMax = o2::tpc::ClusterNative::unpackTime(tMaxP);
const auto chkVal = firstIR + (tMin * constants::LHCBCPERTIMEBIN);
const auto chkExt = totalT > tMax - tMin ? ((totalT - (tMax - tMin)) * constants::LHCBCPERTIMEBIN + 1) : 0;
Expand Down Expand Up @@ -255,7 +255,7 @@ void EntropyEncoderSpec::run(ProcessingContext& pc)
}
};
unsigned int end = offsets[i][j] + clusters.nSliceRowClusters[i * GPUCA_ROW_COUNT + j];
o2::gpu::TPCClusterDecompressor::decompressHits(&clusters, offsets[i][j], end, checker);
o2::gpu::TPCClusterDecompressionCore::decompressHits(clusters, offsets[i][j], end, checker);
}
tmpBuffer[0].first.reserve(clustersFiltered.nUnattachedClusters);
tmpBuffer[0].second.reserve(clustersFiltered.nUnattachedClusters);
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -121,7 +121,7 @@ set(HDRS_INSTALL
Base/GPUReconstructionKernels.h
DataCompression/GPUTPCClusterRejection.h
DataCompression/GPUTPCCompressionKernels.inc
DataCompression/TPCClusterDecompressor.inc
DataCompression/TPCClusterDecompressionCore.inc
DataTypes/GPUdEdxInfo.h
DataTypes/GPUHostDataTypes.h
DataTypes/GPUO2DataTypes.h
Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/DataCompression/GPUTPCDecompression.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@ class GPUTPCDecompression : public GPUProcessor
friend class GPUTPCDecompressionKernels;
friend class GPUTPCDecompressionUtilKernels;
friend class GPUChainTracking;
friend class TPCClusterDecompressionCore;

public:
#ifndef GPUCA_GPUCODE
Expand Down
116 changes: 4 additions & 112 deletions GPU/GPUTracking/DataCompression/GPUTPCDecompressionKernels.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include "GPUConstantMem.h"
#include "GPUTPCCompressionTrackModel.h"
#include "GPUCommonAlgorithm.h"
#include "TPCClusterDecompressionCore.inc"

using namespace GPUCA_NAMESPACE::gpu;
using namespace o2::tpc;
Expand All @@ -31,100 +32,11 @@ GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::ste
const uint32_t maxTime = (param.continuousMaxTimeBin + 1) * ClusterNative::scaleTimePacked - 1;

for (int32_t i = trackStart + get_global_id(0); i < trackEnd; i += get_global_size(0)) {
decompressTrack(cmprClusters, param, maxTime, i, decompressor.mAttachedClustersOffsets[i], decompressor);
uint32_t offset = decompressor.mAttachedClustersOffsets[i];
TPCClusterDecompressionCore::decompressTrack(cmprClusters, param, maxTime, i, offset, decompressor);
}
}

// Decompress all clusters attached to track trackIndex (track-model compression):
// the first cluster carries absolute pad/time, subsequent clusters are stored as
// residuals w.r.t. the propagated GPUTPCCompressionTrackModel. clusterOffset indexes
// the flat attached-cluster arrays; per-cluster residual arrays are offset by
// (clusterOffset - trackIndex - 1) because the first cluster of each track stores
// no residual. On a propagation/mirror/filter failure the track is aborted and the
// remaining cluster offsets are skipped at the end.
GPUdii() void GPUTPCDecompressionKernels::decompressTrack(CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t trackIndex, uint32_t clusterOffset, GPUTPCDecompression& decompressor)
{
  float zOffset = 0;
  uint32_t slice = cmprClusters.sliceA[trackIndex];
  uint32_t row = cmprClusters.rowA[trackIndex];
  GPUTPCCompressionTrackModel track;
  uint32_t clusterIndex;
  for (clusterIndex = 0; clusterIndex < cmprClusters.nTrackClusters[trackIndex]; clusterIndex++) {
    uint32_t pad = 0, time = 0;
    if (clusterIndex != 0) {
      // Stored slice values >= GPUCA_NSLICES flag a change of track leg.
      uint8_t tmpSlice = cmprClusters.sliceLegDiffA[clusterOffset - trackIndex - 1];
      bool changeLeg = (tmpSlice >= GPUCA_NSLICES);
      if (changeLeg) {
        tmpSlice -= GPUCA_NSLICES;
      }
      if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
        // Differential mode: slice/row are wrapping deltas w.r.t. the previous cluster.
        slice += tmpSlice;
        if (slice >= GPUCA_NSLICES) {
          slice -= GPUCA_NSLICES;
        }
        row += cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
        if (row >= GPUCA_ROW_COUNT) {
          row -= GPUCA_ROW_COUNT;
        }
      } else {
        // Absolute mode: values are stored directly.
        slice = tmpSlice;
        row = cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
      }
      if (changeLeg && track.Mirror()) {
        break; // mirroring failed -> abort this track
      }
      if (track.Propagate(param.tpcGeometry.Row2X(row), param.SliceParam[slice].Alpha)) {
        break; // propagation failed -> abort this track
      }
      uint32_t timeTmp = cmprClusters.timeResA[clusterOffset - trackIndex - 1];
      // NOTE(review): decimal literal 800000 (= 0xC3500), not 0x800000 — together with
      // the 0xFF000000 OR this looks like a 24-bit sign extension, so the mask is
      // presumably a typo; confirm against the encoder before changing.
      if (timeTmp & 800000) {
        timeTmp |= 0xFF000000;
      }
      // Reconstruct packed time/pad from the residual plus the track-model prediction.
      time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(slice, track.Z() + zOffset)));
      float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), param.tpcGeometry.LinearY2Pad(slice, row, track.Y())));
      pad = cmprClusters.padResA[clusterOffset - trackIndex - 1] + ClusterNative::packPad(tmpPad);
      time = time & 0xFFFFFF;
      pad = (uint16_t)pad;
      // Clamp into the valid packed ranges; values in the upper half of the wrap-around
      // window are treated as negative underflow and clamped to 0.
      if (pad >= param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked) {
        if (pad >= 0xFFFF - 11968) { // Constant 11968 = (2^15 - MAX_PADS(138) * scalePadPacked(64)) / 2
          pad = 0;
        } else {
          pad = param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked - 1;
        }
      }
      if (param.continuousMaxTimeBin > 0 && time >= maxTime) {
        if (time >= 0xFFFFFF - 544768) { // Constant 544768 = (2^23 - LHCMAXBUNCHES(3564) * MAXORBITS(256) * scaleTimePacked(64) / BCPERTIMEBIN(8)) / 2)
          time = 0;
        } else {
          time = maxTime;
        }
      }
    } else {
      // First cluster of the track is stored with absolute pad/time.
      time = cmprClusters.timeA[trackIndex];
      pad = cmprClusters.padA[trackIndex];
    }
    const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, decompressor);
    float y = param.tpcGeometry.LinearPad2Y(slice, row, cluster.getPad());
    float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime());
    if (clusterIndex == 0) {
      // Seed the track model at the first cluster; z is measured relative to it.
      zOffset = z;
      track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SliceParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param);
    }
    if (clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y, z - zOffset, row)) {
      break; // filter update rejected -> abort this track
    }
    clusterOffset++;
  }
  // Advance past any clusters not reached because the loop aborted early.
  clusterOffset += cmprClusters.nTrackClusters[trackIndex] - clusterIndex;
}

// Store one decompressed attached cluster into the decompressor's per-(slice,row)
// temporary buffer. A slot is reserved with an atomic increment so concurrent
// threads can store into the same row; on overflow an error is raised and the
// counter is clamped to the buffer capacity. Returns the constructed cluster
// either way, so the caller's track model can keep using it.
GPUdii() ClusterNative GPUTPCDecompressionKernels::decompressTrackStore(const o2::tpc::CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor)
{
  uint32_t tmpBufferIndex = computeLinearTmpBufferIndex(slice, row, decompressor.mMaxNativeClustersPerBuffer);
  // Atomically reserve the next free slot of this (slice,row) buffer.
  uint32_t currentClusterIndex = CAMath::AtomicAdd(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), 1u);
  const ClusterNative c(time, cmprClusters.flagsA[clusterOffset], pad, cmprClusters.sigmaTimeA[clusterOffset], cmprClusters.sigmaPadA[clusterOffset], cmprClusters.qMaxA[clusterOffset], cmprClusters.qTotA[clusterOffset]);
  if (currentClusterIndex < decompressor.mMaxNativeClustersPerBuffer) {
    decompressor.mTmpNativeClusters[tmpBufferIndex + currentClusterIndex] = c;
  } else {
    // Buffer full: report (slice*1000+row identifies the location) and pin the
    // counter at capacity so later threads don't keep overflowing it.
    decompressor.raiseError(GPUErrors::ERROR_DECOMPRESSION_ATTACHED_CLUSTER_OVERFLOW, slice * 1000 + row, currentClusterIndex, decompressor.mMaxNativeClustersPerBuffer);
    CAMath::AtomicExch(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), decompressor.mMaxNativeClustersPerBuffer);
  }
  return c;
}

template <>
GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::step1unattached>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, int32_t sliceStart, int32_t nSlices)
{
Expand All @@ -144,7 +56,7 @@ GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::ste
}
ClusterNative* clout = buffer + decompressor.mNativeClustersIndex[linearIndex];
uint32_t end = offsets[linearIndex] + ((linearIndex >= decompressor.mInputGPU.nSliceRows) ? 0 : decompressor.mInputGPU.nSliceRowClusters[linearIndex]);
decompressHits(cmprClusters, offsets[linearIndex], end, clout);
TPCClusterDecompressionCore::decompressHits(cmprClusters, offsets[linearIndex], end, clout);
if (processors.param.rec.tpc.clustersShiftTimebins != 0.f) {
for (uint32_t k = 0; k < outputAccess->nClusters[iSlice][iRow]; k++) {
auto& cl = buffer[k];
Expand All @@ -161,26 +73,6 @@ GPUdii() void GPUTPCDecompressionKernels::Thread<GPUTPCDecompressionKernels::ste
}
}

// Decompress the unattached clusters in [start, end) into clusterNativeBuffer.
// In differential mode the stored time/pad values are deltas accumulated over the
// run; otherwise they are absolute values taken as-is.
GPUdii() void GPUTPCDecompressionKernels::decompressHits(const o2::tpc::CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, ClusterNative* clusterNativeBuffer)
{
  const bool differential = (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) != 0;
  uint32_t time = 0;
  uint16_t pad = 0;
  ClusterNative* out = clusterNativeBuffer;
  for (uint32_t idx = start; idx < end; idx++) {
    if (differential) {
      uint32_t timeDelta = cmprClusters.timeDiffU[idx];
      // NOTE(review): decimal literal kept as-is to preserve behavior; 0x800000
      // (bit 23, matching the 0xFF000000 sign extension) may have been intended.
      if (timeDelta & 800000) {
        timeDelta |= 0xFF000000;
      }
      time += timeDelta;
      pad += cmprClusters.padDiffU[idx];
    } else {
      time = cmprClusters.timeDiffU[idx];
      pad = cmprClusters.padDiffU[idx];
    }
    *out = ClusterNative(time, cmprClusters.flagsU[idx], pad, cmprClusters.sigmaTimeU[idx], cmprClusters.sigmaPadU[idx], cmprClusters.qMaxU[idx], cmprClusters.qTotU[idx]);
    ++out;
  }
}

template <typename T>
GPUdi() void GPUTPCDecompressionKernels::decompressorMemcpyBasic(T* GPUrestrict() dst, const T* GPUrestrict() src, uint32_t size)
{
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,6 @@ class GPUTPCDecompressionKernels : public GPUKernelTemplate

template <int32_t iKernel = defaultKernel, typename... Args>
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors, Args... args);
GPUd() static void decompressTrack(o2::tpc::CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t trackIndex, uint32_t clusterOffset, GPUTPCDecompression& decompressor);
GPUdi() static o2::tpc::ClusterNative decompressTrackStore(const o2::tpc::CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor);
GPUdi() static void decompressHits(const o2::tpc::CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, o2::tpc::ClusterNative* clusterNativeBuffer);

GPUd() static uint32_t computeLinearTmpBufferIndex(uint32_t slice, uint32_t row, uint32_t maxClustersPerBuffer)
{
Expand Down
190 changes: 190 additions & 0 deletions GPU/GPUTracking/DataCompression/TPCClusterDecompressionCore.inc
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
// Copyright 2024-2025 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file TPCClusterDecompressionCore.inc
/// \author Gabriele Cimador

#ifndef TPCCLUSTERDECOMPRESSOR_INC
#define TPCCLUSTERDECOMPRESSOR_INC

#include "GPUTPCDecompression.h"
#include "GPUConstantMem.h"
#include "GPUTPCCompressionTrackModel.h"
#include "GPUCommonAlgorithm.h"
#include "GPUO2DataTypes.h"

#ifndef GPUCA_GPUCODE
#include <functional>
#endif

using namespace o2::tpc;

namespace GPUCA_NAMESPACE::gpu
{

// Shared host/GPU core for TPC cluster decompression. All members are static; the
// trailing "sink" argument of decompressTrack()/decompressHits() selects, via
// overload resolution on the *Store helpers, where decompressed clusters go: a
// callback, a std::vector, per-(slice,row) locked vectors, or the GPU
// decompressor's temporary buffers.
class TPCClusterDecompressionCore
{
 public:
#ifndef GPUCA_GPUCODE
  // Host sink: hand each attached cluster to a callback together with its offset
  // into the compressed attached-cluster arrays. slice/row are unused here but
  // keep the signature uniform across the Store overloads.
  GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::function<void(const ClusterNative&, uint32_t)> func)
  {
    const auto cluster = ClusterNative(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]);
    func(cluster, offset);
    return cluster;
  }

  // Host sink: append the cluster to a vector. Returns a reference to the stored
  // element (valid only until the vector next reallocates).
  GPUhi() static const auto& decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector<ClusterNative>& clusterVector)
  {
    clusterVector.emplace_back(time, clustersCompressed.flagsA[offset], pad, clustersCompressed.sigmaTimeA[offset], clustersCompressed.sigmaPadA[offset], clustersCompressed.qMaxA[offset], clustersCompressed.qTotA[offset]);
    return clusterVector.back();
  }

  // Host sink for multi-threaded decompression: append to the per-(slice,row)
  // vector under a spin lock so concurrent tracks may store into the same row.
  // Returns the cluster by value (a reference could dangle on reallocation).
  GPUhi() static auto decompressTrackStore(const CompressedClusters& clustersCompressed, const uint32_t offset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, std::vector<ClusterNative> (&clusters)[GPUCA_NSLICES][GPUCA_ROW_COUNT], std::atomic_flag (&locks)[GPUCA_NSLICES][GPUCA_ROW_COUNT])
  {
    std::vector<ClusterNative>& clusterVector = clusters[slice][row];
    auto& lock = locks[slice][row];
    while (lock.test_and_set(std::memory_order_acquire)) { // spin until the row is free
    }
    ClusterNative retVal = decompressTrackStore(clustersCompressed, offset, slice, row, pad, time, clusterVector);
    lock.clear(std::memory_order_release);
    return retVal;
  }
#endif

  // GPU sink: reserve a slot in the decompressor's per-(slice,row) scratch buffer
  // via an atomic counter; on overflow raise an error and pin the counter at
  // capacity. Returns the constructed cluster either way.
  GPUdi() static ClusterNative decompressTrackStore(const CompressedClusters& cmprClusters, const uint32_t clusterOffset, uint32_t slice, uint32_t row, uint32_t pad, uint32_t time, GPUTPCDecompression& decompressor)
  {
    uint32_t tmpBufferIndex = slice * (GPUCA_ROW_COUNT * decompressor.mMaxNativeClustersPerBuffer) + row * decompressor.mMaxNativeClustersPerBuffer;
    uint32_t currentClusterIndex = CAMath::AtomicAdd(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), 1u);
    const ClusterNative c(time, cmprClusters.flagsA[clusterOffset], pad, cmprClusters.sigmaTimeA[clusterOffset], cmprClusters.sigmaPadA[clusterOffset], cmprClusters.qMaxA[clusterOffset], cmprClusters.qTotA[clusterOffset]);
    if (currentClusterIndex < decompressor.mMaxNativeClustersPerBuffer) {
      decompressor.mTmpNativeClusters[tmpBufferIndex + currentClusterIndex] = c;
    } else {
      decompressor.raiseError(GPUErrors::ERROR_DECOMPRESSION_ATTACHED_CLUSTER_OVERFLOW, slice * 1000 + row, currentClusterIndex, decompressor.mMaxNativeClustersPerBuffer);
      CAMath::AtomicExch(decompressor.mNativeClustersIndex + (slice * GPUCA_ROW_COUNT + row), decompressor.mMaxNativeClustersPerBuffer);
    }
    return c;
  }

  // Decompress all clusters attached to track trackIndex (track-model scheme):
  // the first cluster carries absolute pad/time, subsequent clusters are stored
  // as residuals w.r.t. the propagated track model. clusterOffset is advanced
  // past this track's clusters, including any skipped after an aborted
  // propagation. The trailing args select the storage sink (see the
  // decompressTrackStore overloads).
  template <typename... Args>
  GPUdi() static void decompressTrack(const CompressedClusters& cmprClusters, const GPUParam& param, const uint32_t maxTime, const uint32_t& trackIndex, uint32_t& clusterOffset, Args&... args)
  {
    float zOffset = 0;
    uint32_t slice = cmprClusters.sliceA[trackIndex];
    uint32_t row = cmprClusters.rowA[trackIndex];
    GPUTPCCompressionTrackModel track;
    uint32_t clusterIndex;
    for (clusterIndex = 0; clusterIndex < cmprClusters.nTrackClusters[trackIndex]; clusterIndex++) {
      uint32_t pad = 0, time = 0;
      if (clusterIndex != 0) {
        // Stored slice values >= GPUCA_NSLICES flag a change of track leg.
        uint8_t tmpSlice = cmprClusters.sliceLegDiffA[clusterOffset - trackIndex - 1];
        bool changeLeg = (tmpSlice >= GPUCA_NSLICES);
        if (changeLeg) {
          tmpSlice -= GPUCA_NSLICES;
        }
        if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
          // Differential mode: slice/row are wrapping deltas w.r.t. the previous cluster.
          slice += tmpSlice;
          if (slice >= GPUCA_NSLICES) {
            slice -= GPUCA_NSLICES;
          }
          row += cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
          if (row >= GPUCA_ROW_COUNT) {
            row -= GPUCA_ROW_COUNT;
          }
        } else {
          slice = tmpSlice;
          row = cmprClusters.rowDiffA[clusterOffset - trackIndex - 1];
        }
        if (changeLeg && track.Mirror()) {
          break; // mirroring failed -> abort this track
        }
        if (track.Propagate(param.tpcGeometry.Row2X(row), param.SliceParam[slice].Alpha)) {
          break; // propagation failed -> abort this track
        }
        uint32_t timeTmp = cmprClusters.timeResA[clusterOffset - trackIndex - 1];
        if (timeTmp & 0x800000) { // sign-extend the 24-bit time residual (fixed: was the decimal literal 800000 = 0xC3500)
          timeTmp |= 0xFF000000;
        }
        // Reconstruct packed time/pad from residual + track-model prediction.
        time = timeTmp + ClusterNative::packTime(CAMath::Max(0.f, param.tpcGeometry.LinearZ2Time(slice, track.Z() + zOffset)));
        float tmpPad = CAMath::Max(0.f, CAMath::Min((float)param.tpcGeometry.NPads(GPUCA_ROW_COUNT - 1), param.tpcGeometry.LinearY2Pad(slice, row, track.Y())));
        pad = cmprClusters.padResA[clusterOffset - trackIndex - 1] + ClusterNative::packPad(tmpPad);
        time = time & 0xFFFFFF;
        pad = (uint16_t)pad;
        // Clamp into valid packed ranges; values in the upper half of the
        // wrap-around window are treated as negative underflow -> 0.
        if (pad >= param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked) {
          if (pad >= 0xFFFF - 11968) { // Constant 11968 = (2^15 - MAX_PADS(138) * scalePadPacked(64)) / 2
            pad = 0;
          } else {
            pad = param.tpcGeometry.NPads(row) * ClusterNative::scalePadPacked - 1;
          }
        }
        if (param.continuousMaxTimeBin > 0 && time >= maxTime) {
          if (time >= 0xFFFFFF - 544768) { // Constant 544768 = (2^23 - LHCMAXBUNCHES(3564) * MAXORBITS(256) * scaleTimePacked(64) / BCPERTIMEBIN(8)) / 2)
            time = 0;
          } else {
            time = maxTime;
          }
        }
      } else {
        // First cluster of the track is stored with absolute pad/time.
        time = cmprClusters.timeA[trackIndex];
        pad = cmprClusters.padA[trackIndex];
      }
      const auto cluster = decompressTrackStore(cmprClusters, clusterOffset, slice, row, pad, time, args...);
      float y = param.tpcGeometry.LinearPad2Y(slice, row, cluster.getPad());
      float z = param.tpcGeometry.LinearTime2Z(slice, cluster.getTime());
      if (clusterIndex == 0) {
        // Seed the track model at the first cluster; z is measured relative to it.
        zOffset = z;
        track.Init(param.tpcGeometry.Row2X(row), y, z - zOffset, param.SliceParam[slice].Alpha, cmprClusters.qPtA[trackIndex], param);
      }
      if (clusterIndex + 1 < cmprClusters.nTrackClusters[trackIndex] && track.Filter(y, z - zOffset, row)) {
        break; // filter update rejected -> abort this track
      }
      clusterOffset++;
    }
    // Advance past any clusters not reached because the loop aborted early.
    clusterOffset += cmprClusters.nTrackClusters[trackIndex] - clusterIndex;
  }

  // Host/GPU sink for unattached clusters: write into a raw buffer, advancing the pointer.
  GPUdi() static const auto& decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, ClusterNative*& clusterBuffer)
  {
    return ((*(clusterBuffer++) = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k])));
  }

#ifndef GPUCA_GPUCODE
  // Host sink for unattached clusters: hand the cluster and its index to a callback.
  GPUhi() static auto decompressHitsStore(const CompressedClusters& cmprClusters, uint32_t k, uint32_t time, uint16_t pad, std::function<void(const ClusterNative&, uint32_t)> func)
  {
    const auto cluster = ClusterNative(time, cmprClusters.flagsU[k], pad, cmprClusters.sigmaTimeU[k], cmprClusters.sigmaPadU[k], cmprClusters.qMaxU[k], cmprClusters.qTotU[k]);
    func(cluster, k);
    return cluster;
  }
#endif

  // Decompress the unattached clusters in [start, end). In differential mode the
  // stored time/pad values are deltas accumulated over the run; otherwise they
  // are absolute values. The trailing args select the sink (decompressHitsStore).
  template <typename... Args>
  GPUdi() static void decompressHits(const CompressedClusters& cmprClusters, const uint32_t start, const uint32_t end, Args&... args)
  {
    uint32_t time = 0;
    uint16_t pad = 0;
    for (uint32_t k = start; k < end; k++) {
      if (cmprClusters.nComppressionModes & GPUSettings::CompressionDifferences) {
        uint32_t timeTmp = cmprClusters.timeDiffU[k];
        if (timeTmp & 0x800000) { // sign-extend the 24-bit time delta (fixed: was the decimal literal 800000)
          timeTmp |= 0xFF000000;
        }
        time += timeTmp;
        pad += cmprClusters.padDiffU[k];
      } else {
        time = cmprClusters.timeDiffU[k];
        pad = cmprClusters.padDiffU[k];
      }
      decompressHitsStore(cmprClusters, k, time, pad, args...);
    }
  }
};
} // namespace GPUCA_NAMESPACE::gpu

#endif
Loading
Loading