Skip to content

Commit ba7056b

Browse files
committed
TPC: Add option to write reduced clusterNative, removing rejected clusters
1 parent d45eac1 commit ba7056b

File tree

10 files changed

+65
-32
lines changed

10 files changed

+65
-32
lines changed

GPU/GPUTracking/DataCompression/GPUTPCCompression.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -89,6 +89,7 @@ class GPUTPCCompression : public GPUProcessor
8989
void SetPointersCompressedClusters(void*& mem, T& c, uint32_t nClA, uint32_t nTr, uint32_t nClU, bool reducedClA);
9090
template <class T>
9191
GPUd() static void truncateSignificantBits(T& val, uint32_t nBits, uint32_t max);
92+
GPUd() bool rejectCluster(int32_t idx, GPUParam& param, const GPUTrackingInOutPointers& ioPtrs);
9293

9394
int16_t mMemoryResOutputHost = -1;
9495
int16_t mMemoryResOutputGPU = -1;

GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx

Lines changed: 26 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -183,6 +183,31 @@ GPUd() bool GPUTPCCompressionKernels::GPUTPCCompressionKernels_Compare<4>::opera
183183
return mClsPtr[a].qTot < mClsPtr[b].qTot;
184184
}
185185

186+
// Decides whether the cluster with index idx is rejected.
//
// idx    : index into mClusterStatus and ioPtrs.mergedTrackHitAttachment.
// param  : supplies rec.tpc.rejectionStrategy, rec.tpc.rejectQPtB5 and qptB5Scaler.
// ioPtrs : supplies the per-cluster attachment words and the merged tracks.
//
// Returns true if the cluster is rejected, false if it is kept.
GPUd() bool GPUTPCCompression::rejectCluster(int32_t idx, GPUParam& GPUrestrict() param, const GPUTrackingInOutPointers& GPUrestrict() ioPtrs)
{
  // Any non-zero status entry rejects the cluster right away.
  // NOTE(review): presumably set for clusters already handled elsewhere - confirm against the writer of mClusterStatus.
  if (mClusterStatus[idx]) {
    return true;
  }

  const int32_t attachment = ioPtrs.mergedTrackHitAttachment[idx];

  // An attachment word of 0 means the cluster is unattached;
  // such clusters are rejected only from RejectionStrategyB upwards.
  if (attachment == 0) {
    return param.rec.tpc.rejectionStrategy >= GPUSettings::RejectionStrategyB;
  }

  // Attached clusters are only examined from RejectionStrategyA upwards.
  if (!(param.rec.tpc.rejectionStrategy >= GPUSettings::RejectionStrategyA)) {
    return false;
  }
  if (GPUTPCClusterRejection::GetIsRejected(attachment)) {
    return true;
  }

  // Look up the track referenced by the attachment word and reject on
  // |q/pt| (scaled by qptB5Scaler) above the threshold or on merged loopers.
  auto& track = ioPtrs.mergedTracks[attachment & gputpcgmmergertypes::attachTrackMask];
  return CAMath::Abs(track.GetParam().GetQPt() * param.qptB5Scaler) > param.rec.tpc.rejectQPtB5 || track.MergedLooper();
}
210+
186211
template <>
187212
GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1unattached>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& GPUrestrict() processors)
188213
{
@@ -208,33 +233,7 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1un
208233
const uint32_t nn = CAMath::nextMultipleOf<GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionKernels_step1unattached)>(clusters->nClusters[iSector][iRow]);
209234
for (uint32_t i = iThread; i < nn + nThreads; i += nThreads) {
210235
const int32_t idx = idOffset + i;
211-
int32_t storeCluster = 0;
212-
do {
213-
if (i >= clusters->nClusters[iSector][iRow]) {
214-
break;
215-
}
216-
if (compressor.mClusterStatus[idx]) {
217-
break;
218-
}
219-
int32_t attach = ioPtrs.mergedTrackHitAttachment[idx];
220-
bool unattached = attach == 0;
221-
222-
if (unattached) {
223-
if (processors.param.rec.tpc.rejectionStrategy >= GPUSettings::RejectionStrategyB) {
224-
break;
225-
}
226-
} else if (processors.param.rec.tpc.rejectionStrategy >= GPUSettings::RejectionStrategyA) {
227-
if (GPUTPCClusterRejection::GetIsRejected(attach)) {
228-
break;
229-
}
230-
int32_t id = attach & gputpcgmmergertypes::attachTrackMask;
231-
auto& trk = ioPtrs.mergedTracks[id];
232-
if (CAMath::Abs(trk.GetParam().GetQPt() * processors.param.qptB5Scaler) > processors.param.rec.tpc.rejectQPtB5 || trk.MergedLooper()) {
233-
break;
234-
}
235-
}
236-
storeCluster = 1;
237-
} while (false);
236+
int32_t storeCluster = i < clusters->nClusters[iSector][iRow] && !compressor.rejectCluster(idx, param, ioPtrs);
238237

239238
GPUbarrier();
240239
int32_t myIndex = work_group_scan_inclusive_add(storeCluster);

GPU/GPUTracking/DataTypes/GPUDataTypes.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,7 @@ struct GPUTrackingInOutPointers {
245245
uint32_t nOutputClusRefsTPCO2 = 0;
246246
const o2::MCCompLabel* outputTracksTPCO2MC = nullptr;
247247
const o2::tpc::CompressedClustersFlat* tpcCompressedClusters = nullptr;
248+
const o2::tpc::ClusterNativeAccess* clustersNativeReduced = nullptr;
248249

249250
// TPC links
250251
int32_t* tpcLinkITS = nullptr;

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -361,6 +361,7 @@ AddOption(tpcMaxAttachedClustersPerSectorRow, uint32_t, 51000, "", 0, "Maximum n
361361
AddOption(tpcUseOldCPUDecoding, bool, false, "", 0, "Enable old CPU-based TPC decoding")
362362
AddOption(tpcApplyCFCutsAtDecoding, bool, false, "", 0, "Apply cluster cuts from clusterization during decoding of compressed clusters")
363363
AddOption(tpcApplyClusterFilterOnCPU, uint8_t, 0, "", 0, "Apply custom cluster filter of GPUTPCClusterFilter class, 0: off, 1: debug, 2: PbPb23")
364+
AddOption(tpcWriteClustersAfterRejection, bool, false, "", 0, "Apply TPC rejection strategy before writing clusters")
364365
AddOption(oclPlatformNum, int32_t, -1, "", 0, "Platform to use, in case the backend provides multiple platforms (OpenCL only, -1 = auto-select, -2 query all platforms (also incompatible))")
365366
AddOption(oclCompileFromSources, bool, false, "", 0, "Compile OpenCL binary from included source code instead of using included spirv code")
366367
AddOption(oclOverrideSourceBuildFlags, std::string, "", "", 0, "Override OCL build flags for compilation from source, put a space for empty options")

GPU/GPUTracking/Global/GPUChainTracking.cxx

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,10 @@ bool GPUChainTracking::ValidateSettings()
273273
GPUError("Clusterizer and merger Sanity checks only supported when not running on GPU");
274274
return false;
275275
}
276+
if (GetProcessingSettings().tpcWriteClustersAfterRejection && (mRec->IsGPU() || param().rec.tpc.compressionTypeMask || !(GetRecoSteps() & GPUDataTypes::RecoStep::TPCCompression))) {
277+
GPUError("tpcWriteClustersAfterRejection requires compressionTypeMask = 0, no GPU usage, and compression enabled");
278+
return false;
279+
}
276280
if (GetProcessingSettings().doublePipeline) {
277281
if (!GetRecoStepsOutputs().isOnlySet(GPUDataTypes::InOutType::TPCMergedTracks, GPUDataTypes::InOutType::TPCCompressedClusters, GPUDataTypes::InOutType::TPCClusters)) {
278282
GPUError("Invalid outputs for double pipeline mode 0x%x", (uint32_t)GetRecoStepsOutputs());

GPU/GPUTracking/Global/GPUChainTracking.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -305,6 +305,7 @@ class GPUChainTracking : public GPUChain
305305
void RunTPCTrackingMerger_Resolve(int8_t useOrigTrackParam, int8_t mergeAll, GPUReconstruction::krnlDeviceType deviceType);
306306
void RunTPCClusterFilter(o2::tpc::ClusterNativeAccess* clusters, std::function<o2::tpc::ClusterNative*(size_t)> allocator, bool applyClusterCuts);
307307
bool NeedTPCClustersOnGPU();
308+
void WriteReducedClusters();
308309
template <int32_t I>
309310
int32_t RunTRDTrackingInternal();
310311
uint32_t StreamForSector(uint32_t sector) const;

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -772,7 +772,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
772772
GPUFatal("Cannot use waitForFinalInput callback without delayed output");
773773
}
774774
if (!GetProcessingSettings().tpcApplyClusterFilterOnCPU) {
775-
AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeOutput, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
775+
AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeOutput, GetProcessingSettings().tpcWriteClustersAfterRejection ? nullptr : mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
776776
tmpNativeClusters = mInputsHost->mPclusterNativeOutput;
777777
} else {
778778
tmpNativeClusterBuffer = std::make_unique<ClusterNative[]>(mInputsHost->mNClusterNative);
@@ -1269,7 +1269,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
12691269
// TODO: write to buffer directly
12701270
o2::dataformats::MCTruthContainer<o2::MCCompLabel> mcLabels;
12711271
std::pair<ConstMCLabelContainer*, ConstMCLabelContainerView*> buffer;
1272-
if (mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)] && mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)]->useExternal()) {
1272+
if (!GetProcessingSettings().tpcWriteClustersAfterRejection && mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)] && mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)]->useExternal()) {
12731273
if (!mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)]->allocator) {
12741274
throw std::runtime_error("Cluster MC Label buffer missing");
12751275
}
@@ -1293,7 +1293,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
12931293

12941294
if (buildNativeHost && buildNativeGPU && GetProcessingSettings().delayedOutput) {
12951295
mInputsHost->mNClusterNative = mInputsShadow->mNClusterNative = nClsTotal;
1296-
AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeOutput, mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
1296+
AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeOutput, GetProcessingSettings().tpcWriteClustersAfterRejection ? nullptr : mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
12971297
tmpNativeClusters = mInputsHost->mPclusterNativeOutput;
12981298
for (uint32_t i = outputQueueStart; i < mOutputQueue.size(); i++) {
12991299
mOutputQueue[i].dst = (char*)tmpNativeClusters + (size_t)mOutputQueue[i].dst;
@@ -1308,7 +1308,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
13081308
if (GetProcessingSettings().tpcApplyClusterFilterOnCPU) {
13091309
auto allocator = [this, &tmpNativeClusters](size_t size) {
13101310
this->mInputsHost->mNClusterNative = size;
1311-
this->AllocateRegisteredMemory(this->mInputsHost->mResourceClusterNativeOutput, this->mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
1311+
this->AllocateRegisteredMemory(this->mInputsHost->mResourceClusterNativeOutput, this->GetProcessingSettings().tpcWriteClustersAfterRejection ? nullptr : this->mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
13121312
return (tmpNativeClusters = this->mInputsHost->mPclusterNativeOutput);
13131313
};
13141314
RunTPCClusterFilter(tmpNativeAccess, allocator, false);

GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "GPUConstantMem.h" // TODO: Try to get rid of as many GPUConstantMem includes as possible!
2424
#include "GPUTPCCompressionKernels.h"
2525
#include "GPUTPCDecompressionKernels.h"
26+
#include "SimulationDataFormat/ConstMCTruthContainer.h"
2627
#include "utils/strtag.h"
2728

2829
#include <numeric>
@@ -52,6 +53,9 @@ int32_t GPUChainTracking::RunTPCCompression()
5253
TransferMemoryResourcesToGPU(myStep, &Compressor, 0);
5354
runKernel<GPUMemClean16>(GetGridAutoStep(0, RecoStep::TPCCompression), CompressorShadow.mClusterStatus, Compressor.mMaxClusters * sizeof(CompressorShadow.mClusterStatus[0]));
5455
runKernel<GPUTPCCompressionKernels, GPUTPCCompressionKernels::step0attached>(GetGridAuto(0));
56+
if (GetProcessingSettings().tpcWriteClustersAfterRejection) {
57+
WriteReducedClusters();
58+
}
5559
runKernel<GPUTPCCompressionKernels, GPUTPCCompressionKernels::step1unattached>(GetGridAuto(0));
5660
TransferMemoryResourcesToHost(myStep, &Compressor, 0);
5761
#ifdef GPUCA_TPC_GEOMETRY_O2
@@ -434,3 +438,20 @@ int32_t GPUChainTracking::RunTPCDecompression()
434438
DoDebugDump(GPUChainTrackingDebugFlags::TPCDecompressedClusters, &GPUChainTracking::DumpClusters, *mDebugFile, mIOPtrs.clustersNative);
435439
return 0;
436440
}
441+
442+
void GPUChainTracking::WriteReducedClusters()
443+
{
444+
GPUOutputControl* clOutput = mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)];
445+
if (!clOutput || !clOutput->allocator) {
446+
throw std::runtime_error("No output allocator for clusterNative available");
447+
}
448+
auto* clBuffer = (ClusterNative*)clOutput->allocator(mIOPtrs.clustersNative->nClustersTotal * sizeof(ClusterNative));
449+
450+
GPUOutputControl* labelOutput = mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clusterLabels)];
451+
if (!labelOutput || !labelOutput->allocator) {
452+
throw std::runtime_error("No output allocator for clusterNative labels available");
453+
}
454+
455+
ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer* labelContainer = reinterpret_cast<ClusterNativeAccess::ConstMCLabelContainerViewWithBuffer*>(labelOutput->allocator(0));
456+
std::pair<o2::dataformats::ConstMCLabelContainer*, o2::dataformats::ConstMCLabelContainerView*> labelBuffer = {&labelContainer->first, &labelContainer->second};
457+
}

GPU/GPUTracking/Global/GPUChainTrackingMerger.cxx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,9 @@ int32_t GPUChainTracking::RunTPCTrackingMerger(bool synchronizeOutput)
293293
mRec->PushNonPersistentMemory(qStr2Tag("TPCMERG2"));
294294
AllocateRegisteredMemory(Merger.MemoryResOutputO2Scratch());
295295
WriteToConstantMemory(RecoStep::TPCMerging, (char*)&processors()->tpcMerger - (char*)processors(), &MergerShadow, sizeof(MergerShadow), 0);
296-
runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::prepare>(GetGridAuto(0, deviceType));
296+
if (!GetProcessingSettings().tpcWriteClustersAfterRejection) {
297+
runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::prepare>(GetGridAuto(0, deviceType));
298+
}
297299
TransferMemoryResourceLinkToHost(RecoStep::TPCMerging, Merger.MemoryResMemory(), 0, &mEvents->single);
298300
runKernel<GPUTPCGMO2Output, GPUTPCGMO2Output::sort>(GetGridAuto(0, deviceType));
299301
mRec->ReturnVolatileDeviceMemory();

GPU/Workflow/src/GPUWorkflowSpec.cxx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -889,6 +889,9 @@ void GPURecoWorkflowSpec::run(ProcessingContext& pc)
889889

890890
// ------------------------------ Various postprocessing steps ------------------------------
891891

892+
if (mConfig->configProcessing.tpcWriteClustersAfterRejection) {
893+
ptrs.clustersNative = ptrs.clustersNativeReduced;
894+
}
892895
bool createEmptyOutput = false;
893896
if (retVal != 0) {
894897
if (retVal == 3 && mConfig->configProcessing.ignoreNonFatalGPUErrors) {

0 commit comments

Comments
 (0)