Skip to content

Commit 91d4cee

Browse files
committed
GPU: Add tpcApplyCFCutsAtDecoding option to apply cluster cuts of CF during CTF decoding
1 parent 8e75aa1 commit 91d4cee

File tree

5 files changed

+64
-26
lines changed

5 files changed

+64
-26
lines changed

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -285,6 +285,7 @@ AddOption(tpcSingleSector, int32_t, -1, "", 0, "Restrict TPC processing to a sin
285285
AddOption(tpcDownscaledEdx, uint8_t, 0, "", 0, "If != 0, downscale dEdx processing (if enabled) to x %")
286286
AddOption(tpcMaxAttachedClustersPerSectorRow, uint32_t, 51000, "", 0, "Maximum number of TPC attached clusters which can be decoded per SectorRow")
287287
AddOption(tpcUseOldCPUDecoding, bool, false, "", 0, "Enable old CPU-based TPC decoding")
288+
AddOption(tpcApplyCFCutsAtDecoding, bool, false, "", 0, "Apply cluster cuts from clusterization during decoding of compressed clusters")
288289
AddOption(RTCcacheFolder, std::string, "./rtccache/", "", 0, "Folder in which the cache file is stored")
289290
AddOption(RTCprependCommand, std::string, "", "", 0, "Prepend RTC compilation commands by this string")
290291
AddOption(RTCoverrideArchitecture, std::string, "", "", 0, "Override architecture part of RTC compilation command line")

GPU/GPUTracking/Global/GPUChain.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -59,31 +59,30 @@ class GPUChain
5959

6060
const GPUParam& GetParam() const { return mRec->mHostConstantMem->param; }
6161
const GPUSettingsGRP& GetGRPSettings() const { return mRec->mGRPSettings; }
62-
const GPUSettingsDeviceBackend& GetDeviceBackendSettings() const { return mRec->mDeviceBackendSettings; }
63-
const GPUSettingsProcessing& GetProcessingSettings() const { return mRec->mProcessingSettings; }
6462
const GPUCalibObjectsConst& calib() const { return processors()->calibObjects; }
6563
GPUReconstruction* rec() { return mRec; }
6664
const GPUReconstruction* rec() const { return mRec; }
6765
inline const GPUConstantMem* GetProcessors() { return mRec->processors(); }
6866

67+
// Make functions from GPUReconstruction*** available
6968
GPUReconstruction::RecoStepField GetRecoSteps() const { return mRec->GetRecoSteps(); }
7069
GPUReconstruction::RecoStepField GetRecoStepsGPU() const { return mRec->GetRecoStepsGPU(); }
7170
GPUReconstruction::InOutTypeField GetRecoStepsInputs() const { return mRec->GetRecoStepsInputs(); }
7271
GPUReconstruction::InOutTypeField GetRecoStepsOutputs() const { return mRec->GetRecoStepsOutputs(); }
72+
inline const GPUSettingsDeviceBackend& GetDeviceBackendSettings() const { return mRec->mDeviceBackendSettings; }
73+
inline const GPUSettingsProcessing& GetProcessingSettings() const { return mRec->mProcessingSettings; }
7374

7475
protected:
7576
GPUReconstructionCPU* mRec;
7677
GPUChain(GPUReconstruction* rec) : mRec((GPUReconstructionCPU*)rec) {}
7778

7879
int32_t GetThread();
79-
8080
// Make functions from GPUReconstruction*** available
8181
inline GPUConstantMem* processors() { return mRec->processors(); }
8282
inline GPUConstantMem* processorsShadow() { return mRec->mProcessorsShadow; }
8383
inline GPUConstantMem* processorsDevice() { return mRec->mDeviceConstantMem; }
8484
inline GPUParam& param() { return mRec->param(); }
8585
inline const GPUConstantMem* processors() const { return mRec->processors(); }
86-
inline GPUSettingsProcessing& ProcessingSettings() { return mRec->mProcessingSettings; }
8786
inline void SynchronizeStream(int32_t stream) { mRec->SynchronizeStream(stream); }
8887
inline void SynchronizeEvents(deviceEvent* evList, int32_t nEvents = 1) { mRec->SynchronizeEvents(evList, nEvents); }
8988
inline void SynchronizeEventAndRelease(deviceEvent& ev, bool doGPU = true)

GPU/GPUTracking/Global/GPUChainTracking.cxx

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -309,7 +309,7 @@ bool GPUChainTracking::ValidateSettings()
309309
GPUError("Must use external output for double pipeline mode");
310310
return false;
311311
}
312-
if (ProcessingSettings().tpcCompressionGatherMode == 1) {
312+
if (GetProcessingSettings().tpcCompressionGatherMode == 1) {
313313
GPUError("Double pipeline incompatible to compression mode 1");
314314
return false;
315315
}
@@ -318,7 +318,11 @@ bool GPUChainTracking::ValidateSettings()
318318
return false;
319319
}
320320
}
321-
if ((GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && !(GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && (ProcessingSettings().tpcCompressionGatherMode == 1 || ProcessingSettings().tpcCompressionGatherMode == 3)) {
321+
if ((GetRecoSteps() & GPUDataTypes::RecoStep::TPCDecompression) && GetProcessingSettings().tpcApplyCFCutsAtDecoding && !GetProcessingSettings().tpcUseOldCPUDecoding) {
322+
GPUError("tpcApplyCFCutsAtDecoding currently requires tpcUseOldCPUDecoding");
323+
return false;
324+
}
325+
if ((GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && !(GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression) && (GetProcessingSettings().tpcCompressionGatherMode == 1 || GetProcessingSettings().tpcCompressionGatherMode == 3)) {
322326
GPUError("Invalid tpcCompressionGatherMode for compression on CPU");
323327
return false;
324328
}
@@ -888,7 +892,7 @@ int32_t GPUChainTracking::RunChainFinalize()
888892
if (GetProcessingSettings().eventDisplay->getDisplayControl() == 2) {
889893
mDisplayRunning = false;
890894
GetProcessingSettings().eventDisplay->DisplayExit();
891-
ProcessingSettings().eventDisplay = nullptr;
895+
const_cast<GPUSettingsProcessing&>(GetProcessingSettings()).eventDisplay = nullptr; // TODO: fixme - eventDisplay should probably not be put into ProcessingSettings in the first place
892896
return (2);
893897
}
894898
GetProcessingSettings().eventDisplay->setDisplayControl(0);

GPU/GPUTracking/Global/GPUChainTrackingCompression.cxx

Lines changed: 51 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@ int32_t GPUChainTracking::RunTPCCompression()
4040
RecordMarker(mEvents->single, 0);
4141
}
4242

43-
if (ProcessingSettings().tpcCompressionGatherMode == 3) {
43+
if (GetProcessingSettings().tpcCompressionGatherMode == 3) {
4444
mRec->AllocateVolatileDeviceMemory(0); // make future device memory allocation volatile
4545
}
4646
SetupGPUProcessor(&Compressor, true);
@@ -73,19 +73,19 @@ int32_t GPUChainTracking::RunTPCCompression()
7373
Compressor.mOutputFlat->set(outputSize, *Compressor.mOutput);
7474
char* hostFlatPtr = (char*)Compressor.mOutput->qTotU; // First array as allocated in GPUTPCCompression::SetPointersCompressedClusters
7575
size_t copySize = 0;
76-
if (ProcessingSettings().tpcCompressionGatherMode == 3) {
76+
if (GetProcessingSettings().tpcCompressionGatherMode == 3) {
7777
CompressorShadow.mOutputA = Compressor.mOutput;
7878
copySize = AllocateRegisteredMemory(Compressor.mMemoryResOutputGPU); // We overwrite Compressor.mOutput with the allocated output pointers on the GPU
7979
}
8080
const o2::tpc::CompressedClustersPtrs* P = nullptr;
8181
HighResTimer* gatherTimer = nullptr;
8282
int32_t outputStream = 0;
83-
if (ProcessingSettings().doublePipeline) {
83+
if (GetProcessingSettings().doublePipeline) {
8484
SynchronizeStream(OutputStream()); // Synchronize output copies running in parallel from memory that might be released, only the following async copy from stacked memory is safe after the chain finishes.
8585
outputStream = OutputStream();
8686
}
87-
if (ProcessingSettings().tpcCompressionGatherMode >= 2) {
88-
if (ProcessingSettings().tpcCompressionGatherMode == 2) {
87+
if (GetProcessingSettings().tpcCompressionGatherMode >= 2) {
88+
if (GetProcessingSettings().tpcCompressionGatherMode == 2) {
8989
void* devicePtr = mRec->getGPUPointer(Compressor.mOutputFlat);
9090
if (devicePtr != Compressor.mOutputFlat) {
9191
CompressedClustersPtrs& ptrs = *Compressor.mOutput; // We need to update the ptrs with the gpu-mapped version of the host address space
@@ -97,7 +97,7 @@ int32_t GPUChainTracking::RunTPCCompression()
9797
TransferMemoryResourcesToGPU(myStep, &Compressor, outputStream);
9898
constexpr uint32_t nBlocksDefault = 2;
9999
constexpr uint32_t nBlocksMulti = 1 + 2 * 200;
100-
switch (ProcessingSettings().tpcCompressionGatherModeKernel) {
100+
switch (GetProcessingSettings().tpcCompressionGatherModeKernel) {
101101
case 0:
102102
runKernel<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::unbuffered>(GetGridBlkStep(nBlocksDefault, outputStream, RecoStep::TPCCompression));
103103
getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::unbuffered>(RecoStep::TPCCompression, 0, outputSize, false);
@@ -120,10 +120,10 @@ int32_t GPUChainTracking::RunTPCCompression()
120120
getKernelTimer<GPUTPCCompressionGatherKernels, GPUTPCCompressionGatherKernels::multiBlock>(RecoStep::TPCCompression, 0, outputSize, false);
121121
break;
122122
default:
123-
GPUError("Invalid compression kernel %d selected.", (int32_t)ProcessingSettings().tpcCompressionGatherModeKernel);
123+
GPUError("Invalid compression kernel %d selected.", (int32_t)GetProcessingSettings().tpcCompressionGatherModeKernel);
124124
return 1;
125125
}
126-
if (ProcessingSettings().tpcCompressionGatherMode == 3) {
126+
if (GetProcessingSettings().tpcCompressionGatherMode == 3) {
127127
RecordMarker(mEvents->stream[outputStream], outputStream);
128128
char* deviceFlatPts = (char*)Compressor.mOutput->qTotU;
129129
if (GetProcessingSettings().doublePipeline) {
@@ -138,9 +138,9 @@ int32_t GPUChainTracking::RunTPCCompression()
138138
}
139139
} else {
140140
int8_t direction = 0;
141-
if (ProcessingSettings().tpcCompressionGatherMode == 0) {
141+
if (GetProcessingSettings().tpcCompressionGatherMode == 0) {
142142
P = &CompressorShadow.mPtrs;
143-
} else if (ProcessingSettings().tpcCompressionGatherMode == 1) {
143+
} else if (GetProcessingSettings().tpcCompressionGatherMode == 1) {
144144
P = &Compressor.mPtrs;
145145
direction = -1;
146146
gatherTimer = &getTimer<GPUTPCCompressionKernels>("GPUTPCCompression_GatherOnCPU", 0);
@@ -184,11 +184,11 @@ int32_t GPUChainTracking::RunTPCCompression()
184184
GPUMemCpyAlways(myStep, O->timeA, P->timeA, O->nTracks * sizeof(O->timeA[0]), outputStream, direction);
185185
GPUMemCpyAlways(myStep, O->padA, P->padA, O->nTracks * sizeof(O->padA[0]), outputStream, direction);
186186
}
187-
if (ProcessingSettings().tpcCompressionGatherMode == 1) {
187+
if (GetProcessingSettings().tpcCompressionGatherMode == 1) {
188188
gatherTimer->Stop();
189189
}
190190
mIOPtrs.tpcCompressedClusters = Compressor.mOutputFlat;
191-
if (ProcessingSettings().tpcCompressionGatherMode == 3) {
191+
if (GetProcessingSettings().tpcCompressionGatherMode == 3) {
192192
SynchronizeEventAndRelease(mEvents->stream[outputStream]);
193193
mRec->ReturnVolatileDeviceMemory();
194194
}
@@ -209,18 +209,52 @@ int32_t GPUChainTracking::RunTPCDecompression()
209209
if (GetProcessingSettings().tpcUseOldCPUDecoding) {
210210
const auto& threadContext = GetThreadContext();
211211
TPCClusterDecompressor decomp;
212-
auto allocator = [this](size_t size) {
212+
auto allocatorFinal = [this](size_t size) {
213213
this->mInputsHost->mNClusterNative = this->mInputsShadow->mNClusterNative = size;
214214
this->AllocateRegisteredMemory(this->mInputsHost->mResourceClusterNativeOutput, this->mSubOutputControls[GPUTrackingOutputs::getIndex(&GPUTrackingOutputs::clustersNative)]);
215215
return this->mInputsHost->mPclusterNativeOutput;
216216
};
217-
auto& gatherTimer = getTimer<TPCClusterDecompressor>("TPCDecompression", 0);
218-
gatherTimer.Start();
219-
if (decomp.decompress(mIOPtrs.tpcCompressedClusters, *mClusterNativeAccess, allocator, param(), GetProcessingSettings().deterministicGPUReconstruction)) {
217+
std::unique_ptr<ClusterNative[]> tmpBuffer;
218+
auto allocatorTmp = [&tmpBuffer](size_t size) {
219+
return ((tmpBuffer = std::make_unique<ClusterNative[]>(size))).get();
220+
};
221+
auto& decompressTimer = getTimer<TPCClusterDecompressor>("TPCDecompression", 0);
222+
auto allocatorUse = GetProcessingSettings().tpcApplyCFCutsAtDecoding ? std::function<ClusterNative*(size_t)>{allocatorTmp} : std::function<ClusterNative*(size_t)>{allocatorFinal};
223+
decompressTimer.Start();
224+
if (decomp.decompress(mIOPtrs.tpcCompressedClusters, *mClusterNativeAccess, allocatorUse, param(), GetProcessingSettings().deterministicGPUReconstruction)) {
220225
GPUError("Error decompressing clusters");
221226
return 1;
222227
}
223-
gatherTimer.Stop();
228+
if (GetProcessingSettings().tpcApplyCFCutsAtDecoding) {
229+
ClusterNative* outputBuffer;
230+
for (int32_t iPhase = 0; iPhase < 2; iPhase++) {
231+
uint32_t countTotal = 0;
232+
for (uint32_t iSector = 0; iSector < GPUCA_NSLICES; iSector++) {
233+
for (uint32_t iRow = 0; iRow < GPUCA_ROW_COUNT; iRow++) {
234+
uint32_t count = 0;
235+
for (uint32_t k = 0; k < mClusterNativeAccess->nClusters[iSector][iRow]; k++) {
236+
const ClusterNative& cl = mClusterNativeAccess->clusters[iSector][iRow][k];
237+
bool keep = cl.qTot > param().rec.tpc.cfQTotCutoff && cl.qMax > param().rec.tpc.cfQMaxCutoff && (cl.sigmaPadPacked || !(cl.getFlags() & ClusterNative::flagSingle) || cl.qMax > param().rec.tpc.cfQMaxCutoffSinglePad) && (cl.sigmaTimePacked || !(cl.getFlags() & ClusterNative::flagSingle) || cl.qMax > param().rec.tpc.cfQMaxCutoffSingleTime);
238+
count += keep;
239+
countTotal += keep;
240+
if (iPhase) {
241+
outputBuffer[countTotal] = cl;
242+
}
243+
}
244+
if (iPhase) {
245+
mClusterNativeAccess->nClusters[iSector][iRow] = count;
246+
}
247+
}
248+
}
249+
if (iPhase) {
250+
mClusterNativeAccess->clustersLinear = outputBuffer;
251+
mClusterNativeAccess->setOffsetPtrs();
252+
} else {
253+
outputBuffer = allocatorFinal(countTotal);
254+
}
255+
}
256+
}
257+
decompressTimer.Stop();
224258
mIOPtrs.clustersNative = mClusterNativeAccess.get();
225259
if (mRec->IsGPU()) {
226260
AllocateRegisteredMemory(mInputsHost->mResourceClusterNativeBuffer);

GPU/GPUTracking/Global/GPUChainTrackingDebugAndProfiling.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -206,7 +206,7 @@ void GPUChainTracking::PrintOutputStat()
206206
{
207207
int32_t nTracks = 0, nAttachedClusters = 0, nAttachedClustersFitted = 0, nAdjacentClusters = 0;
208208
uint32_t nCls = GetProcessingSettings().doublePipeline ? mIOPtrs.clustersNative->nClustersTotal : GetTPCMerger().NMaxClusters();
209-
if (ProcessingSettings().createO2Output > 1) {
209+
if (GetProcessingSettings().createO2Output > 1) {
210210
nTracks = mIOPtrs.nOutputTracksTPCO2;
211211
nAttachedClusters = mIOPtrs.nMergedTrackHits;
212212
} else {
@@ -244,7 +244,7 @@ void GPUChainTracking::PrintOutputStat()
244244
}
245245
snprintf(trdText, 1024, " - TRD Tracker reconstructed %d tracks (%d tracklets)", nTRDTracks, nTRDTracklets);
246246
}
247-
GPUInfo("Output Tracks: %d (%d / %d / %d / %d clusters (fitted / attached / adjacent / total) - %s format)%s", nTracks, nAttachedClustersFitted, nAttachedClusters, nAdjacentClusters, nCls, ProcessingSettings().createO2Output > 1 ? "O2" : "GPU", trdText);
247+
GPUInfo("Output Tracks: %d (%d / %d / %d / %d clusters (fitted / attached / adjacent / total) - %s format)%s", nTracks, nAttachedClustersFitted, nAttachedClusters, nAdjacentClusters, nCls, GetProcessingSettings().createO2Output > 1 ? "O2" : "GPU", trdText);
248248
}
249249

250250
void GPUChainTracking::SanityCheck()

0 commit comments

Comments
 (0)