Skip to content

Commit d0daea3

Browse files
NN clusterizer: Bug fixes for MC labels (#14677)
* Bug-fix for MC labels
* Switch on timers on CPU for the first four lanes
* Improved boundary checking for input filling and cluster publishing
* Please consider the following formatting changes

---------

Co-authored-by: ALICE Action Bot <alibuild@cern.ch>
1 parent f2e0f3d commit d0daea3

File tree

6 files changed

+155
-35
lines changed

6 files changed

+155
-35
lines changed

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -709,19 +709,21 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
709709
LOG(info) << "(ORT) Allocated ONNX stream for lane " << lane << " and device " << deviceId;
710710
}
711711
});
712+
const int16_t maxFragmentLen = GetProcessingSettings().overrideClusterizerFragmentLen;
713+
const uint32_t maxAllowedTimebin = param().par.continuousTracking ? std::max<int32_t>(param().continuousMaxTimeBin, maxFragmentLen) : TPC_MAX_TIME_BIN_TRIGGERED;
712714
for (int32_t sector = 0; sector < NSECTORS; sector++) {
713715
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[sector];
714716
GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[sector] : clustererNN;
715717
int32_t lane = sector % numLanes;
716718
clustererNN.mDeviceId = deviceId;
717719
clustererNN.mISector = sector;
718720
clustererNN.mNnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters;
719-
nnApplications[lane].initClusterizer(nn_settings, clustererNN);
721+
nnApplications[lane].initClusterizer(nn_settings, clustererNN, maxFragmentLen, maxAllowedTimebin);
720722
if (doGPU) {
721723
clustererNNShadow.mDeviceId = deviceId;
722724
clustererNNShadow.mISector = sector;
723725
clustererNNShadow.mNnClusterizerTotalClusters = processors()->tpcClusterer[lane].mNMaxClusters;
724-
nnApplications[lane].initClusterizer(nn_settings, clustererNNShadow);
726+
nnApplications[lane].initClusterizer(nn_settings, clustererNNShadow, maxFragmentLen, maxAllowedTimebin);
725727
}
726728
if (nn_settings.nnClusterizerVerbosity > 2) {
727729
LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Processor initialized. Sector " << sector << ", lane " << lane << ", max clusters " << clustererNN.mNnClusterizerTotalClusters << " (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
@@ -1051,7 +1053,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10511053

10521054
// NN evaluations
10531055
if(clustererNNShadow.mNnClusterizerUseClassification) {
1054-
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane]->Start(); }
1056+
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Start(); }
10551057
if (clustererNNShadow.mNnInferenceInputDType == 0) {
10561058
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
10571059
(nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_16);
@@ -1065,13 +1067,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10651067
(nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_32);
10661068
}
10671069
}
1068-
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane]->Stop(); }
1070+
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Stop(); } // doGPU || lane<4 -> only for GPU or first 4 CPU lanes (to limit number of concurrent timers). At least gives some statistics for CPU time...
10691071
if (nn_settings.nnClusterizerVerbosity > 3) {
10701072
LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with NN classification inference. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
10711073
}
10721074
}
10731075
if (!clustererNNShadow.mNnClusterizerUseCfRegression) {
1074-
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 1]->Start(); }
1076+
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Start(); }
10751077
if (clustererNNShadow.mNnInferenceInputDType == 0) {
10761078
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
10771079
(nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_16);
@@ -1085,9 +1087,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10851087
(nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_32);
10861088
}
10871089
}
1088-
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 1]->Stop(); }
1090+
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Stop(); }
10891091
if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) {
1090-
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 2]->Start(); }
1092+
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Start(); }
10911093
if (clustererNNShadow.mNnInferenceInputDType == 0) {
10921094
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
10931095
(nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_16);
@@ -1101,7 +1103,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
11011103
(nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_32);
11021104
}
11031105
}
1104-
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 2]->Stop(); }
1106+
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Stop(); }
11051107
}
11061108
if (nn_settings.nnClusterizerVerbosity > 3) {
11071109
LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with NN regression inference. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,6 +60,10 @@ class GPUTPCNNClusterizer : public GPUProcessor
6060
int32_t mISector = -1;
6161
int32_t mDeviceId = -1;
6262

63+
// charge array boundaries
64+
int32_t maxFragmentLen = -1;
65+
int32_t maxAllowedTimebin = -1; // == tpcMaxTimeBin
66+
6367
// GPU optimizations
6468
uint32_t mNnClusterizerFullRowSize = 0;
6569
uint32_t mNnClusterizerFullPadSize = 0;

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
#include "GPUReconstruction.h"
2222
#include "GPUTPCGeometry.h"
2323
#include "DataFormatsTPC/Constants.h"
24+
#include "clusterFinderDefs.h"
2425

2526
#ifdef GPUCA_HAS_ONNX
2627
#include <onnxruntime_cxx_api.h>
@@ -84,7 +85,7 @@ void GPUTPCNNClusterizerHost::init(const GPUSettingsProcessingNNclusterizer& set
8485
}
8586
}
8687

87-
void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clustererNN)
88+
void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclusterizer& settings, GPUTPCNNClusterizer& clustererNN, int32_t maxFragmentLen, int32_t maxAllowedTimebin)
8889
{
8990
clustererNN.mNnClusterizerUseCfRegression = settings.nnClusterizerUseCfRegression;
9091
clustererNN.mNnClusterizerSizeInputRow = settings.nnClusterizerSizeInputRow;
@@ -109,6 +110,8 @@ void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclust
109110
clustererNN.mNnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold;
110111
clustererNN.mNnClusterizerUseClassification = settings.nnClusterizerUseClassification;
111112
clustererNN.mNnClusterizerSetDeconvolutionFlags = (bool)settings.nnClusterizerSetDeconvolutionFlags;
113+
clustererNN.maxFragmentLen = maxFragmentLen == -1 ? TPC_MAX_FRAGMENT_LEN_GPU : maxFragmentLen;
114+
clustererNN.maxAllowedTimebin = maxAllowedTimebin == -1 ? TPC_MAX_FRAGMENT_LEN_GPU : maxAllowedTimebin;
112115
if (clustererNN.mNnSigmoidTrafoClassThreshold) {
113116
clustererNN.mNnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold));
114117
} else {

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ class GPUTPCNNClusterizerHost
4848
GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNclusterizer& settings, bool useDeterministicMode = false) { init(settings, useDeterministicMode); }
4949

5050
void init(const GPUSettingsProcessingNNclusterizer&, bool = false);
51-
void initClusterizer(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&);
51+
void initClusterizer(const GPUSettingsProcessingNNclusterizer&, GPUTPCNNClusterizer&, int32_t = -1, int32_t = -1);
5252
void createBoundary(GPUTPCNNClusterizer&);
5353
void createIndexLookup(GPUTPCNNClusterizer&);
5454

0 commit comments

Comments
 (0)