Skip to content

Commit 05a64bb

Browse files
committed
Further fixes and beautifications
1 parent ff34a9d commit 05a64bb

File tree

5 files changed

+32
-31
lines changed

5 files changed

+32
-31
lines changed

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -275,6 +275,7 @@ AddOption(nnClassThreshold, float, 0.5, "", 0, "The cutoff at which clusters wil
275275
AddOption(nnRegressionPath, std::string, "network_reg.onnx", "", 0, "The regression network path")
276276
AddOption(nnSigmoidTrafoClassThreshold, int, 1, "", 0, "If true (default), then the classification threshold is transformed by an inverse sigmoid function. This depends on how the network was trained (with a sigmoid as acitvation function in the last layer or not).")
277277
AddOption(nnEvalMode, std::string, "c1:r1", "", 0, "Concatention of modes, e.g. c1:r1 (classification class 1, regression class 1)")
278+
AddOption(nnClusterizerUseClassification, int, 1, "", 0, "If 1, the classification output of the network is used to select clusters, else only the regression output is used and no clusters are rejected by classification")
278279
// CCDB
279280
AddOption(nnLoadFromCCDB, int, 0, "", 0, "If 1 networks are fetched from ccdb, else locally")
280281
AddOption(nnLocalFolder, std::string, ".", "", 0, "Local folder in which the networks will be fetched")

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 19 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,23 +1034,25 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10341034
}
10351035

10361036
// NN evaluations
1037-
if(GetProcessingSettings().debugLevel >= 1) { nnTimers[3*lane]->Start(); }
1038-
if (clustererNNShadow.mNnInferenceInputDType == 0) {
1039-
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
1040-
(nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_16);
1041-
} else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
1042-
(nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_32);
1043-
}
1044-
} else if (clustererNNShadow.mNnInferenceInputDType == 1) {
1045-
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
1046-
(nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_16);
1047-
} else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
1048-
(nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_32);
1037+
if(clustererNNShadow.mNnClusterizerUseClassification) {
1038+
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane]->Start(); }
1039+
if (clustererNNShadow.mNnInferenceInputDType == 0) {
1040+
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
1041+
(nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_16);
1042+
} else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
1043+
(nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_32);
1044+
}
1045+
} else if (clustererNNShadow.mNnInferenceInputDType == 1) {
1046+
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
1047+
(nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_16);
1048+
} else if (clustererNNShadow.mNnInferenceOutputDType == 1) {
1049+
(nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_32);
1050+
}
10491051
}
10501052
}
1051-
if(GetProcessingSettings().debugLevel >= 1) { nnTimers[3*lane]->Stop(); }
1053+
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane]->Stop(); }
10521054
if (!clustererNNShadow.mNnClusterizerUseCfRegression) {
1053-
if(GetProcessingSettings().debugLevel >= 1) { nnTimers[3*lane + 1]->Start(); }
1055+
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 1]->Start(); }
10541056
if (clustererNNShadow.mNnInferenceInputDType == 0) {
10551057
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
10561058
(nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_16);
@@ -1064,9 +1066,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10641066
(nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_32);
10651067
}
10661068
}
1067-
if(GetProcessingSettings().debugLevel >= 1) { nnTimers[3*lane + 1]->Stop(); }
1069+
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 1]->Stop(); }
10681070
if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) {
1069-
if(GetProcessingSettings().debugLevel >= 1) { nnTimers[3*lane + 2]->Start(); }
1071+
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 2]->Start(); }
10701072
if (clustererNNShadow.mNnInferenceInputDType == 0) {
10711073
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
10721074
(nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_16);
@@ -1080,7 +1082,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10801082
(nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_32);
10811083
}
10821084
}
1083-
if(GetProcessingSettings().debugLevel >= 1) { nnTimers[3*lane + 2]->Stop(); }
1085+
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 2]->Stop(); }
10841086
}
10851087
}
10861088

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ class GPUTPCNNClusterizer : public GPUProcessor
4343
int32_t mNnClusterizerChargeArraySize = -1;
4444
int32_t mNnClusterizerElementSize = -1;
4545
int8_t mNnClusterizerAddIndexData = 1;
46+
int8_t mNnClusterizerUseClassification = 1;
4647
float mNnClassThreshold = 0.01;
4748
int8_t mNnSigmoidTrafoClassThreshold = 1;
4849
int8_t mNnClusterizerSetDeconvolutionFlags = 1;

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclust
106106
clustererNN.mNnClusterizerBatchedMode = settings.nnClusterizerBatchedMode;
107107
clustererNN.mNnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue;
108108
clustererNN.mNnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold;
109+
clustererNN.mNnClusterizerUseClassification = settings.nnClusterizerUseClassification;
109110
clustererNN.mNnClusterizerSetDeconvolutionFlags = (bool)settings.nnClusterizerSetDeconvolutionFlags;
110111
if (clustererNN.mNnSigmoidTrafoClassThreshold) {
111112
clustererNN.mNnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold));

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -117,18 +117,14 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
117117
}
118118

119119
if (clustererNN.mNnClusterizerAddIndexData) {
120-
float sector_norm = sector / 36.f;
121-
float row_norm = row / 152.f;
122-
float pad_norm = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
123-
124120
if (dtype == 0) {
125-
clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)sector_norm;
126-
clustererNN.mInputData_16[write_idx + 1] = (OrtDataType::Float16_t)row_norm;
127-
clustererNN.mInputData_16[write_idx + 2] = (OrtDataType::Float16_t)pad_norm;
121+
clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(sector) / o2::tpc::constants::MAXSECTOR);
122+
clustererNN.mInputData_16[write_idx + 1] = (OrtDataType::Float16_t)(static_cast<float>(row) / o2::tpc::constants::MAXGLOBALPADROW);
123+
clustererNN.mInputData_16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast<float>(pad) / GPUTPCGeometry::NPads(row));
128124
} else {
129-
clustererNN.mInputData_32[write_idx] = sector_norm;
130-
clustererNN.mInputData_32[write_idx + 1] = row_norm;
131-
clustererNN.mInputData_32[write_idx + 2] = pad_norm;
125+
clustererNN.mInputData_32[write_idx] = static_cast<float>(sector) / o2::tpc::constants::MAXSECTOR;
126+
clustererNN.mInputData_32[write_idx + 1] = static_cast<float>(row) / o2::tpc::constants::MAXGLOBALPADROW;
127+
clustererNN.mInputData_32[write_idx + 2] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
132128
}
133129
}
134130

@@ -178,8 +174,8 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
178174
uint32_t write_idx = base_idx * clustererNN.mNnClusterizerElementSize + clustererNN.mNnClusterizerChargeArraySize + data_idx;
179175

180176
float index_values[3] = {
181-
sector / 36.f,
182-
row / 152.f,
177+
static_cast<float>(sector) / o2::tpc::constants::MAXSECTOR,
178+
static_cast<float>(row) / o2::tpc::constants::MAXGLOBALPADROW,
183179
static_cast<float>(pad) / GPUTPCGeometry::NPads(row)};
184180

185181
if (dtype == 0) {
@@ -339,7 +335,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
339335

340336
// LOG(info) << glo_idx << " -- " << model_output_index << " / " << clustererNN.outputDataReg1.size() << " / " << clustererNN.mNnClusterizerModelReg1NumOutputNodes << " -- " << clusterer.peakPositions.size() << " -- " << clusterer.centralCharges.size();
341337

342-
if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerModelReg2NumOutputNodes != -1 && clustererNN.mOutputDataClass[full_glo_idx] >= 1)) {
338+
if (clustererNN.mOutputDataClass[full_glo_idx] == 1 || (clustererNN.mNnClusterizerUseClassification <= 0)) {
343339

344340
ClusterAccumulator pc;
345341

@@ -451,7 +447,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
451447

452448
uint32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg2NumOutputNodes;
453449

454-
if (clustererNN.mOutputDataClass[full_glo_idx] > 0) {
450+
if ((clustererNN.mOutputDataClass[full_glo_idx] > 0) || (clustererNN.mNnClusterizerUseClassification <= 0)) {
455451

456452
ClusterAccumulator pc;
457453

0 commit comments

Comments
 (0)