Skip to content

Commit 84eac06

Browse files
committed
Initial set of bug fixes and cosmetic changes
1 parent b5ab60d commit 84eac06

File tree

6 files changed

+101
-136
lines changed

6 files changed

+101
-136
lines changed

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 8 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -614,7 +614,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
614614

615615
#ifdef GPUCA_HAS_ONNX
616616
if (GetProcessingSettings().nn.applyNNclusterizer) {
617-
uint32_t maxClusters = -1;
617+
uint32_t maxClusters = 0;
618618
for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) {
619619
maxClusters = std::max(maxClusters, processors()->tpcClusterer[iSector].mNMaxClusters);
620620
}
@@ -918,6 +918,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
918918
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector];
919919
const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn;
920920
GPUTPCNNClusterizerHost nnApplication(nn_settings, clustererNN);
921+
int withMC = (doGPU && propagateMCLabels);
921922

922923
if (clustererNN.nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) {
923924
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
@@ -930,23 +931,23 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
930931
size_t iSize = CAMath::Min((uint)clustererNN.nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart));
931932

932933
auto start0 = std::chrono::high_resolution_clock::now();
933-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNN>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Filling the data
934+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNN>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, withMC, batchStart); // Filling the data
934935

935936
auto stop0 = std::chrono::high_resolution_clock::now();
936937
auto start1 = std::chrono::high_resolution_clock::now();
937938
nnApplication.networkInference(nnApplication.model_class, clustererNN, iSize, clustererNN.modelProbabilities, clustererNN.nnClusterizerDtype);
938939
if (nnApplication.model_class.getNumOutputNodes()[0][1] == 1) {
939-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Assigning class labels
940+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, withMC, batchStart); // Assigning class labels
940941
} else {
941-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass2Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Assigning class labels
942+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass2Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, withMC, batchStart); // Assigning class labels
942943
}
943944

944945
if (!clustererNN.nnClusterizerUseCfRegression) {
945946
nnApplication.networkInference(nnApplication.model_reg_1, clustererNN, iSize, clustererNN.outputDataReg1, clustererNN.nnClusterizerDtype);
946-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass1Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Running the NN for regression class 1
947+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass1Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, withMC, batchStart); // Running the NN for regression class 1
947948
if (nnApplication.model_class.getNumOutputNodes()[0][1] > 1 && nnApplication.reg_model_paths.size() > 1) {
948949
nnApplication.networkInference(nnApplication.model_reg_2, clustererNN, iSize, clustererNN.outputDataReg2, clustererNN.nnClusterizerDtype);
949-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass2Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, batchStart); // Running the NN for regression class 2
950+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass2Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, withMC, batchStart); // Running the NN for regression class 2
950951
}
951952
}
952953
auto stop1 = std::chrono::high_resolution_clock::now();
@@ -956,7 +957,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
956957
}
957958
auto start1 = std::chrono::high_resolution_clock::now();
958959
if (clustererNN.nnClusterizerUseCfRegression) {
959-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::runCfClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, 0, 0); // Running the CF regression kernel - no batching needed: batchStart = 0
960+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::runCfClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNN.nnClusterizerDtype, withMC, 0); // Running the CF regression kernel - no batching needed: batchStart = 0
960961
}
961962
auto stop1 = std::chrono::high_resolution_clock::now();
962963
time_clusterizer += std::chrono::duration_cast<std::chrono::nanoseconds>(stop1 - start1).count() / 1e9;

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx

Lines changed: 21 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -24,25 +24,29 @@ void GPUTPCNNClusterizer::SetMaxData(const GPUTrackingInOutPointers& io) {}
2424

2525
void* GPUTPCNNClusterizer::setIOPointers(void* mem)
2626
{
27-
if (nnClusterizerDtype == 0 && nnClusterizerElementSize > 0) {
28-
computePointerWithAlignment(mem, inputData16, nnClusterizerBatchedMode * nnClusterizerElementSize);
29-
} else if (nnClusterizerDtype == 1 && nnClusterizerElementSize > 0) {
30-
computePointerWithAlignment(mem, inputData32, nnClusterizerBatchedMode * nnClusterizerElementSize);
31-
}
32-
computePointerWithAlignment(mem, peakPositions, nnClusterizerBatchedMode);
33-
computePointerWithAlignment(mem, clusterFlags, 2 * nnClusterizerBatchedMode);
34-
computePointerWithAlignment(mem, centralCharges, nnClusterizerBatchedMode);
35-
computePointerWithAlignment(mem, outputDataClass, nnClusterizerTotalClusters);
36-
if (nnClusterizerModelClassNumOutputNodes > 0) {
37-
computePointerWithAlignment(mem, modelProbabilities, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes);
38-
}
39-
if (!nnClusterizerUseCfRegression) {
40-
if (nnClusterizerModelReg1NumOutputNodes > 0) {
41-
computePointerWithAlignment(mem, outputDataReg1, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes);
27+
if (nnClusterizerBatchedMode > 0){
28+
if (nnClusterizerDtype == 0 && nnClusterizerElementSize > 0) {
29+
computePointerWithAlignment(mem, inputData16, nnClusterizerBatchedMode * nnClusterizerElementSize);
30+
} else if (nnClusterizerDtype == 1 && nnClusterizerElementSize > 0) {
31+
computePointerWithAlignment(mem, inputData32, nnClusterizerBatchedMode * nnClusterizerElementSize);
4232
}
43-
if (nnClusterizerModelReg2NumOutputNodes > 0) {
44-
computePointerWithAlignment(mem, outputDataReg2, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes);
33+
computePointerWithAlignment(mem, peakPositions, nnClusterizerBatchedMode);
34+
computePointerWithAlignment(mem, clusterFlags, 2 * nnClusterizerBatchedMode);
35+
computePointerWithAlignment(mem, centralCharges, nnClusterizerBatchedMode);
36+
if (nnClusterizerModelClassNumOutputNodes > 0) {
37+
computePointerWithAlignment(mem, modelProbabilities, nnClusterizerBatchedMode * nnClusterizerModelClassNumOutputNodes);
4538
}
39+
if (!nnClusterizerUseCfRegression) {
40+
if (nnClusterizerModelReg1NumOutputNodes > 0) {
41+
computePointerWithAlignment(mem, outputDataReg1, nnClusterizerBatchedMode * nnClusterizerModelReg1NumOutputNodes);
42+
}
43+
if (nnClusterizerModelReg2NumOutputNodes > 0) {
44+
computePointerWithAlignment(mem, outputDataReg2, nnClusterizerBatchedMode * nnClusterizerModelReg2NumOutputNodes);
45+
}
46+
}
47+
}
48+
if (nnClusterizerTotalClusters > 0) {
49+
computePointerWithAlignment(mem, outputDataClass, nnClusterizerTotalClusters);
4650
}
4751
return mem;
4852
}

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ class GPUTPCNNClusterizer : public GPUProcessor
4242
int nnClusterizerSizeInputTime = 3;
4343
int nnClusterizerElementSize = -1;
4444
bool nnClusterizerAddIndexData = true;
45-
float nnClassThreshold = 0.16;
45+
float nnClassThreshold = 0.01;
4646
bool nnSigmoidTrafoClassThreshold = 1;
4747
int nnClusterizerUseCfRegression = 0;
4848
int nnClusterizerBatchedMode = 1;
@@ -58,7 +58,6 @@ class GPUTPCNNClusterizer : public GPUProcessor
5858
int mISector = -1;
5959

6060
// Memory allocation for neural network
61-
uint class2_elements = 0;
6261
float* inputData32 = nullptr;
6362
OrtDataType::Float16_t* inputData16 = nullptr;
6463
float* outputDataClass = nullptr;

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
/// \file GPUTPCNNClusterizerHost.cxx
1313
/// \author Christian Sonnabend
1414

15+
#include <CommonUtils/StringUtils.h>
16+
1517
#include "GPUTPCNNClusterizerHost.h"
1618
#include "GPUTPCNNClusterizer.h"
1719
#include "GPUSettings.h"
@@ -37,7 +39,7 @@ GPUTPCNNClusterizerHost::GPUTPCNNClusterizerHost(const GPUSettingsProcessingNNcl
3739
model_class.init(OrtOptions);
3840
clusterer.nnClusterizerModelClassNumOutputNodes = model_class.getNumOutputNodes()[0][1];
3941

40-
reg_model_paths = splitString(settings.nnRegressionPath, ":");
42+
reg_model_paths = o2::utils::Str::tokenize(settings.nnRegressionPath, ':');
4143

4244
if (!settings.nnClusterizerUseCfRegression) {
4345
if (model_class.getNumOutputNodes()[0][1] == 1 || reg_model_paths.size() == 1) {

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.h

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -44,23 +44,6 @@ class GPUTPCNNClusterizerHost
4444
std::unordered_map<std::string, std::string> OrtOptions;
4545
o2::ml::OrtModel model_class, model_reg_1, model_reg_2; // For splitting clusters
4646
std::vector<std::string> reg_model_paths;
47-
48-
private:
49-
// Avoid including CommonUtils/StringUtils.h
50-
std::vector<std::string> splitString(const std::string& input, const std::string& delimiter)
51-
{
52-
std::vector<std::string> tokens;
53-
std::size_t pos = 0;
54-
std::size_t found;
55-
56-
while ((found = input.find(delimiter, pos)) != std::string::npos) {
57-
tokens.push_back(input.substr(pos, found - pos));
58-
pos = found + delimiter.length();
59-
}
60-
tokens.push_back(input.substr(pos));
61-
62-
return tokens;
63-
}
6447
}; // class GPUTPCNNClusterizerHost
6548

6649
} // namespace o2::gpu

0 commit comments

Comments
 (0)