Skip to content

Commit db0c836

Browse files
committed
Please consider the following formatting changes
1 parent 99ca93b commit db0c836

File tree

4 files changed

+54
-51
lines changed

4 files changed

+54
-51
lines changed

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -893,7 +893,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
893893
(clusterer.nnInternals)->nnClusterizerElementSize = ((2 * (clusterer.nnInternals)->nnClusterizerSizeInputRow + 1) * (2 * (clusterer.nnInternals)->nnClusterizerSizeInputPad + 1) * (2 * (clusterer.nnInternals)->nnClusterizerSizeInputTime + 1)) + ((clusterer.nnInternals)->nnClusterizerAddIndexData ? 3 : 0);
894894
(clusterer.nnInternals)->nnClusterizerBatchedMode = nn_settings.nnClusterizerBatchedMode;
895895
(clusterer.nnInternals)->nnClusterizerBoundaryFillValue = nn_settings.nnClusterizerBoundaryFillValue;
896-
if (nn_settings.nnClusterizerVerbosity < 0){
896+
if (nn_settings.nnClusterizerVerbosity < 0) {
897897
(clusterer.nnInternals)->nnClusterizerVerbosity = nn_settings.nnInferenceVerbosity;
898898
} else {
899899
(clusterer.nnInternals)->nnClusterizerVerbosity = nn_settings.nnClusterizerVerbosity;
@@ -929,7 +929,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
929929
(clusterer.nnInternals)->model_reg_2.init((clusterer.nnInternals)->OrtOptions);
930930
}
931931
}
932-
932+
933933
if ((clusterer.nnInternals)->nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) {
934934
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSlice}});
935935
DoDebugAndDump(RecoStep::TPCClusterFinding, 262144 << 4, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
@@ -944,15 +944,15 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
944944
int evalDtype = (clusterer.nnInternals)->OrtOptions["dtype"].find("32") != std::string::npos;
945945
(clusterer.nnInternals)->outputDataClass.resize(clusterer.mPmemory->counters.nClusters, -1);
946946

947-
for(int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / (clusterer.nnInternals)->nnClusterizerBatchedMode); batch++) {
947+
for (int batch = 0; batch < std::ceil((float)clusterer.mPmemory->counters.nClusters / (clusterer.nnInternals)->nnClusterizerBatchedMode); batch++) {
948948
uint batchStart = batch * (clusterer.nnInternals)->nnClusterizerBatchedMode;
949949
uint iSize = CAMath::Min((uint)(clusterer.nnInternals)->nnClusterizerBatchedMode, (uint)(clusterer.mPmemory->counters.nClusters - batchStart));
950950

951951
(clusterer.nnInternals)->clusterFlags.clear();
952952
(clusterer.nnInternals)->peakPositions.clear();
953953
(clusterer.nnInternals)->centralCharges.clear();
954954

955-
(clusterer.nnInternals)->clusterFlags.resize(iSize, {0,0});
955+
(clusterer.nnInternals)->clusterFlags.resize(iSize, {0, 0});
956956
(clusterer.nnInternals)->peakPositions.resize(iSize);
957957
(clusterer.nnInternals)->centralCharges.resize(iSize);
958958

@@ -968,7 +968,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
968968

969969
auto start1 = std::chrono::high_resolution_clock::now();
970970
GPUTPCNNClusterizer::inferenceNetworkClass(clusterer, evalDtype);
971-
if ((clusterer.nnInternals)->model_class.getNumOutputNodes()[0][1] == 1){
971+
if ((clusterer.nnInternals)->model_class.getNumOutputNodes()[0][1] == 1) {
972972
runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::determineClass1Labels>({GetGrid(iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, evalDtype, 0, batchStart); // Assigning class labels
973973
} else {
974974
runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::determineClass2Labels>({GetGrid(iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, evalDtype, 0, batchStart); // Assigning class labels
@@ -989,7 +989,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
989989
}
990990

991991
auto start1 = std::chrono::high_resolution_clock::now();
992-
if((clusterer.nnInternals)->nnClusterizerUseCfRegression) {
992+
if ((clusterer.nnInternals)->nnClusterizerUseCfRegression) {
993993
runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::runCfClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, evalDtype, 0, 0); // Running the CF regression kernel - no batching needed: batchStart = 0
994994
}
995995
auto stop1 = std::chrono::high_resolution_clock::now();

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx

Lines changed: 15 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -61,8 +61,8 @@ GPUdii() void GPUTPCNNClusterizer::Thread<GPUTPCNNClusterizer::determineClass2La
6161
uint elem_iterator = glo_idx * (clusterer.nnInternals)->model_class.getNumOutputNodes()[0][1];
6262
float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, infty]
6363
uint class_label = 0;
64-
for(float pIdx = elem_iterator; pIdx < elem_iterator + (clusterer.nnInternals)->model_class.getNumOutputNodes()[0][1]; pIdx++) {
65-
if(pIdx == elem_iterator) {
64+
for (float pIdx = elem_iterator; pIdx < elem_iterator + (clusterer.nnInternals)->model_class.getNumOutputNodes()[0][1]; pIdx++) {
65+
if (pIdx == elem_iterator) {
6666
current_max_prob = (clusterer.nnInternals)->modelProbabilities[pIdx];
6767
} else {
6868
class_label = ((clusterer.nnInternals)->modelProbabilities[pIdx] > current_max_prob ? pIdx : class_label);
@@ -93,24 +93,27 @@ GPUdii() void GPUTPCNNClusterizer::Thread<GPUTPCNNClusterizer::publishClass2Regr
9393
}
9494

9595
// Apply the neural network to the input data. Note: These are not GPU kernels. We let ONNX take care of that
96-
void GPUTPCNNClusterizer::inferenceNetworkClass(processorType& clusterer, int8_t dtype, uint batch_idx) {
97-
if(dtype == 0){
96+
void GPUTPCNNClusterizer::inferenceNetworkClass(processorType& clusterer, int8_t dtype, uint batch_idx)
97+
{
98+
if (dtype == 0) {
9899
(clusterer.nnInternals)->modelProbabilities = (clusterer.nnInternals)->model_class.inference<OrtDataType::Float16_t, float>((clusterer.nnInternals)->inputData16);
99100
} else {
100101
(clusterer.nnInternals)->modelProbabilities = (clusterer.nnInternals)->model_class.inference<float, float>((clusterer.nnInternals)->inputData32);
101102
}
102103
}
103104

104-
void GPUTPCNNClusterizer::inferenceNetworkReg1(processorType& clusterer, int8_t dtype) {
105-
if(dtype == 0){
105+
void GPUTPCNNClusterizer::inferenceNetworkReg1(processorType& clusterer, int8_t dtype)
106+
{
107+
if (dtype == 0) {
106108
(clusterer.nnInternals)->outputDataReg1 = (clusterer.nnInternals)->model_reg_1.inference<OrtDataType::Float16_t, float>((clusterer.nnInternals)->inputData16);
107109
} else {
108110
(clusterer.nnInternals)->outputDataReg1 = (clusterer.nnInternals)->model_reg_1.inference<float, float>((clusterer.nnInternals)->inputData32);
109111
}
110112
}
111113

112-
void GPUTPCNNClusterizer::inferenceNetworkReg2(processorType& clusterer, int8_t dtype) {
113-
if(dtype == 0){
114+
void GPUTPCNNClusterizer::inferenceNetworkReg2(processorType& clusterer, int8_t dtype)
115+
{
116+
if (dtype == 0) {
114117
(clusterer.nnInternals)->outputDataReg2 = (clusterer.nnInternals)->model_reg_2.inference<OrtDataType::Float16_t, float>((clusterer.nnInternals)->inputData16);
115118
} else {
116119
(clusterer.nnInternals)->outputDataReg2 = (clusterer.nnInternals)->model_reg_2.inference<float, float>((clusterer.nnInternals)->inputData32);
@@ -171,18 +174,18 @@ GPUd() void GPUTPCNNClusterizer::fillInputData(int32_t nBlocks, int32_t nThreads
171174
for (int t = -(clusterer.nnInternals)->nnClusterizerSizeInputTime; t <= (clusterer.nnInternals)->nnClusterizerSizeInputTime; t++) {
172175
if (!is_boundary) {
173176
ChargePos tmp_pos(row + r, pad + p, time + t);
174-
if (r == 0 && !(clusterer.nnInternals)->clusterFlags[glo_idx][0] && std::abs(p) < 3 && std::abs(t) < 3 && p!=0 && t!=0) { // ordering is done for short circuit optimization
177+
if (r == 0 && !(clusterer.nnInternals)->clusterFlags[glo_idx][0] && std::abs(p) < 3 && std::abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization
175178
(clusterer.nnInternals)->clusterFlags[glo_idx][0] = CfUtils::isPeak(isPeakMap[tmp_pos]);
176179
(clusterer.nnInternals)->clusterFlags[glo_idx][1] = (clusterer.nnInternals)->clusterFlags[glo_idx][0];
177180
}
178-
if(dtype == 0){
181+
if (dtype == 0) {
179182
(clusterer.nnInternals)->inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
180183
} else {
181184
(clusterer.nnInternals)->inputData32[write_idx] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
182185
}
183186
} else {
184187
// Filling boundary just to make sure that no values are left unintentionally
185-
if(dtype == 0){
188+
if (dtype == 0) {
186189
(clusterer.nnInternals)->inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>((clusterer.nnInternals)->nnClusterizerBoundaryFillValue));
187190
} else {
188191
(clusterer.nnInternals)->inputData32[write_idx] = static_cast<float>((clusterer.nnInternals)->nnClusterizerBoundaryFillValue);
@@ -193,7 +196,7 @@ GPUd() void GPUTPCNNClusterizer::fillInputData(int32_t nBlocks, int32_t nThreads
193196
}
194197
}
195198
if ((clusterer.nnInternals)->nnClusterizerAddIndexData) {
196-
if(dtype == 0){
199+
if (dtype == 0) {
197200
(clusterer.nnInternals)->inputData16[write_idx] = (OrtDataType::Float16_t)(clusterer.mISlice / 36.f);
198201
(clusterer.nnInternals)->inputData16[write_idx + 1] = (OrtDataType::Float16_t)(row / 152.f);
199202
(clusterer.nnInternals)->inputData16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast<float>(pad) / clusterer.Param().tpcGeometry.NPads(row));

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h

Lines changed: 4 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -74,12 +74,11 @@ class GPUTPCNNClusterizer : public GPUKernelTemplate
7474
static void inferenceNetworkClass(processorType&, int8_t = 0, uint = 0);
7575
static void inferenceNetworkReg1(processorType&, int8_t = 0);
7676
static void inferenceNetworkReg2(processorType&, int8_t = 0);
77-
78-
private:
7977

80-
static int padOffset(int, int, const GPUTPCGeometry&);
81-
static int rowOffset(int, int);
82-
static bool isBoundary(int, int, int, const GPUTPCGeometry&);
78+
private:
79+
static int padOffset(int, int, const GPUTPCGeometry&);
80+
static int rowOffset(int, int);
81+
static bool isBoundary(int, int, int, const GPUTPCGeometry&);
8382
};
8483

8584
} // namespace o2::gpu

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerInternals.h

Lines changed: 29 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -22,34 +22,35 @@
2222
namespace o2::gpu
2323
{
2424

25-
class GPUTPCNNClusterizerInternals {
26-
public:
27-
int nnClusterizerSizeInputRow = 3;
28-
int nnClusterizerSizeInputPad = 3;
29-
int nnClusterizerSizeInputTime = 3;
30-
int nnClusterizerElementSize = -1;
31-
bool nnClusterizerAddIndexData = true;
32-
float nnClassThreshold = 0.16;
33-
bool nnSigmoidTrafoClassThreshold = 1;
34-
int nnClusterizerUseCfRegression = 0;
35-
int nnClusterizerBatchedMode = 1;
36-
int nnClusterizerVerbosity = 0;
37-
int nnClusterizerBoundaryFillValue = -1;
38-
int nnClusterizerDumpDigits = 0;
39-
int nnClusterizerApplyCfDeconvolution = 0;
40-
41-
// Memory allocation for neural network
42-
uint class2_elements = 0;
43-
std::vector<float> inputData32;
44-
std::vector<OrtDataType::Float16_t> inputData16;
45-
std::vector<float> outputDataClass, modelProbabilities, outputDataReg1, outputDataReg2;
46-
47-
std::vector<ChargePos> peakPositions;
48-
std::vector<std::vector<bool>> clusterFlags; // mSplitInTime, mSplitInPad. Technically both flags are set in the same way -> ClusterAccumulator.cxx
49-
std::vector<float> centralCharges;
50-
51-
std::unordered_map<std::string, std::string> OrtOptions;
52-
o2::ml::OrtModel model_class, model_reg_1, model_reg_2; // For splitting clusters
25+
class GPUTPCNNClusterizerInternals
26+
{
27+
public:
28+
int nnClusterizerSizeInputRow = 3;
29+
int nnClusterizerSizeInputPad = 3;
30+
int nnClusterizerSizeInputTime = 3;
31+
int nnClusterizerElementSize = -1;
32+
bool nnClusterizerAddIndexData = true;
33+
float nnClassThreshold = 0.16;
34+
bool nnSigmoidTrafoClassThreshold = 1;
35+
int nnClusterizerUseCfRegression = 0;
36+
int nnClusterizerBatchedMode = 1;
37+
int nnClusterizerVerbosity = 0;
38+
int nnClusterizerBoundaryFillValue = -1;
39+
int nnClusterizerDumpDigits = 0;
40+
int nnClusterizerApplyCfDeconvolution = 0;
41+
42+
// Memory allocation for neural network
43+
uint class2_elements = 0;
44+
std::vector<float> inputData32;
45+
std::vector<OrtDataType::Float16_t> inputData16;
46+
std::vector<float> outputDataClass, modelProbabilities, outputDataReg1, outputDataReg2;
47+
48+
std::vector<ChargePos> peakPositions;
49+
std::vector<std::vector<bool>> clusterFlags; // mSplitInTime, mSplitInPad. Technically both flags are set in the same way -> ClusterAccumulator.cxx
50+
std::vector<float> centralCharges;
51+
52+
std::unordered_map<std::string, std::string> OrtOptions;
53+
o2::ml::OrtModel model_class, model_reg_1, model_reg_2; // For splitting clusters
5354
}; // class GPUTPCNNClusterizerInternals
5455

5556
} // namespace o2::gpu

0 commit comments

Comments
 (0)