Skip to content

Commit a6bbedb

Browse files
committed
Please consider the following formatting changes
1 parent 08753dd commit a6bbedb

File tree

5 files changed

+69
-69
lines changed

5 files changed

+69
-69
lines changed

Common/ML/src/OrtInterface.cxx

Lines changed: 10 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -303,7 +303,7 @@ std::vector<OrtDataType::Float16_t> OrtModel::inference<float, OrtDataType::Floa
303303
return outputValuesVec;
304304
}
305305

306-
template <>// class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
306+
template <> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
307307
float* OrtModel::inference(float* input, size_t input_size)
308308
{
309309
std::vector<int64_t> inputShape{(int64_t)(input_size / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
@@ -315,7 +315,7 @@ float* OrtModel::inference(float* input, size_t input_size)
315315
return outputValues;
316316
}
317317

318-
template <>// class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
318+
template <> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
319319
float* OrtModel::inference(OrtDataType::Float16_t* input, size_t input_size)
320320
{
321321
std::vector<int64_t> inputShape{(int64_t)(input_size / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
@@ -327,34 +327,30 @@ float* OrtModel::inference(OrtDataType::Float16_t* input, size_t input_size)
327327
return outputValues;
328328
}
329329

330-
template <>// class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
330+
template <> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
331331
void OrtModel::inference(float* input, size_t input_size, float* output)
332332
{
333333
std::vector<int64_t> inputShape{(int64_t)(input_size / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
334334
Ort::Value inputTensor = Ort::Value::CreateTensor<float>(pImplOrt->memoryInfo, input, input_size, inputShape.data(), inputShape.size());
335-
335+
336336
std::vector<int64_t> outputShape{inputShape[0], mOutputShapes[0][1]};
337337
size_t outputSize = (int64_t)((input_size / mInputShapes[0][1]) * outputShape[1]);
338338
Ort::Value outputTensor = Ort::Value::CreateTensor<float>(pImplOrt->memoryInfo, output, outputSize, outputShape.data(), outputShape.size());
339-
340-
(pImplOrt->session)->Run(pImplOrt->runOptions,
341-
inputNamesChar.data(), &inputTensor, 1,
342-
outputNamesChar.data(), &outputTensor, 1);
339+
340+
(pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), &inputTensor, 1, outputNamesChar.data(), &outputTensor, 1);
343341
}
344342

345-
template <>// class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
343+
template <> // class I is the input data type, e.g. float, class O is the output data type, e.g. O2::gpu::OrtDataType::Float16_t from O2/GPU/GPUTracking/ML/convert_float16.h
346344
void OrtModel::inference(OrtDataType::Float16_t* input, size_t input_size, float* output)
347345
{
348346
std::vector<int64_t> inputShape{(int64_t)(input_size / mInputShapes[0][1]), (int64_t)mInputShapes[0][1]};
349347
Ort::Value inputTensor = Ort::Value::CreateTensor<Ort::Float16_t>(pImplOrt->memoryInfo, reinterpret_cast<Ort::Float16_t*>(input), input_size, inputShape.data(), inputShape.size());
350-
348+
351349
std::vector<int64_t> outputShape{inputShape[0], mOutputShapes[0][1]};
352350
size_t outputSize = (int64_t)((input_size / mInputShapes[0][1]) * outputShape[1]);
353351
Ort::Value outputTensor = Ort::Value::CreateTensor<float>(pImplOrt->memoryInfo, output, outputSize, outputShape.data(), outputShape.size());
354-
355-
(pImplOrt->session)->Run(pImplOrt->runOptions,
356-
inputNamesChar.data(), &inputTensor, 1,
357-
outputNamesChar.data(), &outputTensor, 1);
352+
353+
(pImplOrt->session)->Run(pImplOrt->runOptions, inputNamesChar.data(), &inputTensor, 1, outputNamesChar.data(), &outputTensor, 1);
358354
}
359355

360356
template <>

GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -163,20 +163,20 @@ class GPUTPCClusterFinder : public GPUProcessor
163163
int nnClusterizerModelReg1NumOutputNodes = -1;
164164
int nnClusterizerModelReg2NumOutputNodes = -1;
165165
uint nnClusterizerCurrentSize = -1; // This variable determines the size of the memory pointers. It will be set at runtime.
166-
int nnClusterizerDtype = 0; // 0: float16, 1: float32
166+
int nnClusterizerDtype = 0; // 0: float16, 1: float32
167167

168168
// Memory allocation for neural network
169169
uint class2_elements = 0;
170-
float* inputData32=nullptr;
171-
OrtDataType::Float16_t* inputData16=nullptr;
172-
float* outputDataClass=nullptr;
173-
float* modelProbabilities=nullptr;
174-
float* outputDataReg1=nullptr;
175-
float* outputDataReg2=nullptr;
176-
177-
ChargePos* peakPositions=nullptr;
178-
bool* clusterFlags=nullptr; // mSplitInTime, mSplitInPad. Technically both flags are set in the same way -> ClusterAccumulator.cxx
179-
float* centralCharges=nullptr;
170+
float* inputData32 = nullptr;
171+
OrtDataType::Float16_t* inputData16 = nullptr;
172+
float* outputDataClass = nullptr;
173+
float* modelProbabilities = nullptr;
174+
float* outputDataReg1 = nullptr;
175+
float* outputDataReg2 = nullptr;
176+
177+
ChargePos* peakPositions = nullptr;
178+
bool* clusterFlags = nullptr; // mSplitInTime, mSplitInPad. Technically both flags are set in the same way -> ClusterAccumulator.cxx
179+
float* centralCharges = nullptr;
180180

181181
#ifndef GPUCA_GPUCODE
182182
void DumpDigits(std::ostream& out);

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx

Lines changed: 21 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -146,9 +146,9 @@ GPUd() void GPUTPCNNClusterizer::fillInputData(int32_t nBlocks, int32_t nThreads
146146
for (int t = -clusterer.nnClusterizerSizeInputTime; t <= clusterer.nnClusterizerSizeInputTime; t++) {
147147
if (!is_boundary) {
148148
ChargePos tmp_pos(row + r, pad + p, time + t);
149-
if (r == 0 && !clusterer.clusterFlags[2*glo_idx] && std::abs(p) < 3 && std::abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization
150-
clusterer.clusterFlags[2*glo_idx] = CfUtils::isPeak(isPeakMap[tmp_pos]);
151-
clusterer.clusterFlags[2*glo_idx + 1] = clusterer.clusterFlags[2*glo_idx];
149+
if (r == 0 && !clusterer.clusterFlags[2 * glo_idx] && std::abs(p) < 3 && std::abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization
150+
clusterer.clusterFlags[2 * glo_idx] = CfUtils::isPeak(isPeakMap[tmp_pos]);
151+
clusterer.clusterFlags[2 * glo_idx + 1] = clusterer.clusterFlags[2 * glo_idx];
152152
}
153153
if (dtype == 0) {
154154
clusterer.inputData16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
@@ -218,12 +218,12 @@ GPUd() void GPUTPCNNClusterizer::publishClustersReg1(uint glo_idx, GPUSharedMemo
218218
}
219219

220220
pc.setFull(clusterer.centralCharges[glo_idx] * clusterer.outputDataReg1[model_output_index + 4],
221-
static_cast<float>(clusterer.peakPositions[glo_idx].pad()) + clusterer.outputDataReg1[model_output_index],
222-
clusterer.outputDataReg1[model_output_index + 2],
223-
static_cast<float>((clusterer.mPmemory->fragment).start) + static_cast<float>(clusterer.peakPositions[glo_idx].time()) + clusterer.outputDataReg1[model_output_index + 1],
224-
clusterer.outputDataReg1[model_output_index + 3],
225-
clusterer.clusterFlags[2*glo_idx],
226-
clusterer.clusterFlags[2*glo_idx + 1]);
221+
static_cast<float>(clusterer.peakPositions[glo_idx].pad()) + clusterer.outputDataReg1[model_output_index],
222+
clusterer.outputDataReg1[model_output_index + 2],
223+
static_cast<float>((clusterer.mPmemory->fragment).start) + static_cast<float>(clusterer.peakPositions[glo_idx].time()) + clusterer.outputDataReg1[model_output_index + 1],
224+
clusterer.outputDataReg1[model_output_index + 3],
225+
clusterer.clusterFlags[2 * glo_idx],
226+
clusterer.clusterFlags[2 * glo_idx + 1]);
227227

228228
tpc::ClusterNative myCluster;
229229
bool rejectCluster = !pc.toNative(clusterer.peakPositions[glo_idx], clusterer.centralCharges[glo_idx], myCluster, clusterer.Param());
@@ -296,12 +296,12 @@ GPUd() void GPUTPCNNClusterizer::publishClustersReg2(uint glo_idx, GPUSharedMemo
296296

297297
// Cluster 1
298298
pc.setFull(clusterer.centralCharges[glo_idx] * clusterer.outputDataReg2[model_output_index + 8],
299-
static_cast<float>(clusterer.peakPositions[glo_idx].pad()) + clusterer.outputDataReg2[model_output_index],
300-
clusterer.outputDataReg2[model_output_index + 4],
301-
static_cast<float>((clusterer.mPmemory->fragment).start) + static_cast<float>(clusterer.peakPositions[glo_idx].time()) + clusterer.outputDataReg2[model_output_index + 2],
302-
clusterer.outputDataReg2[model_output_index + 6],
303-
clusterer.clusterFlags[2*glo_idx],
304-
clusterer.clusterFlags[2*glo_idx + 1]);
299+
static_cast<float>(clusterer.peakPositions[glo_idx].pad()) + clusterer.outputDataReg2[model_output_index],
300+
clusterer.outputDataReg2[model_output_index + 4],
301+
static_cast<float>((clusterer.mPmemory->fragment).start) + static_cast<float>(clusterer.peakPositions[glo_idx].time()) + clusterer.outputDataReg2[model_output_index + 2],
302+
clusterer.outputDataReg2[model_output_index + 6],
303+
clusterer.clusterFlags[2 * glo_idx],
304+
clusterer.clusterFlags[2 * glo_idx + 1]);
305305

306306
tpc::ClusterNative myCluster;
307307
bool rejectCluster = !pc.toNative(clusterer.peakPositions[glo_idx], clusterer.centralCharges[glo_idx], myCluster, clusterer.Param());
@@ -331,12 +331,12 @@ GPUd() void GPUTPCNNClusterizer::publishClustersReg2(uint glo_idx, GPUSharedMemo
331331

332332
// Cluster 2
333333
pc.setFull(clusterer.centralCharges[glo_idx] * clusterer.outputDataReg2[model_output_index + 9],
334-
static_cast<float>(clusterer.peakPositions[glo_idx].pad()) + clusterer.outputDataReg2[model_output_index + 1],
335-
clusterer.outputDataReg2[model_output_index + 5],
336-
static_cast<float>((clusterer.mPmemory->fragment).start) + static_cast<float>(clusterer.peakPositions[glo_idx].time()) + clusterer.outputDataReg2[model_output_index + 3],
337-
clusterer.outputDataReg2[model_output_index + 7],
338-
clusterer.clusterFlags[2*glo_idx],
339-
clusterer.clusterFlags[2*glo_idx + 1]);
334+
static_cast<float>(clusterer.peakPositions[glo_idx].pad()) + clusterer.outputDataReg2[model_output_index + 1],
335+
clusterer.outputDataReg2[model_output_index + 5],
336+
static_cast<float>((clusterer.mPmemory->fragment).start) + static_cast<float>(clusterer.peakPositions[glo_idx].time()) + clusterer.outputDataReg2[model_output_index + 3],
337+
clusterer.outputDataReg2[model_output_index + 7],
338+
clusterer.clusterFlags[2 * glo_idx],
339+
clusterer.clusterFlags[2 * glo_idx + 1]);
340340

341341
rejectCluster = !pc.toNative(clusterer.peakPositions[glo_idx], clusterer.centralCharges[glo_idx], myCluster, clusterer.Param());
342342
if (rejectCluster) {

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerInternals.cxx

Lines changed: 20 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -16,22 +16,22 @@
1616

1717
using namespace o2::gpu;
1818

19-
GPUTPCNNClusterizerInternals::GPUTPCNNClusterizerInternals(GPUSettingsProcessing settings, processorType& clusterer) {
19+
GPUTPCNNClusterizerInternals::GPUTPCNNClusterizerInternals(GPUSettingsProcessing settings, processorType& clusterer)
20+
{
2021
clusterer_internal = &clusterer;
2122
GPUSettingsProcessingNNclusterizer nn_settings = settings.nn;
2223
OrtOptions = {{"model-path", nn_settings.nnClassificationPath},
23-
{"device", nn_settings.nnInferenceDevice},
24-
{"device-id", std::to_string(nn_settings.nnInferenceDeviceId)},
25-
{"allocate-device-memory", std::to_string(nn_settings.nnInferenceAllocateDevMem)},
26-
{"dtype", nn_settings.nnInferenceDtype},
27-
{"intra-op-num-threads", std::to_string(nn_settings.nnInferenceThreadsPerNN)},
28-
{"enable-optimizations", std::to_string(nn_settings.nnInferenceEnableOrtOptimization)},
29-
{"enable-profiling", std::to_string(nn_settings.nnInferenceOrtProfiling)},
30-
{"profiling-output-path", nn_settings.nnInferenceOrtProfilingPath},
31-
{"logging-level", std::to_string(nn_settings.nnInferenceVerbosity)}};
24+
{"device", nn_settings.nnInferenceDevice},
25+
{"device-id", std::to_string(nn_settings.nnInferenceDeviceId)},
26+
{"allocate-device-memory", std::to_string(nn_settings.nnInferenceAllocateDevMem)},
27+
{"dtype", nn_settings.nnInferenceDtype},
28+
{"intra-op-num-threads", std::to_string(nn_settings.nnInferenceThreadsPerNN)},
29+
{"enable-optimizations", std::to_string(nn_settings.nnInferenceEnableOrtOptimization)},
30+
{"enable-profiling", std::to_string(nn_settings.nnInferenceOrtProfiling)},
31+
{"profiling-output-path", nn_settings.nnInferenceOrtProfilingPath},
32+
{"logging-level", std::to_string(nn_settings.nnInferenceVerbosity)}};
3233
sector = clusterer.mISector;
3334

34-
3535
model_class.init(OrtOptions);
3636
reg_model_paths = splitString(nn_settings.nnRegressionPath, ":");
3737

@@ -51,24 +51,26 @@ GPUTPCNNClusterizerInternals::GPUTPCNNClusterizerInternals(GPUSettingsProcessing
5151
}
5252
}
5353

54-
void* GPUTPCNNClusterizerInternals::setIOPointers(void* mem) {
55-
if (clusterer_internal->nnClusterizerDtype == 0){
56-
computePointerWithAlignment(mem, clusterer_internal->inputData16, clusterer_internal->nnClusterizerCurrentSize * clusterer_internal->nnClusterizerElementSize);
57-
} else if (clusterer_internal->nnClusterizerDtype == 1){
58-
computePointerWithAlignment(mem, clusterer_internal->inputData32, clusterer_internal->nnClusterizerCurrentSize * clusterer_internal->nnClusterizerElementSize);
54+
void* GPUTPCNNClusterizerInternals::setIOPointers(void* mem)
55+
{
56+
if (clusterer_internal->nnClusterizerDtype == 0) {
57+
computePointerWithAlignment(mem, clusterer_internal->inputData16, clusterer_internal->nnClusterizerCurrentSize * clusterer_internal->nnClusterizerElementSize);
58+
} else if (clusterer_internal->nnClusterizerDtype == 1) {
59+
computePointerWithAlignment(mem, clusterer_internal->inputData32, clusterer_internal->nnClusterizerCurrentSize * clusterer_internal->nnClusterizerElementSize);
5960
}
6061
computePointerWithAlignment(mem, clusterer_internal->outputDataClass, clusterer_internal->nnClusterizerCurrentSize);
6162
computePointerWithAlignment(mem, clusterer_internal->modelProbabilities, clusterer_internal->nnClusterizerCurrentSize * clusterer_internal->nnClusterizerModelClassNumOutputNodes);
6263
computePointerWithAlignment(mem, clusterer_internal->outputDataReg1, clusterer_internal->nnClusterizerCurrentSize * clusterer_internal->nnClusterizerModelReg1NumOutputNodes);
6364
computePointerWithAlignment(mem, clusterer_internal->outputDataReg2, clusterer_internal->nnClusterizerCurrentSize * clusterer_internal->nnClusterizerModelReg2NumOutputNodes);
6465
computePointerWithAlignment(mem, clusterer_internal->peakPositions, clusterer_internal->nnClusterizerCurrentSize);
65-
computePointerWithAlignment(mem, clusterer_internal->clusterFlags, 2*clusterer_internal->nnClusterizerCurrentSize);
66+
computePointerWithAlignment(mem, clusterer_internal->clusterFlags, 2 * clusterer_internal->nnClusterizerCurrentSize);
6667
computePointerWithAlignment(mem, clusterer_internal->centralCharges, clusterer_internal->nnClusterizerCurrentSize);
6768

6869
return mem;
6970
}
7071

71-
void GPUTPCNNClusterizerInternals::RegisterMemoryAllocation() {
72+
void GPUTPCNNClusterizerInternals::RegisterMemoryAllocation()
73+
{
7274
AllocateAndInitializeLate();
7375
int32_t memType = GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_STACK;
7476
mMemoryId = mRec->RegisterMemoryAllocation(this, &GPUTPCNNClusterizerInternals::setIOPointers, memType, "TPCNNClusterer", GPUMemoryReuse{GPUMemoryReuse::REUSE_1TO1, GPUMemoryReuse::NNClusterer, (uint16_t)(sector % mRec->GetProcessingSettings().nTPCClustererLanes)});

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerInternals.h

Lines changed: 7 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,7 @@ namespace o2::gpu
3030
class GPUTPCNNClusterizerInternals : public GPUProcessor
3131
{
3232
public:
33-
typedef GPUTPCClusterFinder processorType;
33+
typedef GPUTPCClusterFinder processorType;
3434
GPUTPCNNClusterizerInternals() = default;
3535
GPUTPCNNClusterizerInternals(GPUSettingsProcessing, processorType&);
3636
void* setIOPointers(void*);
@@ -42,20 +42,22 @@ class GPUTPCNNClusterizerInternals : public GPUProcessor
4242
std::unordered_map<std::string, std::string> OrtOptions;
4343
o2::ml::OrtModel model_class, model_reg_1, model_reg_2; // For splitting clusters
4444
std::vector<std::string> reg_model_paths;
45+
4546
private:
46-
processorType* clusterer_internal;
47+
processorType* clusterer_internal;
4748
int sector = -1;
4849
int16_t mMemoryId = -1;
4950

5051
// Avoid including CommonUtils/StringUtils.h
51-
std::vector<std::string> splitString(const std::string& input, const std::string& delimiter) {
52+
std::vector<std::string> splitString(const std::string& input, const std::string& delimiter)
53+
{
5254
std::vector<std::string> tokens;
5355
std::size_t pos = 0;
5456
std::size_t found;
5557

5658
while ((found = input.find(delimiter, pos)) != std::string::npos) {
57-
tokens.push_back(input.substr(pos, found - pos));
58-
pos = found + delimiter.length();
59+
tokens.push_back(input.substr(pos, found - pos));
60+
pos = found + delimiter.length();
5961
}
6062
tokens.push_back(input.substr(pos));
6163

0 commit comments

Comments
 (0)