Skip to content

Commit bedb592

Browse files
committed
Fixing compile issues, only thing missing: conversion of float to float16
1 parent 06e26a8 commit bedb592

File tree

3 files changed

+13
-13
lines changed

3 files changed

+13
-13
lines changed

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -921,11 +921,14 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
921921
if ((clusterer.nnInternals)->model_class.getNumOutputNodes()[0][1] == 1 || reg_model_paths.size() == 1) {
922922
(clusterer.nnInternals)->OrtOptions["model-path"] = reg_model_paths[0];
923923
(clusterer.nnInternals)->model_reg_1.init((clusterer.nnInternals)->OrtOptions);
924+
(clusterer.nnInternals)->nnClusterizerModelClassNumOutputNodes = (clusterer.nnInternals)->model_class.getNumOutputNodes()[0][1];
924925
} else {
925926
(clusterer.nnInternals)->OrtOptions["model-path"] = reg_model_paths[0];
926927
(clusterer.nnInternals)->model_reg_1.init((clusterer.nnInternals)->OrtOptions);
928+
(clusterer.nnInternals)->nnClusterizerModelReg1NumOutputNodes = (clusterer.nnInternals)->model_reg_1.getNumOutputNodes()[0][1];
927929
(clusterer.nnInternals)->OrtOptions["model-path"] = reg_model_paths[1];
928930
(clusterer.nnInternals)->model_reg_2.init((clusterer.nnInternals)->OrtOptions);
931+
(clusterer.nnInternals)->nnClusterizerModelReg2NumOutputNodes = (clusterer.nnInternals)->model_reg_2.getNumOutputNodes()[0][1];
929932
}
930933
}
931934

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -59,17 +59,17 @@ template <>
5959
GPUdii() void GPUTPCNNClusterizer::Thread<GPUTPCNNClusterizer::determineClass2Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int8_t dtype, int8_t onlyMC, uint batchStart)
6060
{
6161
uint glo_idx = get_global_id(0);
62-
uint elem_iterator = glo_idx * (clusterer.nnInternals)->model_class.getNumOutputNodes()[0][1];
62+
uint elem_iterator = glo_idx * (clusterer.nnInternals)->nnClusterizerModelClassNumOutputNodes;
6363
float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, infty]
6464
uint class_label = 0;
65-
for (float pIdx = elem_iterator; pIdx < elem_iterator + (clusterer.nnInternals)->model_class.getNumOutputNodes()[0][1]; pIdx++) {
65+
for (float pIdx = elem_iterator; pIdx < elem_iterator + (clusterer.nnInternals)->nnClusterizerModelClassNumOutputNodes; pIdx++) {
6666
if (pIdx == elem_iterator) {
6767
current_max_prob = (clusterer.nnInternals)->modelProbabilities[pIdx];
6868
} else {
6969
class_label = ((clusterer.nnInternals)->modelProbabilities[pIdx] > current_max_prob ? pIdx : class_label);
7070
}
7171
}
72-
// uint class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + (clusterer.nnInternals)->model_class.getNumOutputNodes()[0][1])); // Multiple outputs of the class network are the probabilities for each class. The highest one "wins"
72+
// uint class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + (clusterer.nnInternals)->nnClusterizerModelClassNumOutputNodes)); // Multiple outputs of the class network are the probabilities for each class. The highest one "wins"
7373
(clusterer.nnInternals)->outputDataClass[glo_idx + batchStart] = class_label;
7474
}
7575

@@ -216,9 +216,9 @@ GPUd() void GPUTPCNNClusterizer::publishClustersReg1(uint glo_idx, GPUSharedMemo
216216
MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem);
217217
tpc::ClusterNative* clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow;
218218
uint full_glo_idx = glo_idx + batchStart;
219-
int model_output_index = glo_idx * (clusterer.nnInternals)->model_reg_1.getNumOutputNodes()[0][1];
219+
int model_output_index = glo_idx * (clusterer.nnInternals)->nnClusterizerModelReg1NumOutputNodes;
220220

221-
// LOG(info) << glo_idx << " -- " << model_output_index << " / " << (clusterer.nnInternals)->outputDataReg1.size() << " / " << (clusterer.nnInternals)->model_reg_1.getNumOutputNodes()[0][1] << " -- " << (clusterer.nnInternals)->peakPositions.size() << " -- " << (clusterer.nnInternals)->centralCharges.size();
221+
// LOG(info) << glo_idx << " -- " << model_output_index << " / " << (clusterer.nnInternals)->outputDataReg1.size() << " / " << (clusterer.nnInternals)->nnClusterizerModelReg1NumOutputNodes << " -- " << (clusterer.nnInternals)->peakPositions.size() << " -- " << (clusterer.nnInternals)->centralCharges.size();
222222

223223
if ((clusterer.nnInternals)->outputDataClass[full_glo_idx] == 1) {
224224

@@ -288,9 +288,9 @@ GPUd() void GPUTPCNNClusterizer::publishClustersReg2(uint glo_idx, GPUSharedMemo
288288
MCLabelAccumulator* labelAcc = CPU_PTR(&labelAccElem);
289289
tpc::ClusterNative* clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow;
290290
uint full_glo_idx = glo_idx + batchStart;
291-
int model_output_index = glo_idx * (clusterer.nnInternals)->model_reg_2.getNumOutputNodes()[0][1];
291+
int model_output_index = glo_idx * (clusterer.nnInternals)->nnClusterizerModelReg2NumOutputNodes;
292292

293-
// LOG(info) << glo_idx << " -- " << model_output_index << " / " << (clusterer.nnInternals)->outputDataReg1.size() << " / " << (clusterer.nnInternals)->model_reg_1.getNumOutputNodes()[0][1] << " -- " << (clusterer.nnInternals)->peakPositions.size() << " -- " << (clusterer.nnInternals)->centralCharges.size();
293+
// LOG(info) << glo_idx << " -- " << model_output_index << " / " << (clusterer.nnInternals)->outputDataReg1.size() << " / " << (clusterer.nnInternals)->nnClusterizerModelReg2NumOutputNodes << " -- " << (clusterer.nnInternals)->peakPositions.size() << " -- " << (clusterer.nnInternals)->centralCharges.size();
294294

295295
if ((clusterer.nnInternals)->outputDataClass[full_glo_idx] > 0) {
296296

@@ -323,9 +323,6 @@ GPUd() void GPUTPCNNClusterizer::publishClustersReg2(uint glo_idx, GPUSharedMemo
323323
tpc::ClusterNative myCluster;
324324
bool rejectCluster = !pc.toNative((clusterer.nnInternals)->peakPositions[glo_idx], (clusterer.nnInternals)->centralCharges[glo_idx], myCluster, clusterer.Param());
325325
if (rejectCluster) {
326-
if ((clusterer.nnInternals)->nnClusterizerVerbosity < 2) {
327-
LOG(warning) << "[NN, CF] Cluster rejected!";
328-
}
329326
if (clusterer.mPclusterPosInRow) {
330327
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
331328
}
@@ -354,9 +351,6 @@ GPUd() void GPUTPCNNClusterizer::publishClustersReg2(uint glo_idx, GPUSharedMemo
354351

355352
rejectCluster = !pc.toNative((clusterer.nnInternals)->peakPositions[glo_idx], (clusterer.nnInternals)->centralCharges[glo_idx], myCluster, clusterer.Param());
356353
if (rejectCluster) {
357-
if ((clusterer.nnInternals)->nnClusterizerVerbosity < 2) {
358-
LOG(warning) << "[NN, CF] Cluster rejected!";
359-
}
360354
if (clusterer.mPclusterPosInRow) {
361355
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
362356
}

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerInternals.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,9 @@ class GPUTPCNNClusterizerInternals
4040
int nnClusterizerBoundaryFillValue = -1;
4141
int nnClusterizerDumpDigits = 0;
4242
int nnClusterizerApplyCfDeconvolution = 0;
43+
int nnClusterizerModelClassNumOutputNodes = -1;
44+
int nnClusterizerModelReg1NumOutputNodes = -1;
45+
int nnClusterizerModelReg2NumOutputNodes = -1;
4346

4447
// Memory allocation for neural network
4548
uint class2_elements = 0;

0 commit comments

Comments
 (0)