Skip to content

Commit 1bee99a

Browse files
committed
Adjusting kernels for GPU safe rejection
1 parent d3f439e commit 1bee99a

File tree

2 files changed

+48
-35
lines changed

2 files changed

+48
-35
lines changed

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,12 +1110,11 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
11101110
}
11111111

11121112
// Publishing kernels for class labels and regression results
1113-
if(clustererNNShadow.mNnClusterizerUseClassification) {
1114-
if (nnApplication.mModelClass.getNumOutputNodes()[0][1] == 1) {
1115-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels
1116-
} else {
1117-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass2Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels
1118-
}
1113+
// In case classification should not be used, this kernel should still be executed to fill the mOutputDataClass array with default values
1114+
if (nnApplication.mModelClass.getNumOutputNodes()[0][1] == 1) {
1115+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels
1116+
} else {
1117+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass2Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels
11191118
}
11201119
if (!clustererNNShadow.mNnClusterizerUseCfRegression) {
11211120
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass1Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Publishing class 1 regression results

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx

Lines changed: 43 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -47,7 +47,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::run
4747
CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
4848
CPU_ONLY(MCLabelAccumulator labelAcc(clusterer));
4949
tpc::ClusterNative* clusterOut = (withMC) ? nullptr : clusterer.mPclusterByRow;
50-
int8_t isAccepted = (clustererNN.mNnClusterizerUseClassification ? clustererNN.mOutputDataClass[CAMath::Min(glo_idx, (uint32_t)clusterer.mPmemory->counters.nClusters - 1)] : 1);
50+
int8_t isAccepted = (clustererNN.mNnClusterizerUseClassification ? (clustererNN.mOutputDataClass[CAMath::Min(glo_idx, (uint32_t)clusterer.mPmemory->counters.nClusters - 1)] > 0) : 1);
5151
GPUTPCCFClusterizer::computeClustersImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer, clusterer.mPmemory->fragment, reinterpret_cast<GPUTPCCFClusterizer::GPUSharedMemory&>(smem), chargeMap, clusterer.mPfilteredPeakPositions, clusterer.Param().rec, CPU_PTR(&labelAcc), clusterer.mPmemory->counters.nClusters, clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut, clusterer.mPclusterPosInRow, isAccepted);
5252
}
5353

@@ -275,10 +275,14 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::det
275275
if (glo_idx + batchStart >= clusterer.mPmemory->counters.nClusters || glo_idx >= clustererNN.mNnClusterizerBatchedMode) {
276276
return;
277277
}
278-
if (dtype == 0) {
279-
processors.tpcNNClusterer[sector].mOutputDataClass[glo_idx + batchStart] = (int32_t)((processors.tpcNNClusterer[sector].mModelProbabilities_16[glo_idx]).ToFloat() > processors.tpcNNClusterer[sector].mNnClassThreshold);
280-
} else if (dtype == 1) {
281-
processors.tpcNNClusterer[sector].mOutputDataClass[glo_idx + batchStart] = (int32_t)(processors.tpcNNClusterer[sector].mModelProbabilities_32[glo_idx] > processors.tpcNNClusterer[sector].mNnClassThreshold);
278+
if(clustererNN.mNnClusterizerUseClassification) {
279+
if (dtype == 0) {
280+
clustererNN.mOutputDataClass[glo_idx + batchStart] = (int32_t)((clustererNN.mModelProbabilities_16[glo_idx]).ToFloat() > clustererNN.mNnClassThreshold);
281+
} else if (dtype == 1) {
282+
clustererNN.mOutputDataClass[glo_idx + batchStart] = (int32_t)(clustererNN.mModelProbabilities_32[glo_idx] > clustererNN.mNnClassThreshold);
283+
}
284+
} else {
285+
clustererNN.mOutputDataClass[glo_idx + batchStart] = 1;
282286
}
283287
}
284288

@@ -291,29 +295,33 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::det
291295
if (glo_idx + batchStart >= clusterer.mPmemory->counters.nClusters || glo_idx >= clustererNN.mNnClusterizerBatchedMode) {
292296
return;
293297
}
294-
uint32_t elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes;
295-
float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, infty]
296-
uint32_t class_label = 0;
297-
for (uint32_t pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) {
298-
if (pIdx == elem_iterator) {
299-
if (dtype == 0) {
300-
current_max_prob = static_cast<float>(clustererNN.mModelProbabilities_16[pIdx]);
301-
} else if (dtype == 1) {
302-
current_max_prob = clustererNN.mModelProbabilities_32[pIdx];
303-
}
304-
} else {
305-
if (dtype == 0) {
306-
current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_16[pIdx].ToFloat());
307-
} else if (dtype == 1) {
308-
current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_32[pIdx]);
298+
if(clustererNN.mNnClusterizerUseClassification) {
299+
uint32_t elem_iterator = glo_idx * clustererNN.mNnClusterizerModelClassNumOutputNodes;
300+
float current_max_prob = 0.f; // If the neural network doesn't contain the softmax as a last layer, the outputs can range in [-infty, infty]
301+
uint32_t class_label = 0;
302+
for (uint32_t pIdx = elem_iterator; pIdx < elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes; pIdx++) {
303+
if (pIdx == elem_iterator) {
304+
if (dtype == 0) {
305+
current_max_prob = static_cast<float>(clustererNN.mModelProbabilities_16[pIdx]);
306+
} else if (dtype == 1) {
307+
current_max_prob = clustererNN.mModelProbabilities_32[pIdx];
308+
}
309+
} else {
310+
if (dtype == 0) {
311+
current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_16[pIdx].ToFloat());
312+
} else if (dtype == 1) {
313+
current_max_prob = CAMath::Max(current_max_prob, clustererNN.mModelProbabilities_32[pIdx]);
314+
}
309315
}
310316
}
311-
}
312-
// uint32_t class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes)); // Multiple outputs of the class network are the probabilities for each class. The highest one "wins"
313-
clustererNN.mOutputDataClass[glo_idx + batchStart] = class_label;
314-
if (class_label > 1) {
315-
clustererNN.mClusterFlags[2 * glo_idx] = 1;
316-
clustererNN.mClusterFlags[2 * glo_idx + 1] = 1;
317+
// uint32_t class_label = std::distance(elem_iterator, std::max_element(elem_iterator, elem_iterator + clustererNN.mNnClusterizerModelClassNumOutputNodes)); // Multiple outputs of the class network are the probabilities for each class. The highest one "wins"
318+
clustererNN.mOutputDataClass[glo_idx + batchStart] = class_label;
319+
if (class_label > 1) {
320+
clustererNN.mClusterFlags[2 * glo_idx] = 1;
321+
clustererNN.mClusterFlags[2 * glo_idx + 1] = 1;
322+
}
323+
} else {
324+
clustererNN.mOutputDataClass[glo_idx + batchStart] = 1;
317325
}
318326
}
319327

@@ -411,7 +419,9 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
411419

412420
tpc::ClusterNative myCluster;
413421
bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
414-
rejectCluster &= (clustererNN.mNnClusterizerUseClassification ? clustererNN.mOutputDataClass[CAMath::Min(full_glo_idx, (uint32_t)clusterer.mPmemory->counters.nClusters - 1)] : 1);
422+
if (clustererNN.mNnClusterizerUseClassification) {
423+
rejectCluster |= (clustererNN.mOutputDataClass[CAMath::Min(full_glo_idx, (uint32_t)clusterer.mPmemory->counters.nClusters - 1)] <= 0);
424+
}
415425
if (rejectCluster) {
416426
if (clusterer.mPclusterPosInRow) {
417427
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
@@ -519,7 +529,9 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
519529

520530
tpc::ClusterNative myCluster;
521531
bool rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
522-
rejectCluster &= (clustererNN.mNnClusterizerUseClassification ? clustererNN.mOutputDataClass[CAMath::Min(full_glo_idx, (uint32_t)clusterer.mPmemory->counters.nClusters - 1)] : 1);
532+
if (clustererNN.mNnClusterizerUseClassification) {
533+
rejectCluster |= (clustererNN.mOutputDataClass[CAMath::Min(full_glo_idx, (uint32_t)clusterer.mPmemory->counters.nClusters - 1)] <= 0);
534+
}
523535
if (rejectCluster) {
524536
if (clusterer.mPclusterPosInRow) {
525537
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;
@@ -564,7 +576,9 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
564576
}
565577

566578
rejectCluster = !pc.toNative(peak, central_charge, myCluster, clusterer.Param(), chargeMap);
567-
rejectCluster &= (clustererNN.mNnClusterizerUseClassification ? clustererNN.mOutputDataClass[CAMath::Min(full_glo_idx, (uint32_t)clusterer.mPmemory->counters.nClusters - 1)] : 1);
579+
if (clustererNN.mNnClusterizerUseClassification) {
580+
rejectCluster |= (clustererNN.mOutputDataClass[CAMath::Min(full_glo_idx, (uint32_t)clusterer.mPmemory->counters.nClusters - 1)] <= 0);
581+
}
568582
if (rejectCluster) {
569583
if (clusterer.mPclusterPosInRow) {
570584
clusterer.mPclusterPosInRow[full_glo_idx] = clusterer.mNMaxClusterPerRow;

0 commit comments

Comments
 (0)