Skip to content

Commit d3f439e

Browse files
committed
Fixing smem usage from CFClusterizer and adding rejection flag -> No out-of-bounds in QC anymore
1 parent d87f8fa commit d3f439e

File tree

6 files changed

+188
-207
lines changed

6 files changed

+188
-207
lines changed

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1110,10 +1110,12 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
11101110
}
11111111

11121112
// Publishing kernels for class labels and regression results
1113-
if (nnApplication.mModelClass.getNumOutputNodes()[0][1] == 1) {
1114-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels
1115-
} else {
1116-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass2Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels
1113+
if(clustererNNShadow.mNnClusterizerUseClassification) {
1114+
if (nnApplication.mModelClass.getNumOutputNodes()[0][1] == 1) {
1115+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels
1116+
} else {
1117+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass2Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Assigning class labels
1118+
}
11171119
}
11181120
if (!clustererNNShadow.mNnClusterizerUseCfRegression) {
11191121
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass1Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, propagateMCLabels, batchStart); // Publishing class 1 regression results

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,5 +35,5 @@ GPUdii() void GPUTPCCFClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads,
3535

3636
tpc::ClusterNative* clusterOut = (onlyMC) ? nullptr : clusterer.mPclusterByRow;
3737

38-
GPUTPCCFClusterizer::computeClustersImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer, clusterer.mPmemory->fragment, smem, chargeMap, clusterer.mPfilteredPeakPositions, clusterer.Param().rec, CPU_PTR(&labelAcc), clusterer.mPmemory->counters.nClusters, clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut, clusterer.mPclusterPosInRow);
38+
GPUTPCCFClusterizer::computeClustersImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer, clusterer.mPmemory->fragment, smem, chargeMap, clusterer.mPfilteredPeakPositions, clusterer.Param().rec, CPU_PTR(&labelAcc), clusterer.mPmemory->counters.nClusters, clusterer.mNMaxClusterPerRow, clusterer.mPclusterInRow, clusterOut, clusterer.mPclusterPosInRow, true);
3939
}

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class GPUTPCCFClusterizer : public GPUKernelTemplate
5757
template <int32_t iKernel = defaultKernel>
5858
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int8_t);
5959

60-
static GPUd() void computeClustersImpl(int32_t, int32_t, int32_t, int32_t, processorType&, const CfFragment&, GPUSharedMemory&, const CfArray2D<PackedCharge>&, const CfChargePos*, const GPUSettingsRec&, MCLabelAccumulator*, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*, uint32_t*);
60+
static GPUd() void computeClustersImpl(int32_t, int32_t, int32_t, int32_t, processorType&, const CfFragment&, GPUSharedMemory&, const CfArray2D<PackedCharge>&, const CfChargePos*, const GPUSettingsRec&, MCLabelAccumulator*, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*, uint32_t*, int8_t);
6161

6262
static GPUd() void buildCluster(const GPUSettingsRec&, const CfArray2D<PackedCharge>&, CfChargePos, CfChargePos*, PackedCharge*, uint8_t*, ClusterAccumulator*, MCLabelAccumulator*);
6363

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFClusterizer.inc

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t
2727
uint32_t maxClusterPerRow,
2828
uint32_t* clusterInRow,
2929
tpc::ClusterNative* clusterByRow,
30-
uint32_t* clusterPosInRow)
30+
uint32_t* clusterPosInRow,
31+
int8_t isAccepted)
3132
{
3233
uint32_t idx = get_global_id(0);
3334

@@ -62,6 +63,9 @@ GPUdii() void GPUTPCCFClusterizer::computeClustersImpl(int32_t nBlocks, int32_t
6263
tpc::ClusterNative myCluster;
6364
pc.finalize(pos, charge, fragment.start);
6465
bool rejectCluster = !pc.toNative(pos, charge, myCluster, clusterer.Param(), chargeMap);
66+
if (!isAccepted) {
67+
rejectCluster = true;
68+
}
6569

6670
if (rejectCluster) {
6771
if (clusterPosInRow) {

0 commit comments

Comments
 (0)