Skip to content

Commit 86b1969

Browse files
NN clusterizer: Bug-fixes and addition of deconvolution kernel (#14378)
* First bug-fixes and optimizations for deconvolution flags * Adding publishing logic for deconvolution flags * Adjusting kernels.cmake * Please consider the following formatting changes * Bug-fix for time-position and boundary check in fillInputSingleElement * Fix for kernels.cmake and naming * Changing to uint8_t * Adding kernel definition --------- Co-authored-by: ALICE Action Bot <alibuild@cern.ch>
1 parent d4fb131 commit 86b1969

File tree

10 files changed

+81
-34
lines changed

10 files changed

+81
-34
lines changed

GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -497,6 +497,7 @@
497497
#define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels GPUCA_LB_GPUTPCNNClusterizerKernels
498498
#define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels
499499
#define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels
500+
#define GPUCA_LB_GPUTPCNNClusterizerKernels_publishDeconvolutionFlags GPUCA_LB_GPUTPCNNClusterizerKernels
500501

501502
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE
502503
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -269,6 +269,7 @@ AddOption(nnClusterizerBatchedMode, unsigned int, 1, "", 0, "(int, default = 1)
269269
AddOption(nnClusterizerVerbosity, int, -1, "", 0, "(int, default = -1) If >0, logging messages of the clusterizer will be displayed")
270270
AddOption(nnClusterizerBoundaryFillValue, int, -1, "", 0, "Fill value for the boundary of the input to the NN")
271271
AddOption(nnClusterizerApplyNoiseSuppression, int, 1, "", 0, "Applies the NoiseSuppression kernel before the digits to the network are filled")
272+
AddOption(nnClusterizerSetDeconvolutionFlags, int, 1, "", 0, "Runs the deconvolution kernel without overwriting the charge in order to make cluster-to-track attachment identical to heuristic CF")
272273
AddOption(nnClassificationPath, std::string, "network_class.onnx", "", 0, "The classification network path")
273274
AddOption(nnClassThreshold, float, 0.5, "", 0, "The cutoff at which clusters will be accepted / rejected.")
274275
AddOption(nnRegressionPath, std::string, "network_reg.onnx", "", 0, "The regression network path")

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -989,8 +989,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
989989
int withMC = (doGPU && propagateMCLabels);
990990

991991
if (clustererNNShadow.mNnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) {
992-
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
992+
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true);
993993
DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
994+
} else if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags) {
995+
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, false);
994996
}
995997

996998
// float time_clusterizer = 0, time_fill = 0, time_networks = 0;
@@ -1001,6 +1003,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10011003
// auto start0 = std::chrono::high_resolution_clock::now();
10021004
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNNSingleElement>({GetGrid(iSize * clustererNNShadow.mNnClusterizerElementSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, withMC, batchStart); // Filling the data
10031005

1006+
if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags) {
1007+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishDeconvolutionFlags>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, withMC, batchStart); // Publishing the deconvolution flags
1008+
}
1009+
10041010
// auto stop0 = std::chrono::high_resolution_clock::now();
10051011
// auto start1 = std::chrono::high_resolution_clock::now();
10061012

@@ -1102,7 +1108,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
11021108
GPUFatal("Project not compiled with neural network clusterization. Aborting.");
11031109
#endif
11041110
} else {
1105-
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});
1111+
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true);
11061112
DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
11071113
runKernel<GPUTPCCFClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), {iSector}}, 0);
11081114
}

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.cxx

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -22,18 +22,19 @@ using namespace o2::gpu;
2222
using namespace o2::gpu::tpccf;
2323

2424
template <>
25-
GPUdii() void GPUTPCCFDeconvolution::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer)
25+
GPUdii() void GPUTPCCFDeconvolution::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, uint8_t overwriteCharge)
2626
{
2727
CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
2828
CfArray2D<uint8_t> isPeakMap(clusterer.mPpeakMap);
29-
GPUTPCCFDeconvolution::deconvolutionImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, isPeakMap, chargeMap, clusterer.mPpositions, clusterer.mPmemory->counters.nPositions);
29+
GPUTPCCFDeconvolution::deconvolutionImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, isPeakMap, chargeMap, clusterer.mPpositions, clusterer.mPmemory->counters.nPositions, overwriteCharge);
3030
}
3131

3232
GPUdii() void GPUTPCCFDeconvolution::deconvolutionImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem,
3333
const CfArray2D<uint8_t>& peakMap,
3434
CfArray2D<PackedCharge>& chargeMap,
3535
const CfChargePos* positions,
36-
const uint32_t digitnum)
36+
const uint32_t digitnum,
37+
uint8_t overwriteCharge)
3738
{
3839
SizeT idx = get_global_id(0);
3940

@@ -111,9 +112,14 @@ GPUdii() void GPUTPCCFDeconvolution::deconvolutionImpl(int32_t nBlocks, int32_t
111112
peakCount = (peakCount == 0) ? 1 : peakCount;
112113

113114
PackedCharge charge = chargeMap[pos];
114-
PackedCharge p(charge.unpack() / peakCount, has3x3, split);
115115

116-
chargeMap[pos] = p;
116+
if (overwriteCharge) {
117+
PackedCharge p(charge.unpack() / peakCount, has3x3, split);
118+
chargeMap[pos] = p;
119+
} else {
120+
PackedCharge p(charge.unpack(), has3x3, split);
121+
chargeMap[pos] = p;
122+
}
117123
}
118124

119125
GPUdi() uint8_t GPUTPCCFDeconvolution::countPeaksInner(

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFDeconvolution.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class GPUTPCCFDeconvolution : public GPUKernelTemplate
5151
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, Args... args);
5252

5353
private:
54-
static GPUd() void deconvolutionImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const CfArray2D<uint8_t>&, CfArray2D<PackedCharge>&, const CfChargePos*, const uint32_t);
54+
static GPUd() void deconvolutionImpl(int32_t, int32_t, int32_t, int32_t, GPUSharedMemory&, const CfArray2D<uint8_t>&, CfArray2D<PackedCharge>&, const CfChargePos*, const uint32_t, uint8_t);
5555

5656
static GPUdi() uint8_t countPeaksInner(uint16_t, const uint8_t*, uint8_t*);
5757
static GPUdi() uint8_t countPeaksOuter(uint16_t, uint8_t, const uint8_t*);

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ class GPUTPCNNClusterizer : public GPUProcessor
4444
bool mNnClusterizerAddIndexData = true;
4545
float mNnClassThreshold = 0.01;
4646
bool mNnSigmoidTrafoClassThreshold = 1;
47+
bool mNnClusterizerSetDeconvolutionFlags = true;
4748
int mNnClusterizerUseCfRegression = 0;
4849
int mNnClusterizerBatchedMode = 1;
4950
int mNnClusterizerTotalClusters = 1;

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclust
9292
clustererNN.mNnClusterizerBatchedMode = settings.nnClusterizerBatchedMode;
9393
clustererNN.mNnClusterizerBoundaryFillValue = settings.nnClusterizerBoundaryFillValue;
9494
clustererNN.mNnSigmoidTrafoClassThreshold = settings.nnSigmoidTrafoClassThreshold;
95+
clustererNN.mNnClusterizerSetDeconvolutionFlags = (bool)settings.nnClusterizerSetDeconvolutionFlags;
9596
if (clustererNN.mNnSigmoidTrafoClassThreshold) {
9697
clustererNN.mNnClassThreshold = (float)std::log(settings.nnClassThreshold / (1.f - settings.nnClassThreshold));
9798
} else {

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx

Lines changed: 47 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::run
5151
}
5252

5353
template <>
54-
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNN>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)
54+
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNN>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
5555
{
5656
uint32_t glo_idx = get_global_id(0);
5757
auto& clusterer = processors.tpcClusterer[sector];
@@ -111,7 +111,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
111111
}
112112

113113
template <>
114-
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNNSingleElement>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)
114+
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNNSingleElement>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
115115
{
116116
uint32_t glo_idx = get_global_id(0);
117117
auto& clusterer = processors.tpcClusterer[sector];
@@ -126,11 +126,13 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
126126

127127
if (clustererNN.mNnClusterizerAddIndexData && (int32_t)transient_index == (clustererNN.mNnClusterizerElementSize - 1)) {
128128
uint32_t top_idx = (base_idx + 1) * clustererNN.mNnClusterizerElementSize;
129-
for (uint16_t i = 0; i < 8; i++) {
130-
Delta2 d = cfconsts::InnerNeighbors[i];
131-
CfChargePos tmp_pos = peak.delta(d);
132-
clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
133-
clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
129+
if (!clustererNN.mNnClusterizerSetDeconvolutionFlags) { // Only if deconvolution flags are not set
130+
for (uint16_t i = 0; i < 8; i++) { // This solution needs testing. It is not the same as the deconvolution flags
131+
Delta2 d = cfconsts::InnerNeighbors[i];
132+
CfChargePos tmp_pos = peak.delta(d);
133+
clustererNN.mClusterFlags[2 * base_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
134+
}
135+
clustererNN.mClusterFlags[2 * base_idx + 1] = clustererNN.mClusterFlags[2 * base_idx];
134136
}
135137
if (dtype == 0) {
136138
clustererNN.mInputData_16[top_idx - 3] = (OrtDataType::Float16_t)(sector / 36.f);
@@ -147,40 +149,40 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
147149
bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0);
148150
if (is_row_boundary) {
149151
if (dtype == 0) {
150-
clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
152+
clustererNN.mInputData_16[glo_idx] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
151153
} else {
152-
clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
154+
clustererNN.mInputData_32[glo_idx] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
153155
}
154156
} else {
155157
int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
156158
int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, row + r);
157159
int32_t rest_1 = transient_index % ((2 * clustererNN.mNnClusterizerSizeInputPad + 1) * (2 * clustererNN.mNnClusterizerSizeInputTime + 1));
158160
int32_t p = CAMath::Floor(rest_1 / (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputPad + pad_offset;
159-
int32_t t = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputTime;
161+
int32_t time_pos = (rest_1 % (2 * clustererNN.mNnClusterizerSizeInputTime + 1)) - clustererNN.mNnClusterizerSizeInputTime + time;
160162

161-
bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow) && (t < 0 || t >= TPC_MAX_FRAGMENT_LEN_GPU);
163+
bool is_boundary = GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow) && (time_pos < 0 || time_pos >= TPC_MAX_FRAGMENT_LEN_GPU);
162164

163165
if (!is_boundary) {
164166
float central_charge = static_cast<float>(chargeMap[peak].unpack());
165-
CfChargePos tmp_pos(row + r, pad + p, time + t);
167+
CfChargePos tmp_pos(row + r, pad + p, time_pos);
166168
if (dtype == 0) {
167-
clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
169+
clustererNN.mInputData_16[glo_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
168170
} else if (dtype == 1) {
169-
clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
171+
clustererNN.mInputData_32[glo_idx] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
170172
}
171173
} else {
172174
if (dtype == 0) {
173-
clustererNN.mInputData_16[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
175+
clustererNN.mInputData_16[glo_idx] = (OrtDataType::Float16_t)(static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue));
174176
} else {
175-
clustererNN.mInputData_32[base_idx * clustererNN.mNnClusterizerElementSize + transient_index] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
177+
clustererNN.mInputData_32[glo_idx] = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
176178
}
177179
}
178180
}
179181
}
180182
}
181183

182184
template <>
183-
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass1Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)
185+
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass1Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
184186
{
185187
uint32_t glo_idx = get_global_id(0);
186188
if (dtype == 0) {
@@ -191,7 +193,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::det
191193
}
192194

193195
template <>
194-
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass2Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t onlyMC, uint32_t batchStart)
196+
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::determineClass2Labels>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
195197
{
196198
auto& clustererNN = processors.tpcNNClusterer[sector];
197199
uint32_t glo_idx = get_global_id(0);
@@ -457,6 +459,33 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
457459
}
458460
}
459461

462+
// ---------------------------------
463+
template <>
464+
GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::publishDeconvolutionFlags>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint batchStart)
465+
{
466+
// Implements publishing logic identical to the heuristic clusterizer and deconvolution kernel
467+
uint32_t idx = get_global_id(0);
468+
auto& clusterer = processors.tpcClusterer[sector];
469+
auto& clustererNN = processors.tpcNNClusterer[sector];
470+
CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
471+
CfChargePos peak = clusterer.mPfilteredPeakPositions[idx + batchStart];
472+
473+
for (int i = 0; i < 8; i++) {
474+
Delta2 d = cfconsts::InnerNeighbors[i];
475+
CfChargePos tmp_pos = peak.delta(d);
476+
PackedCharge charge = chargeMap[tmp_pos];
477+
clustererNN.mClusterFlags[2 * idx] += (d.y != 0 && charge.isSplit());
478+
clustererNN.mClusterFlags[2 * idx + 1] += (d.x != 0 && charge.isSplit());
479+
}
480+
for (int i = 0; i < 16; i++) {
481+
Delta2 d = cfconsts::OuterNeighbors[i];
482+
CfChargePos tmp_pos = peak.delta(d);
483+
PackedCharge charge = chargeMap[tmp_pos];
484+
clustererNN.mClusterFlags[2 * idx] += (d.y != 0 && charge.isSplit() && !charge.has3x3Peak());
485+
clustererNN.mClusterFlags[2 * idx + 1] += (d.x != 0 && charge.isSplit() && !charge.has3x3Peak());
486+
}
487+
}
488+
460489
// The following arithmetic is done because the network is trained with a split between the IROC and OROC boundary
461490
GPUd() int32_t GPUTPCNNClusterizerKernels::padOffset(int32_t row_ref, int32_t row_current)
462491
{

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,7 @@ class GPUTPCNNClusterizerKernels : public GPUKernelTemplate
6565
determineClass2Labels = 4,
6666
publishClass1Regression = 5,
6767
publishClass2Regression = 6,
68+
publishDeconvolutionFlags = 7
6869
};
6970

7071
template <int32_t iKernel = defaultKernel, typename... Args>

0 commit comments

Comments
 (0)