Skip to content

Commit 9c8984c

Browse files
committed
Improve GPU filling kernel speed
1 parent 24d15d0 commit 9c8984c

File tree

5 files changed

+104
-90
lines changed

5 files changed

+104
-90
lines changed

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -642,6 +642,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
642642

643643
// Maximum of 4 lanes supported
644644
HighResTimer* nnTimers[12];
645+
int32_t countLoops = 0;
645646

646647
if (GetProcessingSettings().nn.applyNNclusterizer) {
647648
int32_t deviceId = -1;
@@ -1035,7 +1036,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10351036
// Filling the data
10361037
if (mRec->IsGPU() || GetProcessingSettings().nn.nnClusterizerForceGpuInputFill) {
10371038
// Fills element by element of each input matrix -> better parallelizability, but worse on CPU due to unnecessary computations
1038-
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNNGPU>({GetGrid(iSize * clustererNNShadow.mNnClusterizerRowTimeSizeFull, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart);
1039+
for(int throughput_counter = 0; throughput_counter < 16; throughput_counter++) { // Loop to increase throughput on GPU, at least for large batch sizes
1040+
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNNGPU>({GetGrid(clustererNNShadow.mNnClusterizerBatchedMode * clustererNNShadow.mNnClusterizerRowTimeSizeThreads , lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart);
1041+
countLoops++;
1042+
}
10391043
} else {
10401044
// Fills the whole input matrix at once -> better performance on CPU, but worse parallelizability
10411045
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::fillInputNNCPU>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, propagateMCLabels, batchStart);
@@ -1138,6 +1142,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
11381142
LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with CF regression. (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
11391143
}
11401144
}
1145+
LOG(info) << "countLoops: " << countLoops;
11411146
#else
11421147
GPUFatal("Project not compiled with neural network clusterization. Aborting.");
11431148
#endif

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -71,6 +71,7 @@ class GPUTPCNNClusterizer : public GPUProcessor
7171
uint32_t mNnClusterizerPadTimeSize = 0;
7272
uint32_t mNnClusterizerRowTimeSize = 0;
7373
uint32_t mNnClusterizerRowTimeSizeFull = 0;
74+
uint32_t mNnClusterizerRowTimeSizeThreads = 0;
7475

7576
// Boundary lookup table
7677
// int32_t mBoundaryMapSizeRow = 0;

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerHost.cxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -98,6 +98,7 @@ void GPUTPCNNClusterizerHost::initClusterizer(const GPUSettingsProcessingNNclust
9898
clustererNN.mNnClusterizerPadTimeSize = clustererNN.mNnClusterizerFullPadSize * clustererNN.mNnClusterizerFullTimeSize;
9999
clustererNN.mNnClusterizerRowTimeSize = clustererNN.mNnClusterizerFullRowSize * clustererNN.mNnClusterizerFullTimeSize;
100100
clustererNN.mNnClusterizerRowTimeSizeFull = clustererNN.mNnClusterizerRowTimeSize + (settings.nnClusterizerAddIndexData ? 3 : 0);
101+
clustererNN.mNnClusterizerRowTimeSizeThreads = clustererNN.mNnClusterizerRowTimeSize + (settings.nnClusterizerAddIndexData ? 1 : 0);
101102
clustererNN.mNnClusterizerElementSize = clustererNN.mNnClusterizerChargeArraySize + (settings.nnClusterizerAddIndexData ? 3 : 0);
102103
// clustererNN.mBoundaryMapSizeRow = 3 * clustererNN.mNnClusterizerSizeInputRow + o2::tpc::constants::MAXGLOBALPADROW;
103104
// clustererNN.mBoundaryPadding = 11; // padding on each side to account for pad_offset. N=11 since then mIsBoundary = 24320 ~< (1.5 x 2^14 = 24576) && N must be bigger than (NPads[row(end_iroc + 1)] - NPads[row(end_iroc)])/2 (=6) for pad_offset to work

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx

Lines changed: 94 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,19 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
7272
int32_t time = static_cast<int>(peak.time());
7373
float central_charge = static_cast<float>(chargeMap[peak].unpack());
7474
int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
75+
const int32_t iroc_row = 63 + row_offset;
76+
const int32_t maxrow = o2::tpc::constants::MAXGLOBALPADROW + row_offset;
77+
const int32_t npads_row = GPUTPCGeometry::NPads(row);
7578

7679
for (int32_t r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; ++r) {
7780
int32_t target_row = row + r;
7881
bool is_row_boundary = (target_row < 0) || (target_row >= o2::tpc::constants::MAXGLOBALPADROW);
7982
int32_t pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, target_row);
83+
int32_t npads_reference = is_row_boundary ? 0 : GPUTPCGeometry::NPads(target_row + row_offset);
8084

8185
for (int32_t p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; ++p) {
8286
int32_t target_pad = pad + p;
83-
bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(target_row + row_offset, target_pad, clustererNN.mNnClusterizerSizeInputRow);
87+
bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(target_row + row_offset, target_pad, maxrow, iroc_row, npads_row, npads_reference);
8488

8589
for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; ++t) {
8690
int32_t target_time = time + t;
@@ -143,125 +147,119 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
143147
auto& clusterer = processors.tpcClusterer[sector];
144148
auto& clustererNN = processors.tpcNNClusterer[sector];
145149

146-
if (glo_idx >= (uint32_t)clustererNN.mNnClusterizerBatchedMode * clustererNN.mNnClusterizerRowTimeSizeFull) {
150+
if (glo_idx >= (uint32_t)clustererNN.mNnClusterizerBatchedMode * clustererNN.mNnClusterizerRowTimeSizeThreads) {
147151
return;
148152
}
149153

150-
uint32_t base_idx = glo_idx / clustererNN.mNnClusterizerRowTimeSizeFull;
151-
uint32_t transient_index = glo_idx - (base_idx * clustererNN.mNnClusterizerRowTimeSizeFull);
154+
uint32_t base_idx = glo_idx / clustererNN.mNnClusterizerRowTimeSizeThreads;
155+
uint32_t transient_index = glo_idx - (base_idx * clustererNN.mNnClusterizerRowTimeSizeThreads);
152156

153157
// Early exit for out-of-bounds threads
154-
if (base_idx + batchStart >= clusterer.mPmemory->counters.nClusters) {
155-
return;
156-
}
158+
// if (base_idx + batchStart >= clusterer.mPmemory->counters.nClusters) {
159+
// return;
160+
// }
157161
CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
158162
CfArray2D<uint8_t> isPeakMap(clusterer.mPpeakMap);
159163

160164
// Use dedicated neural network shared memory arrays for warp-level caching
161165
// First thread in each warp loads shared data
162166
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(base_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
163-
float central_charge = static_cast<float>(chargeMap[peak].unpack());
167+
float central_charge = chargeMap[peak].unpack();
164168
int32_t row = static_cast<int>(peak.row());
165169
int32_t pad = static_cast<int>(peak.pad());
166170
int32_t time = static_cast<int>(peak.time());
167171

172+
const int32_t npads_row = GPUTPCGeometry::NPads(row);
173+
168174
// Handle index data with fewer branches
169175
if (clustererNN.mNnClusterizerAddIndexData && transient_index >= clustererNN.mNnClusterizerRowTimeSize) {
170-
int32_t data_idx = transient_index - clustererNN.mNnClusterizerRowTimeSize;
171-
uint32_t write_idx = base_idx * clustererNN.mNnClusterizerElementSize + clustererNN.mNnClusterizerChargeArraySize + data_idx;
172-
173-
float index_values[3] = {
174-
static_cast<float>(sector) / o2::tpc::constants::MAXSECTOR,
175-
static_cast<float>(row) / o2::tpc::constants::MAXGLOBALPADROW,
176-
static_cast<float>(pad) / GPUTPCGeometry::NPads(row)};
177-
176+
// int32_t data_idx = transient_index - clustererNN.mNnClusterizerRowTimeSize;
177+
// uint32_t write_idx = base_idx * clustererNN.mNnClusterizerElementSize + clustererNN.mNnClusterizerChargeArraySize + data_idx;
178+
//
179+
// float index_values[3] = {
180+
// static_cast<float>(sector) / o2::tpc::constants::MAXSECTOR,
181+
// static_cast<float>(row) / o2::tpc::constants::MAXGLOBALPADROW,
182+
// static_cast<float>(pad) / GPUTPCGeometry::NPads(row)};
183+
//
184+
// if (dtype == 0) {
185+
// clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)index_values[data_idx];
186+
// } else {
187+
// clustererNN.mInputData_32[write_idx] = index_values[data_idx];
188+
// }
189+
//
190+
// // Handle deconvolution flags only once per cluster (last thread in element)
191+
// if (!clustererNN.mNnClusterizerSetDeconvolutionFlags && data_idx == 2) {
192+
// uint8_t cluster_flags = 0;
193+
// for (uint16_t i = 0; i < 8; i++) {
194+
// Delta2 d = cfconsts::InnerNeighbors[i];
195+
// CfChargePos tmp_pos = peak.delta(d);
196+
// cluster_flags += CfUtils::isPeak(isPeakMap[tmp_pos]);
197+
// }
198+
// clustererNN.mClusterFlags[2 * base_idx] = cluster_flags;
199+
// clustererNN.mClusterFlags[2 * base_idx + 1] = cluster_flags;
200+
// }
201+
// return;
202+
uint32_t write_idx = base_idx * clustererNN.mNnClusterizerElementSize + clustererNN.mNnClusterizerChargeArraySize;
178203
if (dtype == 0) {
179-
clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)index_values[data_idx];
204+
clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(sector) / o2::tpc::constants::MAXSECTOR);
205+
clustererNN.mInputData_16[write_idx + 1] = (OrtDataType::Float16_t)(static_cast<float>(row) / o2::tpc::constants::MAXGLOBALPADROW);
206+
clustererNN.mInputData_16[write_idx + 2] = (OrtDataType::Float16_t)(static_cast<float>(pad) / npads_row);
180207
} else {
181-
clustererNN.mInputData_32[write_idx] = index_values[data_idx];
182-
}
183-
184-
// Handle deconvolution flags only once per cluster (last thread in element)
185-
if (!clustererNN.mNnClusterizerSetDeconvolutionFlags && data_idx == 2) {
186-
uint8_t cluster_flags = 0;
187-
for (uint16_t i = 0; i < 8; i++) {
188-
Delta2 d = cfconsts::InnerNeighbors[i];
189-
CfChargePos tmp_pos = peak.delta(d);
190-
cluster_flags += CfUtils::isPeak(isPeakMap[tmp_pos]);
191-
}
192-
clustererNN.mClusterFlags[2 * base_idx] = cluster_flags;
193-
clustererNN.mClusterFlags[2 * base_idx + 1] = cluster_flags;
208+
clustererNN.mInputData_32[write_idx] = static_cast<float>(sector) / o2::tpc::constants::MAXSECTOR;
209+
clustererNN.mInputData_32[write_idx + 1] = static_cast<float>(row) / o2::tpc::constants::MAXGLOBALPADROW;
210+
clustererNN.mInputData_32[write_idx + 2] = static_cast<float>(pad) / npads_row;
194211
}
195-
return;
196212
}
197213

198214
// Main data processing - optimize index calculations
199215
if (transient_index < clustererNN.mNnClusterizerRowTimeSize) {
200216
// Optimize 3D index calculation
201-
int32_t row_idx = transient_index / clustererNN.mNnClusterizerFullTimeSize;
202-
int32_t r_local = row_idx - clustererNN.mNnClusterizerSizeInputRow;
203-
int32_t time_idx = transient_index - row_idx * clustererNN.mNnClusterizerFullTimeSize;
204-
int32_t t_local = time_idx - clustererNN.mNnClusterizerSizeInputTime;
217+
const int32_t row_idx = transient_index / clustererNN.mNnClusterizerFullTimeSize;
218+
const int32_t time_idx = transient_index - row_idx * clustererNN.mNnClusterizerFullTimeSize;
205219
int32_t write_idx = base_idx * clustererNN.mNnClusterizerElementSize + row_idx * clustererNN.mNnClusterizerPadTimeSize + time_idx;
206220

207221
// Early boundary check for row
208-
int32_t target_row = row + r_local;
209-
int8_t is_row_boundary = (target_row < 0) || (target_row > (o2::tpc::constants::MAXGLOBALPADROW - 1));
222+
const int32_t target_row = row + row_idx - clustererNN.mNnClusterizerSizeInputRow;
223+
const int8_t is_row_boundary = (target_row < 0) || (target_row > (o2::tpc::constants::MAXGLOBALPADROW - 1));
224+
const int32_t target_time = time + time_idx - clustererNN.mNnClusterizerSizeInputTime;
225+
const uint8_t is_time_boundary = (target_time < 0) || (target_time >= clustererNN.maxAllowedTimebin);
226+
const float inverse_central_charge = 1.f / central_charge; // multiply by inverse is cheaper than divide
210227

211228
// Calculate offsets
212-
int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
213-
int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, target_row);
214-
for (int32_t p_local = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p_local <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p_local++) {
229+
// int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
230+
// int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, target_row);
231+
const int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
232+
const int32_t iroc_row = 63 + row_offset;
233+
const int32_t maxrow = o2::tpc::constants::MAXGLOBALPADROW + row_offset;
234+
const int32_t p_local = pad + GPUTPCNNClusterizerKernels::padOffset(row, target_row);
235+
const int32_t boundary_row = target_row + row_offset;
236+
const int32_t npads_reference = is_row_boundary ? 0 : GPUTPCGeometry::NPads(boundary_row - clustererNN.mNnClusterizerSizeInputRow);
237+
const float boundary_val = clustererNN.mNnClusterizerBoundaryFillValue;
238+
239+
float output_value = boundary_val;
240+
241+
const int32_t start_pad = -clustererNN.mNnClusterizerSizeInputPad + p_local;
242+
const int32_t end_pad = clustererNN.mNnClusterizerSizeInputPad + p_local;
243+
244+
for (int32_t target_pad = start_pad; target_pad <= end_pad; ++target_pad) {
215245
if (is_row_boundary) {
216-
// Use boundary fill value
217-
float boundary_val = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
218-
if (dtype == 0) {
219-
clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)boundary_val;
246+
output_value = boundary_val;
247+
} else {
248+
const uint8_t is_boundary = is_time_boundary || GPUTPCNNClusterizerKernels::isBoundary(boundary_row, target_pad, maxrow, iroc_row, npads_row, npads_reference);
249+
if (!is_boundary) {
250+
CfChargePos pos(target_row, target_pad, target_time);
251+
// one load + one multiply
252+
output_value = chargeMap[pos].unpack() * inverse_central_charge;
220253
} else {
221-
clustererNN.mInputData_32[write_idx] = boundary_val;
254+
output_value = boundary_val;
222255
}
223-
write_idx += clustererNN.mNnClusterizerFullTimeSize; // Move to next pad position
224-
continue;
225-
}
226-
227-
// Calculate target pad and time
228-
int32_t target_pad = pad + p_local;
229-
int32_t target_time = time + t_local;
230-
231-
// Optimized boundary check
232-
int8_t is_boundary = GPUTPCNNClusterizerKernels::isBoundary(target_row + row_offset, target_pad, clustererNN.mNnClusterizerSizeInputRow) || (target_time < 0) || (target_time >= clustererNN.maxAllowedTimebin);
233-
234-
float output_value;
235-
if (is_boundary) {
236-
output_value = static_cast<float>(clustererNN.mNnClusterizerBoundaryFillValue);
237-
} else {
238-
// Coalesced memory access - create position and read charge
239-
CfChargePos tmp_pos(target_row, target_pad, target_time);
240-
output_value = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge; // Normalize by central charge
241256
}
242-
243-
// Write output with reduced branching
244257
if (dtype == 0) {
245258
clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)output_value;
246259
} else {
247260
clustererNN.mInputData_32[write_idx] = output_value;
248261
}
249-
// if (write_idx >= clustererNN.mNnClusterizerElementSize * clustererNN.mNnClusterizerBatchedMode) {
250-
// printf("Error: Write index out of bounds (central array)! %d >= %d (write_idx: %d, base_idx: %d, transient_index: %d, row_idx: %d, time_idx: %d, r_local: %d, t_local: %d)\n",
251-
// write_idx, (int)(clustererNN.mNnClusterizerElementSize * clustererNN.mNnClusterizerBatchedMode), write_idx, base_idx, transient_index, row_idx, time_idx, r_local, t_local);
252-
// }
253-
// if ((clusterer.mPmemory->counters.nClusters - batchStart) < clustererNN.mNnClusterizerBatchedMode) {
254-
// if (write_idx >= ((clusterer.mPmemory->counters.nClusters - batchStart) * clustererNN.mNnClusterizerElementSize)) {
255-
// printf("Error: Write index out of bounds (end of array)! %d >= %d (write_idx: %d, base_idx: %d, transient_index: %d, row_idx: %d, time_idx: %d, r_local: %d, t_local: %d)\n",
256-
// write_idx, (int)((clusterer.mPmemory->counters.nClusters - batchStart) * clustererNN.mNnClusterizerElementSize), write_idx, base_idx, transient_index, row_idx, time_idx, r_local, t_local);
257-
// }
258-
// if (write_idx > ((clusterer.mPmemory->counters.nClusters - batchStart) * clustererNN.mNnClusterizerElementSize - 5)) {
259-
// printf("Sanity check (should appear only once) %d == %d (write_idx: %d, base_idx: %d, transient_index: %d, row_idx: %d, time_idx: %d, r_local: %d, t_local: %d)\n",
260-
// write_idx, (int)((clusterer.mPmemory->counters.nClusters - batchStart) * clustererNN.mNnClusterizerElementSize - 4), write_idx, base_idx, transient_index, row_idx, time_idx, r_local, t_local);
261-
// }
262-
// }
263-
264-
write_idx += clustererNN.mNnClusterizerFullTimeSize; // Move to next pad position
262+
write_idx += clustererNN.mNnClusterizerFullTimeSize;
265263
}
266264
}
267265
}
@@ -275,6 +273,10 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::det
275273
if (glo_idx + batchStart >= clusterer.mPmemory->counters.nClusters || glo_idx >= (uint32_t)clustererNN.mNnClusterizerBatchedMode) {
276274
return;
277275
}
276+
if (glo_idx + batchStart >= clustererNN.mNnClusterizerTotalClusters) {
277+
printf("Error: Class output index out of bounds! %d >= %d (glo_idx: %d, batchStart: %d, mNnClusterizerBatchedMode: %d, mNnClusterizerModelClassNumOutputNodes: %d, clusterer.mPmemory->counters.nClusters %d)\n",
278+
glo_idx + batchStart, clustererNN.mNnClusterizerTotalClusters, glo_idx, batchStart, clustererNN.mNnClusterizerBatchedMode, clustererNN.mNnClusterizerModelClassNumOutputNodes, clusterer.mPmemory->counters.nClusters);
279+
}
278280
if (clustererNN.mNnClusterizerUseClassification) {
279281
if (dtype == 0) {
280282
clustererNN.mOutputDataClass[glo_idx + batchStart] = (int32_t)((clustererNN.mModelProbabilities_16[glo_idx]).ToFloat() > clustererNN.mNnClassThreshold);
@@ -364,6 +366,11 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
364366
return;
365367
}
366368

369+
if (full_glo_idx >= clustererNN.mNnClusterizerBatchedMode * clustererNN.mNnClusterizerModelReg1NumOutputNodes) {
370+
printf("Error: Global index out of bounds! %d >= %d (full_glo_idx: %d, maxClusterNum: %d, batchStart: %d)\n",
371+
full_glo_idx, clustererNN.mNnClusterizerBatchedMode * clustererNN.mNnClusterizerModelReg1NumOutputNodes, full_glo_idx, maxClusterNum, batchStart);
372+
}
373+
367374
tpc::ClusterNative* clusterOut = clusterer.mPclusterByRow;
368375

369376
ClusterAccumulator pc;
@@ -737,16 +744,16 @@ GPUd() int32_t GPUTPCNNClusterizerKernels::rowOffset(int32_t row, int32_t offset
737744
return (row > 62 ? offset : 0);
738745
}
739746

740-
GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int32_t row, int32_t pad, int32_t offset)
747+
GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int32_t row, int32_t pad, int32_t maxrow, int32_t iroc_row, int32_t npads_row, int32_t npads_reference)
741748
{
742-
if (pad < 0 || row < 0) { // Faster short-circuit
749+
if (pad < 0) { // Faster short-circuit
743750
return true;
744751
} else if (row < 63) {
745-
return (pad >= static_cast<int>(GPUTPCGeometry::NPads(row)));
746-
} else if (row < (63 + offset)) { // to account for the gap between IROC and OROC. Charge will be set to the boundary fill value in order to signal boundaries to the neural network
752+
return (pad >= npads_row);
753+
} else if (row < iroc_row) { // to account for the gap between IROC and OROC. Charge will be set to the boundary fill value in order to signal boundaries to the neural network
747754
return true;
748-
} else if (row < (o2::tpc::constants::MAXGLOBALPADROW + offset)) {
749-
return (pad >= static_cast<int>(GPUTPCGeometry::NPads(row - offset)));
755+
} else if (row < maxrow) {
756+
return (pad >= npads_reference);
750757
} else {
751758
return true;
752759
}

0 commit comments

Comments (0)