Skip to content

Commit fdf6ef8

Browse files
committed
Adjusting CPU kernel
1 parent 1ac8c2c commit fdf6ef8

File tree

2 files changed

+19
-39
lines changed

2 files changed

+19
-39
lines changed

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx

Lines changed: 0 additions & 27 deletions
Original file line number | Diff line number | Diff line change
@@ -67,33 +67,6 @@ void* GPUTPCNNClusterizer::setIOPointers(void* mem)
6767
return mem;
6868
}
6969

70-
// std::vector<int32_t> GPUTPCNNClusterizer::pointerSizes() {
71-
// std::vector<int32_t> sizes(7, -1);
72-
// if (mNnClusterizerBatchedMode > 0) {
73-
// if (mNnInferenceInputDType == 0 && mNnClusterizerElementSize > 0) {
74-
// sizes[0] = mNnClusterizerBatchedMode * mNnClusterizerElementSize; // inputData16
75-
// } else if (mNnInferenceInputDType == 1 && mNnClusterizerElementSize > 0) {
76-
// sizes[1] = mNnClusterizerBatchedMode * mNnClusterizerElementSize; // inputData32
77-
// }
78-
// sizes[2] = 2 * mNnClusterizerBatchedMode; // mClusterFlags
79-
// if (mNnClusterizerModelClassNumOutputNodes > 0) {
80-
// sizes[3] = mNnClusterizerBatchedMode * mNnClusterizerModelClassNumOutputNodes; // modelProbabilities
81-
// }
82-
// if (!mNnClusterizerUseCfRegression) {
83-
// if (mNnClusterizerModelReg1NumOutputNodes > 0) {
84-
// sizes[4] = mNnClusterizerBatchedMode * mNnClusterizerModelReg1NumOutputNodes; // outputDataReg1
85-
// }
86-
// if (mNnClusterizerModelReg2NumOutputNodes > 0) {
87-
// sizes[5] = mNnClusterizerBatchedMode * mNnClusterizerModelReg2NumOutputNodes; // outputDataReg2
88-
// }
89-
// }
90-
// }
91-
// if (mNnClusterizerTotalClusters > 0) {
92-
// sizes[6] = mNnClusterizerTotalClusters; // mOutputDataClass
93-
// }
94-
// return sizes;
95-
// }
96-
9770
void GPUTPCNNClusterizer::RegisterMemoryAllocation()
9871
{
9972
AllocateAndInitializeLate();

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.cxx

Lines changed: 19 additions & 12 deletions
Original file line number | Diff line number | Diff line change
@@ -61,25 +61,31 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
6161
CfArray2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
6262
CfArray2D<uint8_t> isPeakMap(clusterer.mPpeakMap);
6363
CfChargePos peak = clusterer.mPfilteredPeakPositions[CAMath::Min(glo_idx + batchStart, (uint32_t)(clusterer.mPmemory->counters.nClusters - 1))];
64-
int32_t row = static_cast<int>(peak.row()), pad = static_cast<int>(peak.pad()), time = static_cast<int>(peak.time()); // Explicit casting to avoid conversion errors
64+
int32_t row = static_cast<int32_t>(peak.row()), pad = static_cast<int32_t>(peak.pad()), time = static_cast<int32_t>(peak.time()); // Explicit casting to avoid conversion errors
6565
float central_charge = static_cast<float>(chargeMap[peak].unpack());
6666
int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
6767

6868
for (int32_t r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; r++) {
69-
bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0);
70-
int32_t pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r);
71-
for (int32_t p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p++) {
72-
bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow);
69+
int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset(row, row + r);
70+
int32_t row_pos = row + r;
71+
for (int32_t p = (-clustererNN.mNnClusterizerSizeInputPad + pad_offset); p <= (clustererNN.mNnClusterizerSizeInputPad + pad_offset); p++) {
72+
int32_t pad_pos = pad + p;
7373
for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; t++) {
7474
int32_t time_pos = time + t;
75-
if (!is_boundary && (time_pos >= 0) && (time_pos < TPC_MAX_FRAGMENT_LEN_GPU)) {
76-
CfChargePos tmp_pos(row + r, pad + p, time + t);
77-
if (r == 0 && !clustererNN.mClusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization
78-
clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
79-
clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
75+
int32_t isBoundaryIndex = (row_pos + row_offset + clustererNN.mNnClusterizerSizeInputRow) * clustererNN.mBoundaryMapSizePadsPerRow + pad_pos + clustererNN.mBoundaryPadding;
76+
if (!clustererNN.mIsBoundary[isBoundaryIndex] && (time_pos >= 0) && (time_pos < TPC_MAX_FRAGMENT_LEN_GPU)) {
77+
CfChargePos tmp_pos(row_pos, pad_pos, time_pos);
78+
if (!clustererNN.mNnClusterizerSetDeconvolutionFlags) { // Only if deconvolution flags are not set
79+
if (r == 0 && !clustererNN.mClusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization
80+
clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
81+
clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
82+
}
8083
}
8184
if (dtype == 0) {
8285
clustererNN.mInputData_16[write_idx] = (OrtDataType::Float16_t)(static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge);
86+
// if(CAMath::Abs(static_cast<float>(clustererNN.mInputData_16[write_idx]) - static_cast<float>(clustererNN.mInputData_16[write_idx])) > 1e-6) {
87+
// printf("Warning: (Charge) Charge difference at idx %d, batchStart %d, maxClusters %d, sector %d, row %d (%d), pad %d (%d), time %d (%d): %f / %f\n", glo_idx, batchStart, clusterer.mPmemory->counters.nClusters - 1, sector, row_pos, r, pad_pos, p, time_pos, t, static_cast<float>(clustererNN.mInputData_16[write_idx]), static_cast<float>(clustererNN.mInputData_16[write_idx]));
88+
// }
8389
} else if (dtype == 1) {
8490
clustererNN.mInputData_32[write_idx] = static_cast<float>(chargeMap[tmp_pos].unpack()) / central_charge;
8591
}
@@ -507,16 +513,17 @@ GPUd() int32_t GPUTPCNNClusterizerKernels::rowOffset(int32_t row, int32_t offset
507513
return (row > 62 ? offset : 0);
508514
}
509515

516+
// Legacy. Deprecated.
510517
GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int32_t row, int32_t pad, int32_t offset)
511518
{
512519
if (pad < 0 || row < 0) { // Faster short-circuit
513520
return true;
514521
} else if (row < 63) {
515-
return (pad >= static_cast<int>(GPUTPCGeometry::NPads(row)));
522+
return ((pad < 0) || (pad >= static_cast<int>(GPUTPCGeometry::NPads(row))));
516523
} else if (row < (63 + offset)) { // to account for the gap between IROC and OROC. Charge will be set to the boundary fill value in order to signal boundaries to the neural network
517524
return true;
518525
} else if (row < (o2::tpc::constants::MAXGLOBALPADROW + offset)) {
519-
return (pad >= static_cast<int>(GPUTPCGeometry::NPads(row - offset)));
526+
return ((pad < 0) || (pad >= static_cast<int>(GPUTPCGeometry::NPads(row - offset))));
520527
} else {
521528
return true;
522529
}

0 commit comments

Comments
 (0)