@@ -61,25 +61,31 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
6161 CfArray2D<PackedCharge> chargeMap (reinterpret_cast <PackedCharge*>(clusterer.mPchargeMap ));
6262 CfArray2D<uint8_t > isPeakMap (clusterer.mPpeakMap );
6363 CfChargePos peak = clusterer.mPfilteredPeakPositions [CAMath::Min (glo_idx + batchStart, (uint32_t )(clusterer.mPmemory ->counters .nClusters - 1 ))];
64- int32_t row = static_cast <int >(peak.row ()), pad = static_cast <int >(peak.pad ()), time = static_cast <int >(peak.time ()); // Explicit casting to avoid conversion errors
64+ int32_t row = static_cast <int32_t >(peak.row ()), pad = static_cast <int32_t >(peak.pad ()), time = static_cast <int32_t >(peak.time ()); // Explicit casting to avoid conversion errors
6565 float central_charge = static_cast <float >(chargeMap[peak].unpack ());
6666 int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset (row, clustererNN.mNnClusterizerSizeInputRow );
6767
6868 for (int32_t r = -clustererNN.mNnClusterizerSizeInputRow ; r <= clustererNN.mNnClusterizerSizeInputRow ; r++) {
69- bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1 )) || (( row + r) < 0 );
70- int32_t pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset ( row, row + r) ;
71- for (int32_t p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p++) {
72- bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary (row + r + row_offset, pad + p, clustererNN. mNnClusterizerSizeInputRow ) ;
69+ int32_t pad_offset = GPUTPCNNClusterizerKernels::padOffset ( row, row + r);
70+ int32_t row_pos = row + r;
71+ for (int32_t p = ( -clustererNN.mNnClusterizerSizeInputPad + pad_offset) ; p <= ( clustererNN.mNnClusterizerSizeInputPad + pad_offset) ; p++) {
72+ int32_t pad_pos = pad + p;
7373 for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime ; t <= clustererNN.mNnClusterizerSizeInputTime ; t++) {
7474 int32_t time_pos = time + t;
75- if (!is_boundary && (time_pos >= 0 ) && (time_pos < TPC_MAX_FRAGMENT_LEN_GPU)) {
76- CfChargePos tmp_pos (row + r, pad + p, time + t);
77- if (r == 0 && !clustererNN.mClusterFlags [2 * glo_idx] && CAMath::Abs (p) < 3 && CAMath::Abs (t) < 3 && p != 0 && t != 0 ) { // ordering is done for short circuit optimization
78- clustererNN.mClusterFlags [2 * glo_idx] += CfUtils::isPeak (isPeakMap[tmp_pos]);
79- clustererNN.mClusterFlags [2 * glo_idx + 1 ] = clustererNN.mClusterFlags [2 * glo_idx];
75+ int32_t isBoundaryIndex = (row_pos + row_offset + clustererNN.mNnClusterizerSizeInputRow ) * clustererNN.mBoundaryMapSizePadsPerRow + pad_pos + clustererNN.mBoundaryPadding ;
76+ if (!clustererNN.mIsBoundary [isBoundaryIndex] && (time_pos >= 0 ) && (time_pos < TPC_MAX_FRAGMENT_LEN_GPU)) {
77+ CfChargePos tmp_pos (row_pos, pad_pos, time_pos);
78+ if (!clustererNN.mNnClusterizerSetDeconvolutionFlags ) { // Only if deconvolution flags are not set
79+ if (r == 0 && !clustererNN.mClusterFlags [2 * glo_idx] && CAMath::Abs (p) < 3 && CAMath::Abs (t) < 3 && p != 0 && t != 0 ) { // ordering is done for short circuit optimization
80+ clustererNN.mClusterFlags [2 * glo_idx] += CfUtils::isPeak (isPeakMap[tmp_pos]);
81+ clustererNN.mClusterFlags [2 * glo_idx + 1 ] = clustererNN.mClusterFlags [2 * glo_idx];
82+ }
8083 }
8184 if (dtype == 0 ) {
8285 clustererNN.mInputData_16 [write_idx] = (OrtDataType::Float16_t)(static_cast <float >(chargeMap[tmp_pos].unpack ()) / central_charge);
86+ // if(CAMath::Abs(static_cast<float>(clustererNN.mInputData_16[write_idx]) - static_cast<float>(clustererNN.mInputData_16[write_idx])) > 1e-6) {
87+ // printf("Warning: (Charge) Charge difference at idx %d, batchStart %d, maxClusters %d, sector %d, row %d (%d), pad %d (%d), time %d (%d): %f / %f\n", glo_idx, batchStart, clusterer.mPmemory->counters.nClusters - 1, sector, row_pos, r, pad_pos, p, time_pos, t, static_cast<float>(clustererNN.mInputData_16[write_idx]), static_cast<float>(clustererNN.mInputData_16[write_idx]));
88+ // }
8389 } else if (dtype == 1 ) {
8490 clustererNN.mInputData_32 [write_idx] = static_cast <float >(chargeMap[tmp_pos].unpack ()) / central_charge;
8591 }
@@ -507,16 +513,17 @@ GPUd() int32_t GPUTPCNNClusterizerKernels::rowOffset(int32_t row, int32_t offset
507513 return (row > 62 ? offset : 0 );
508514}
509515
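516+ // Legacy, deprecated: the input-filling kernel indexes the precomputed clustererNN.mIsBoundary map instead of calling this function.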
510517GPUd () bool GPUTPCNNClusterizerKernels::isBoundary(int32_t row, int32_t pad, int32_t offset)
511518{
512519 if (pad < 0 || row < 0 ) { // Faster short-circuit
513520 return true ;
514521 } else if (row < 63 ) {
515- return (pad >= static_cast <int >(GPUTPCGeometry::NPads (row)));
522+ return (( pad < 0 ) || (pad >= static_cast <int >(GPUTPCGeometry::NPads (row) )));
516523 } else if (row < (63 + offset)) { // to account for the gap between IROC and OROC. Charge will be set to the boundary fill value in order to signal boundaries to the neural network
517524 return true ;
518525 } else if (row < (o2::tpc::constants::MAXGLOBALPADROW + offset)) {
519- return (pad >= static_cast <int >(GPUTPCGeometry::NPads (row - offset)));
526+ return (( pad < 0 ) || (pad >= static_cast <int >(GPUTPCGeometry::NPads (row - offset) )));
520527 } else {
521528 return true ;
522529 }
0 commit comments