@@ -51,7 +51,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::run
 }
 
 template <>
-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNN>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNNCPU>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
 {
   uint32_t glo_idx = get_global_id(0);
   auto& clusterer = processors.tpcClusterer[sector];
@@ -65,16 +65,14 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
   float central_charge = static_cast<float>(chargeMap[peak].unpack());
   int32_t row_offset = GPUTPCNNClusterizerKernels::rowOffset(row, clustererNN.mNnClusterizerSizeInputRow);
 
-#ifndef GPUCA_GPUCODE
-  GPUCA_UNROLL(U(), U());
-#endif
   for (int32_t r = -clustererNN.mNnClusterizerSizeInputRow; r <= clustererNN.mNnClusterizerSizeInputRow; r++) {
     bool is_row_boundary = ((row + r) > (o2::tpc::constants::MAXGLOBALPADROW - 1)) || ((row + r) < 0);
     int32_t pad_offset = is_row_boundary ? 0 : GPUTPCNNClusterizerKernels::padOffset(row, row + r);
     for (int32_t p = -clustererNN.mNnClusterizerSizeInputPad + pad_offset; p <= clustererNN.mNnClusterizerSizeInputPad + pad_offset; p++) {
       bool is_boundary = is_row_boundary || GPUTPCNNClusterizerKernels::isBoundary(row + r + row_offset, pad + p, clustererNN.mNnClusterizerSizeInputRow);
       for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime; t <= clustererNN.mNnClusterizerSizeInputTime; t++) {
-        if (!is_boundary) {
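+        // skip charges whose time index falls outside the current fragment [0, TPC_MAX_FRAGMENT_LEN_GPU)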
+        int32_t time_pos = time + t;
+        if (!is_boundary && (time_pos >= 0) && (time_pos < TPC_MAX_FRAGMENT_LEN_GPU)) {
           CfChargePos tmp_pos(row + r, pad + p, time + t);
           if (r == 0 && !clustererNN.mClusterFlags[2 * glo_idx] && CAMath::Abs(p) < 3 && CAMath::Abs(t) < 3 && p != 0 && t != 0) { // ordering is done for short circuit optimization
             clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
@@ -108,10 +106,20 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
       clustererNN.mInputData_32[write_idx + 2] = static_cast<float>(pad) / GPUTPCGeometry::NPads(row);
     }
   }
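+  // when mNnClusterizerSetDeconvolutionFlags is disabled, derive the cluster flags here from the peak's eight inner neighbors instead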
+  if (!clustererNN.mNnClusterizerSetDeconvolutionFlags) {
+    clustererNN.mClusterFlags[2 * glo_idx] = 0;
+    clustererNN.mClusterFlags[2 * glo_idx + 1] = 0;
+    for (uint16_t i = 0; i < 8; i++) {
+      Delta2 d = cfconsts::InnerNeighbors[i];
+      CfChargePos tmp_pos = peak.delta(d);
+      clustererNN.mClusterFlags[2 * glo_idx] += CfUtils::isPeak(isPeakMap[tmp_pos]);
+    }
+    clustererNN.mClusterFlags[2 * glo_idx + 1] = clustererNN.mClusterFlags[2 * glo_idx];
+  }
 }
 
 template <>
-GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNNSingleElement>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
+GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fillInputNNGPU>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& processors, uint8_t sector, int8_t dtype, int8_t withMC, uint32_t batchStart)
 {
   uint32_t glo_idx = get_global_id(0);
   auto& clusterer = processors.tpcClusterer[sector];