@@ -148,9 +148,9 @@ GPUd() void GPUTPCNNClusterizerKernels::fillInputData(int32_t nBlocks, int32_t n
148148 for (int t = -clustererNN.nnClusterizerSizeInputTime ; t <= clustererNN.nnClusterizerSizeInputTime ; t++) {
149149 if (!is_boundary) {
150150 ChargePos tmp_pos (row + r, pad + p, time + t);
151- if (r == 0 && !clustererNN.clusterFlags [2 * glo_idx] && std::abs (p) < 3 && std::abs (t) < 3 && p != 0 && t != 0 ) { // ordering is done for short circuit optimization
152- clustererNN.clusterFlags [2 * glo_idx] = CfUtils::isPeak (isPeakMap[tmp_pos]);
153- clustererNN.clusterFlags [2 * glo_idx + 1 ] = clustererNN.clusterFlags [2 * glo_idx];
151+ if (r == 0 && !clustererNN.clusterFlags [2 * glo_idx] && std::abs (p) < 3 && std::abs (t) < 3 && p != 0 && t != 0 ) { // ordering is done for short circuit optimization
152+ clustererNN.clusterFlags [2 * glo_idx] = CfUtils::isPeak (isPeakMap[tmp_pos]);
153+ clustererNN.clusterFlags [2 * glo_idx + 1 ] = clustererNN.clusterFlags [2 * glo_idx];
154154 }
155155 if (dtype == 0 ) {
156156 clustererNN.inputData16 [write_idx] = (OrtDataType::Float16_t)(static_cast <float >(chargeMap[tmp_pos].unpack ()) / central_charge);
@@ -222,12 +222,12 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg1(uint glo_idx, GPUSha
222222 }
223223
224224 pc.setFull (clustererNN.centralCharges [glo_idx] * clustererNN.outputDataReg1 [model_output_index + 4 ],
225- static_cast <float >(clustererNN.peakPositions [glo_idx].pad ()) + clustererNN.outputDataReg1 [model_output_index],
226- clustererNN.outputDataReg1 [model_output_index + 2 ],
227- static_cast <float >((clusterer.mPmemory ->fragment ).start ) + static_cast <float >(clustererNN.peakPositions [glo_idx].time ()) + clustererNN.outputDataReg1 [model_output_index + 1 ],
228- clustererNN.outputDataReg1 [model_output_index + 3 ],
229- clustererNN.clusterFlags [2 * glo_idx],
230- clustererNN.clusterFlags [2 * glo_idx + 1 ]);
225+ static_cast <float >(clustererNN.peakPositions [glo_idx].pad ()) + clustererNN.outputDataReg1 [model_output_index],
226+ clustererNN.outputDataReg1 [model_output_index + 2 ],
227+ static_cast <float >((clusterer.mPmemory ->fragment ).start ) + static_cast <float >(clustererNN.peakPositions [glo_idx].time ()) + clustererNN.outputDataReg1 [model_output_index + 1 ],
228+ clustererNN.outputDataReg1 [model_output_index + 3 ],
229+ clustererNN.clusterFlags [2 * glo_idx],
230+ clustererNN.clusterFlags [2 * glo_idx + 1 ]);
231231
232232 tpc::ClusterNative myCluster;
233233 bool rejectCluster = !pc.toNative (clustererNN.peakPositions [glo_idx], clustererNN.centralCharges [glo_idx], myCluster, clusterer.Param ());
@@ -302,12 +302,12 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha
302302
303303 // Cluster 1
304304 pc.setFull (clustererNN.centralCharges [glo_idx] * clustererNN.outputDataReg2 [model_output_index + 8 ],
305- static_cast <float >(clustererNN.peakPositions [glo_idx].pad ()) + clustererNN.outputDataReg2 [model_output_index],
306- clustererNN.outputDataReg2 [model_output_index + 4 ],
307- static_cast <float >((clusterer.mPmemory ->fragment ).start ) + static_cast <float >(clustererNN.peakPositions [glo_idx].time ()) + clustererNN.outputDataReg2 [model_output_index + 2 ],
308- clustererNN.outputDataReg2 [model_output_index + 6 ],
309- clustererNN.clusterFlags [2 * glo_idx],
310- clustererNN.clusterFlags [2 * glo_idx + 1 ]);
305+ static_cast <float >(clustererNN.peakPositions [glo_idx].pad ()) + clustererNN.outputDataReg2 [model_output_index],
306+ clustererNN.outputDataReg2 [model_output_index + 4 ],
307+ static_cast <float >((clusterer.mPmemory ->fragment ).start ) + static_cast <float >(clustererNN.peakPositions [glo_idx].time ()) + clustererNN.outputDataReg2 [model_output_index + 2 ],
308+ clustererNN.outputDataReg2 [model_output_index + 6 ],
309+ clustererNN.clusterFlags [2 * glo_idx],
310+ clustererNN.clusterFlags [2 * glo_idx + 1 ]);
311311
312312 tpc::ClusterNative myCluster;
313313 bool rejectCluster = !pc.toNative (clustererNN.peakPositions [glo_idx], clustererNN.centralCharges [glo_idx], myCluster, clusterer.Param ());
@@ -337,12 +337,12 @@ GPUd() void GPUTPCNNClusterizerKernels::publishClustersReg2(uint glo_idx, GPUSha
337337
338338 // Cluster 2
339339 pc.setFull (clustererNN.centralCharges [glo_idx] * clustererNN.outputDataReg2 [model_output_index + 9 ],
340- static_cast <float >(clustererNN.peakPositions [glo_idx].pad ()) + clustererNN.outputDataReg2 [model_output_index + 1 ],
341- clustererNN.outputDataReg2 [model_output_index + 5 ],
342- static_cast <float >((clusterer.mPmemory ->fragment ).start ) + static_cast <float >(clustererNN.peakPositions [glo_idx].time ()) + clustererNN.outputDataReg2 [model_output_index + 3 ],
343- clustererNN.outputDataReg2 [model_output_index + 7 ],
344- clustererNN.clusterFlags [2 * glo_idx],
345- clustererNN.clusterFlags [2 * glo_idx + 1 ]);
340+ static_cast <float >(clustererNN.peakPositions [glo_idx].pad ()) + clustererNN.outputDataReg2 [model_output_index + 1 ],
341+ clustererNN.outputDataReg2 [model_output_index + 5 ],
342+ static_cast <float >((clusterer.mPmemory ->fragment ).start ) + static_cast <float >(clustererNN.peakPositions [glo_idx].time ()) + clustererNN.outputDataReg2 [model_output_index + 3 ],
343+ clustererNN.outputDataReg2 [model_output_index + 7 ],
344+ clustererNN.clusterFlags [2 * glo_idx],
345+ clustererNN.clusterFlags [2 * glo_idx + 1 ]);
346346
347347 rejectCluster = !pc.toNative (clustererNN.peakPositions [glo_idx], clustererNN.centralCharges [glo_idx], myCluster, clusterer.Param ());
348348 if (rejectCluster) {
0 commit comments