@@ -85,7 +85,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
8585 for (int32_t t = -clustererNN.mNnClusterizerSizeInputTime ; t <= clustererNN.mNnClusterizerSizeInputTime ; ++t) {
8686 int32_t target_time = time + t;
8787
88- if (is_boundary || target_time < 0 || target_time >= TPC_MAX_FRAGMENT_LEN_GPU ) {
88+ if (is_boundary || target_time < 0 || target_time >= clustererNN. maxAllowedTimebin ) {
8989 // Fill boundary value
9090 float boundary_value = static_cast <float >(clustererNN.mNnClusterizerBoundaryFillValue );
9191 if (dtype == 0 ) {
@@ -229,7 +229,7 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::fil
229229 int32_t target_time = time + t_local;
230230
231231 // Optimized boundary check
232- int8_t is_boundary = GPUTPCNNClusterizerKernels::isBoundary (target_row + row_offset, target_pad, clustererNN.mNnClusterizerSizeInputRow ) || (target_time < 0 ) || (target_time >= TPC_MAX_FRAGMENT_LEN_GPU );
232+ int8_t is_boundary = GPUTPCNNClusterizerKernels::isBoundary (target_row + row_offset, target_pad, clustererNN.mNnClusterizerSizeInputRow ) || (target_time < 0 ) || (target_time >= clustererNN. maxAllowedTimebin );
233233
234234 float output_value;
235235 if (is_boundary) {
@@ -340,7 +340,8 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
340340 int32_t model_output_index = glo_idx * clustererNN.mNnClusterizerModelReg1NumOutputNodes ;
341341
342342 CfArray2D<PackedCharge> chargeMap (reinterpret_cast <PackedCharge*>(clusterer.mPchargeMap ));
343- CfChargePos peak = clusterer.mPfilteredPeakPositions [CAMath::Min (full_glo_idx, maxClusterNum - 1 )];
343+ uint32_t peakIndex = CAMath::Min (full_glo_idx, maxClusterNum - 1 );
344+ CfChargePos peak = clusterer.mPfilteredPeakPositions [peakIndex];
344345 float central_charge = static_cast <float >(chargeMap[peak].unpack ());
345346
346347 CPU_ONLY (MCLabelAccumulator labelAccElem (clusterer));
@@ -365,11 +366,8 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
365366
366367 tpc::ClusterNative* clusterOut = clusterer.mPclusterByRow ;
367368
368- // LOG(info) << glo_idx << " -- " << model_output_index << " / " << clustererNN.outputDataReg1.size() << " / " << clustererNN.mNnClusterizerModelReg1NumOutputNodes << " -- " << clusterer.peakPositions.size() << " -- " << clusterer.centralCharges.size();
369-
370369 ClusterAccumulator pc;
371370
372- // Publishing logic is taken from default clusterizer
373371 if (withMC) {
374372 ClusterAccumulator dummy_pc;
375373 CPU_ONLY (labelAcc->collect (peak, central_charge));
@@ -390,37 +388,113 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
390388 return ;
391389 }
392390
393- // For flag influence on cluster error setting: O2/GPU/GPUTracking/Base/GPUParam.inc -> UpdateClusterError2ByState
394391 bool notSinglePad = false , notSingleTime = false ;
395392 for (uint16_t i = 0 ; i < 8 ; i++) {
396393 Delta2 d = cfconsts::InnerNeighbors[i];
397394 CfChargePos tmp_pos = peak.delta (d);
398- notSinglePad |= (d.x != 0 ) && (static_cast <float >(chargeMap[tmp_pos].unpack ()) > 0 );
399- notSingleTime |= (d.y != 0 ) && (static_cast <float >(chargeMap[tmp_pos].unpack ()) > 0 );
395+ float v = static_cast <float >(chargeMap[tmp_pos].unpack ());
396+ notSinglePad |= (d.x != 0 ) && (v > 0 .f );
397+ notSingleTime |= (d.y != 0 ) && (v > 0 .f );
400398 }
401399
400+ float publishPadPosition = 0 .f , publishTimePosition = 0 .f ;
402401 if (dtype == 0 ) {
402+ publishPadPosition = static_cast <float >(peak.pad ()) + clustererNN.mOutputDataReg1_16 [model_output_index].ToFloat ();
403+ publishTimePosition = static_cast <float >(peak.time ()) + clustererNN.mOutputDataReg1_16 [model_output_index + 1 ].ToFloat ();
404+ isBoundaryPublish (full_glo_idx, static_cast <int32_t >(peak.row ()), publishPadPosition, publishTimePosition);
403405 pc.setFull (central_charge * clustererNN.mOutputDataReg1_16 [model_output_index + 4 ].ToFloat (),
404- static_cast < float >(peak. pad ()) + clustererNN. mOutputDataReg1_16 [model_output_index]. ToFloat () ,
406+ publishPadPosition ,
405407 notSinglePad ? clustererNN.mOutputDataReg1_16 [model_output_index + 2 ].ToFloat () : 0 .f ,
406- (clusterer.mPmemory ->fragment ).start + static_cast < float >(peak. time ()) + clustererNN. mOutputDataReg1_16 [model_output_index + 1 ]. ToFloat () ,
408+ (clusterer.mPmemory ->fragment ).start + publishTimePosition ,
407409 notSingleTime ? clustererNN.mOutputDataReg1_16 [model_output_index + 3 ].ToFloat () : 0 .f ,
408410 clustererNN.mClusterFlags [2 * glo_idx],
409411 clustererNN.mClusterFlags [2 * glo_idx + 1 ]);
410- } else if (dtype == 1 ) {
412+ } else {
413+ publishPadPosition = static_cast <float >(peak.pad ()) + clustererNN.mOutputDataReg1_32 [model_output_index];
414+ publishTimePosition = static_cast <float >(peak.time ()) + clustererNN.mOutputDataReg1_32 [model_output_index + 1 ];
415+ isBoundaryPublish (full_glo_idx, static_cast <int32_t >(peak.row ()), publishPadPosition, publishTimePosition);
411416 pc.setFull (central_charge * clustererNN.mOutputDataReg1_32 [model_output_index + 4 ],
412- static_cast < float >(peak. pad ()) + clustererNN. mOutputDataReg1_32 [model_output_index] ,
417+ publishPadPosition ,
413418 notSinglePad ? clustererNN.mOutputDataReg1_32 [model_output_index + 2 ] : 0 .f ,
414- (clusterer.mPmemory ->fragment ).start + static_cast < float >(peak. time ()) + clustererNN. mOutputDataReg1_32 [model_output_index + 1 ] ,
419+ (clusterer.mPmemory ->fragment ).start + publishTimePosition ,
415420 notSingleTime ? clustererNN.mOutputDataReg1_32 [model_output_index + 3 ] : 0 .f ,
416421 clustererNN.mClusterFlags [2 * glo_idx],
417422 clustererNN.mClusterFlags [2 * glo_idx + 1 ]);
418423 }
419424
425+ // if (boundaryFlag != 0) { // Prints the entire NN input for the given index
426+ // // Build a simple buffer manually (float with 3 decimals)
427+ // const int MAX_CHARS = 4096;
428+ // char buffer[MAX_CHARS];
429+ // int pos = 0;
430+ //
431+ // auto appendChar = [&](char c) {
432+ // if (pos < MAX_CHARS - 1) buffer[pos++] = c;
433+ // };
434+ // auto appendStr = [&](const char* s) {
435+ // while (*s && pos < MAX_CHARS - 1) buffer[pos++] = *s++;
436+ // };
437+ // auto appendUInt = [&](uint32_t v) {
438+ // char tmp[16]; int tp = 0;
439+ // if (v == 0) { appendChar('0'); return; }
440+ // while (v && tp < 16) { tmp[tp++] = char('0' + (v % 10)); v /= 10; }
441+ // while (tp--) appendChar(tmp[tp]);
442+ // };
443+ // auto appendInt = [&](int v) {
444+ // if (v < 0) { appendChar('-'); v = -v; }
445+ // appendUInt((uint32_t)v);
446+ // };
447+ // auto appendFloat = [&](float f) {
448+ // if (f < 0) { appendChar('-'); f = -f; }
449+ // int ip = (int)f;
450+ // float frac = f - (float)ip;
451+ // appendInt(ip);
452+ // appendChar('.');
453+ // for (int i = 0; i < 3; i++) {
454+ // frac *= 10.f;
455+ // int d = (int)frac;
456+ // appendChar((char)('0' + (d < 0 ? 0 : (d > 9 ? 9 : d))));
457+ // frac -= d;
458+ // if (frac < 0) frac = 0;
459+ // }
460+ // };
461+ //
462+ // appendStr("(NN CLUS) DEBUG: Boundary cluster detected (sector ");
463+ // appendUInt(sector);
464+ // appendStr(", row ");
465+ // appendUInt(peak.row());
466+ // appendStr(", pad ");
467+ // appendFloat(publishPadPosition);
468+ // appendStr(", time ");
469+ // appendFloat(publishTimePosition);
470+ // appendStr(") [glo_idx=");
471+ // appendUInt(glo_idx);
472+ // appendStr(" elemSize=");
473+ // appendInt(clustererNN.mNnClusterizerElementSize);
474+ // appendStr(" dtype=");
475+ // appendInt(dtype);
476+ // appendStr("] INPUT:");
477+ //
478+ // int elemSize = clustererNN.mNnClusterizerElementSize;
479+ // int baseIdx = glo_idx * elemSize;
480+ //
481+ // int maxPrint = elemSize;
482+ // for (int i = 0; i < maxPrint; ++i) {
483+ // appendChar(' ');
484+ // float v = (dtype == 0) ? clustererNN.mInputData_16[baseIdx + i].ToFloat()
485+ // : clustererNN.mInputData_32[baseIdx + i];
486+ // appendFloat(v);
487+ // if (pos > (MAX_CHARS - 32)) { appendStr(" ..."); break; }
488+ // }
489+ //
490+ // buffer[pos] = 0;
491+ // printf("%s\n", buffer);
492+ // }
493+
420494 tpc::ClusterNative myCluster;
421495 bool rejectCluster = !pc.toNative (peak, central_charge, myCluster, clusterer.Param (), chargeMap);
422496 if (clustererNN.mNnClusterizerUseClassification ) {
423- rejectCluster |= (clustererNN.mOutputDataClass [CAMath::Min (full_glo_idx, ( uint32_t )clusterer. mPmemory -> counters . nClusters - 1 ) ] <= 0 );
497+ rejectCluster |= (clustererNN.mOutputDataClass [peakIndex ] <= 0 );
424498 }
425499 if (rejectCluster) {
426500 if (clusterer.mPclusterPosInRow ) {
@@ -509,19 +583,26 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
509583 }
510584
511585 // Cluster 1
586+ float publishPadPosition = 0 .f , publishTimePosition = 0 .f ;
512587 if (dtype == 0 ) {
588+ publishPadPosition = static_cast <float >(peak.pad ()) + clustererNN.mOutputDataReg2_16 [model_output_index].ToFloat ();
589+ publishTimePosition = static_cast <float >(peak.time ()) + clustererNN.mOutputDataReg2_16 [model_output_index + 1 ].ToFloat ();
590+ isBoundaryPublish (full_glo_idx, static_cast <int32_t >(peak.row ()), publishPadPosition, publishTimePosition);
513591 pc.setFull (central_charge * clustererNN.mOutputDataReg2_16 [model_output_index + 8 ].ToFloat (),
514- static_cast < float >(peak. pad ()) + clustererNN. mOutputDataReg2_16 [model_output_index]. ToFloat () ,
592+ publishPadPosition ,
515593 clustererNN.mOutputDataReg2_16 [model_output_index + 4 ].ToFloat (),
516- (clusterer.mPmemory ->fragment ).start + static_cast < float >(peak. time ()) + clustererNN. mOutputDataReg2_16 [model_output_index + 2 ]. ToFloat () ,
594+ (clusterer.mPmemory ->fragment ).start + publishTimePosition ,
517595 clustererNN.mOutputDataReg2_16 [model_output_index + 6 ].ToFloat (),
518596 clustererNN.mClusterFlags [2 * glo_idx],
519597 clustererNN.mClusterFlags [2 * glo_idx + 1 ]);
520598 } else if (dtype == 1 ) {
599+ publishPadPosition = static_cast <float >(peak.pad ()) + clustererNN.mOutputDataReg2_32 [model_output_index];
600+ publishTimePosition = static_cast <float >(peak.time ()) + clustererNN.mOutputDataReg2_32 [model_output_index + 1 ];
601+ isBoundaryPublish (full_glo_idx, static_cast <int32_t >(peak.row ()), publishPadPosition, publishTimePosition);
521602 pc.setFull (central_charge * clustererNN.mOutputDataReg2_32 [model_output_index + 8 ],
522- static_cast < float >(peak. pad ()) + clustererNN. mOutputDataReg2_32 [model_output_index] ,
603+ publishPadPosition ,
523604 clustererNN.mOutputDataReg2_32 [model_output_index + 4 ],
524- (clusterer.mPmemory ->fragment ).start + static_cast < float >(peak. time ()) + clustererNN. mOutputDataReg2_32 [model_output_index + 2 ] ,
605+ (clusterer.mPmemory ->fragment ).start + publishTimePosition ,
525606 clustererNN.mOutputDataReg2_32 [model_output_index + 6 ],
526607 clustererNN.mClusterFlags [2 * glo_idx],
527608 clustererNN.mClusterFlags [2 * glo_idx + 1 ]);
@@ -558,18 +639,24 @@ GPUdii() void GPUTPCNNClusterizerKernels::Thread<GPUTPCNNClusterizerKernels::pub
558639
559640 // Cluster 2
560641 if (dtype == 0 ) {
642+ publishPadPosition = static_cast <float >(peak.pad ()) + clustererNN.mOutputDataReg2_16 [model_output_index + 1 ].ToFloat ();
643+ publishTimePosition = static_cast <float >(peak.time ()) + clustererNN.mOutputDataReg2_16 [model_output_index + 3 ].ToFloat ();
644+ isBoundaryPublish (full_glo_idx, static_cast <int32_t >(peak.row ()), publishPadPosition, publishTimePosition);
561645 pc.setFull (central_charge * clustererNN.mOutputDataReg2_16 [model_output_index + 9 ].ToFloat (),
562- static_cast < float >(peak. pad ()) + clustererNN. mOutputDataReg2_16 [model_output_index + 1 ]. ToFloat () ,
646+ publishPadPosition ,
563647 clustererNN.mOutputDataReg2_16 [model_output_index + 5 ].ToFloat (),
564- (clusterer.mPmemory ->fragment ).start + static_cast < float >(peak. time ()) + clustererNN. mOutputDataReg2_16 [model_output_index + 3 ]. ToFloat () ,
648+ (clusterer.mPmemory ->fragment ).start + publishTimePosition ,
565649 clustererNN.mOutputDataReg2_16 [model_output_index + 7 ].ToFloat (),
566650 clustererNN.mClusterFlags [2 * glo_idx],
567651 clustererNN.mClusterFlags [2 * glo_idx + 1 ]);
568652 } else if (dtype == 1 ) {
653+ publishPadPosition = static_cast <float >(peak.pad ()) + clustererNN.mOutputDataReg2_32 [model_output_index + 1 ];
654+ publishTimePosition = static_cast <float >(peak.time ()) + clustererNN.mOutputDataReg2_32 [model_output_index + 3 ];
655+ isBoundaryPublish (full_glo_idx, static_cast <int32_t >(peak.row ()), publishPadPosition, publishTimePosition);
569656 pc.setFull (central_charge * clustererNN.mOutputDataReg2_32 [model_output_index + 9 ],
570- static_cast < float >(peak. pad ()) + clustererNN. mOutputDataReg2_32 [model_output_index + 1 ] ,
657+ publishPadPosition ,
571658 clustererNN.mOutputDataReg2_32 [model_output_index + 5 ],
572- (clusterer.mPmemory ->fragment ).start + static_cast < float >(peak. time ()) + clustererNN. mOutputDataReg2_32 [model_output_index + 3 ] ,
659+ (clusterer.mPmemory ->fragment ).start + publishTimePosition ,
573660 clustererNN.mOutputDataReg2_32 [model_output_index + 7 ],
574661 clustererNN.mClusterFlags [2 * glo_idx],
575662 clustererNN.mClusterFlags [2 * glo_idx + 1 ]);
@@ -664,3 +751,26 @@ GPUd() bool GPUTPCNNClusterizerKernels::isBoundary(int32_t row, int32_t pad, int
664751 return true ;
665752 }
666753}
754+
755+ GPUd () bool GPUTPCNNClusterizerKernels::isBoundaryPublish(int32_t idx, int32_t row, float & pad, float & time)
756+ {
757+ if (pad < 0 ) {
758+ // printf("(NN CLUS) WARNING: Boundary detected, idx = %d, pad < 0: row %d, pad %f (%d, %d), time %f (%d, %d)\n", idx, row, pad, 0, static_cast<int>(GPUTPCGeometry::NPads(row)), time, 0, TPC_MAX_FRAGMENT_LEN_GPU);
759+ pad = 0 .f ;
760+ return true ;
761+ } else if (pad >= static_cast <int >(GPUTPCGeometry::NPads (row))) {
762+ // printf("(NN CLUS) WARNING: Boundary detected, idx = %d, pad >= static_cast<int>(GPUTPCGeometry::NPads(row): row %d, pad %f (%d, %d), time %f (%d, %d)\n", idx, row, pad, 0, static_cast<int>(GPUTPCGeometry::NPads(row)), time, 0, TPC_MAX_FRAGMENT_LEN_GPU);
763+ pad = static_cast <float >(GPUTPCGeometry::NPads (row) - 1 );
764+ return true ;
765+ } else if (time < 0 ) {
766+ // printf("(NN CLUS) WARNING: Boundary detected, idx = %d, time < 0: row %d, pad %f (%d, %d), time %f (%d, %d)\n", idx, row, pad, 0, static_cast<int>(GPUTPCGeometry::NPads(row)), time, 0, TPC_MAX_FRAGMENT_LEN_GPU);
767+ time = 0 .f ;
768+ return true ;
769+ } else if (time >= TPC_MAX_FRAGMENT_LEN_GPU) {
770+ // printf("(NN CLUS) WARNING: Boundary detected, idx = %d, time >= TPC_MAX_FRAGMENT_LEN_GPU: row %d, pad %f (%d, %d), time %f (%d, %d)\n", idx, row, pad, 0, static_cast<int>(GPUTPCGeometry::NPads(row)), time, 0, TPC_MAX_FRAGMENT_LEN_GPU);
771+ time = static_cast <float >(TPC_MAX_FRAGMENT_LEN_GPU - 1 );
772+ return true ;
773+ } else {
774+ return false ;
775+ }
776+ }
0 commit comments