@@ -643,26 +643,28 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
643643
644644 // Maximum of 4 lanes supported
645645 HighResTimer* nnTimers[12 ];
646- if (GetProcessingSettings ().nn .applyNNclusterizer && GetProcessingSettings ().debugLevel >= 1 ) {
647- nnTimers[0 ] = &getTimer<GPUTPCNNClusterizer, 0 >(" GPUTPCNNClusterizer_ONNXClassification_0_" , 0 );
648- nnTimers[1 ] = &getTimer<GPUTPCNNClusterizer, 1 >(" GPUTPCNNClusterizer_ONNXRegression_1_" , 1 );
649- nnTimers[2 ] = &getTimer<GPUTPCNNClusterizer, 2 >(" GPUTPCNNClusterizer_ONNXRegression2_2_" , 2 );
650- nnTimers[3 ] = &getTimer<GPUTPCNNClusterizer, 3 >(" GPUTPCNNClusterizer_ONNXClassification_0_" , 3 );
651- nnTimers[4 ] = &getTimer<GPUTPCNNClusterizer, 4 >(" GPUTPCNNClusterizer_ONNXRegression_1_" , 4 );
652- nnTimers[5 ] = &getTimer<GPUTPCNNClusterizer, 5 >(" GPUTPCNNClusterizer_ONNXRegression2_2_" , 5 );
653- nnTimers[6 ] = &getTimer<GPUTPCNNClusterizer, 6 >(" GPUTPCNNClusterizer_ONNXClassification_0_" , 6 );
654- nnTimers[7 ] = &getTimer<GPUTPCNNClusterizer, 7 >(" GPUTPCNNClusterizer_ONNXRegression_1_" , 7 );
655- nnTimers[8 ] = &getTimer<GPUTPCNNClusterizer, 8 >(" GPUTPCNNClusterizer_ONNXRegression2_2_" , 8 );
656- nnTimers[9 ] = &getTimer<GPUTPCNNClusterizer, 9 >(" GPUTPCNNClusterizer_ONNXClassification_0_" , 9 );
657- nnTimers[10 ] = &getTimer<GPUTPCNNClusterizer, 10 >(" GPUTPCNNClusterizer_ONNXRegression_1_" , 10 );
658- nnTimers[11 ] = &getTimer<GPUTPCNNClusterizer, 11 >(" GPUTPCNNClusterizer_ONNXRegression2_2_" , 11 );
659- }
660646
661647 if (GetProcessingSettings ().nn .applyNNclusterizer ) {
662648 int32_t deviceId = -1 ;
663649 int32_t numLanes = GetProcessingSettings ().nTPCClustererLanes ;
664650 int32_t maxThreads = mRec ->getNKernelHostThreads (true );
665651 // bool recreateMemoryAllocator = false;
652+
653+ if (GetProcessingSettings ().debugLevel >= 1 ) {
654+ nnTimers[0 ] = &getTimer<GPUTPCNNClusterizer, 0 >(" GPUTPCNNClusterizer_ONNXClassification_0_" , 0 );
655+ nnTimers[1 ] = &getTimer<GPUTPCNNClusterizer, 1 >(" GPUTPCNNClusterizer_ONNXRegression_1_" , 1 );
656+ nnTimers[2 ] = &getTimer<GPUTPCNNClusterizer, 2 >(" GPUTPCNNClusterizer_ONNXRegression2_2_" , 2 );
657+ nnTimers[3 ] = &getTimer<GPUTPCNNClusterizer, 3 >(" GPUTPCNNClusterizer_ONNXClassification_0_" , 3 );
658+ nnTimers[4 ] = &getTimer<GPUTPCNNClusterizer, 4 >(" GPUTPCNNClusterizer_ONNXRegression_1_" , 4 );
659+ nnTimers[5 ] = &getTimer<GPUTPCNNClusterizer, 5 >(" GPUTPCNNClusterizer_ONNXRegression2_2_" , 5 );
660+ nnTimers[6 ] = &getTimer<GPUTPCNNClusterizer, 6 >(" GPUTPCNNClusterizer_ONNXClassification_0_" , 6 );
661+ nnTimers[7 ] = &getTimer<GPUTPCNNClusterizer, 7 >(" GPUTPCNNClusterizer_ONNXRegression_1_" , 7 );
662+ nnTimers[8 ] = &getTimer<GPUTPCNNClusterizer, 8 >(" GPUTPCNNClusterizer_ONNXRegression2_2_" , 8 );
663+ nnTimers[9 ] = &getTimer<GPUTPCNNClusterizer, 9 >(" GPUTPCNNClusterizer_ONNXClassification_0_" , 9 );
664+ nnTimers[10 ] = &getTimer<GPUTPCNNClusterizer, 10 >(" GPUTPCNNClusterizer_ONNXRegression_1_" , 10 );
665+ nnTimers[11 ] = &getTimer<GPUTPCNNClusterizer, 11 >(" GPUTPCNNClusterizer_ONNXRegression2_2_" , 11 );
666+ }
667+
666668 mRec ->runParallelOuterLoop (doGPU, numLanes, [&](uint32_t lane) {
667669 nnApplications[lane].init (nn_settings, GetProcessingSettings ().deterministicGPUReconstruction );
668670 if (nnApplications[lane].mModelsUsed [0 ]) {
@@ -708,7 +710,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
708710 LOG (info) << " (ORT) Allocated ONNX stream for lane " << lane << " and device " << deviceId;
709711 }
710712 });
711- mRec -> runParallelOuterLoop (doGPU, NSECTORS, [&]( uint32_t sector) {
713+ for ( int32_t sector = 0 ; sector < NSECTORS; sector++ ) {
712714 GPUTPCNNClusterizer& clustererNN = processors ()->tpcNNClusterer [sector];
713715 GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow ()->tpcNNClusterer [sector] : clustererNN;
714716 int32_t lane = sector % numLanes;
@@ -725,7 +727,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
725727 AllocateRegisteredMemory (clustererNN.mMemoryId );
726728 // nnApplications[lane].createBoundary(clustererNNShadow);
727729 // nnApplications[lane].createIndexLookup(clustererNNShadow);
728- });
730+ }
729731 if (doGPU) {
730732 WriteToConstantMemory (RecoStep::TPCClusterFinding, (char *)&processors ()->tpcNNClusterer - (char *)processors (), &processorsShadow ()->tpcNNClusterer , sizeof (GPUTPCNNClusterizer) * NSECTORS, mRec ->NStreams () - 1, &mEvents->init);
731733 }
0 commit comments