@@ -617,18 +617,16 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
617617
618618 if (GetProcessingSettings ().nn .applyNNclusterizer ) {
619619 uint32_t maxClusters = 0 ;
620- for (uint32_t lane = 0 ; lane < GetProcessingSettings ().nTPCClustererLanes ; lane++) {
620+ int32_t deviceId = -1 ;
621+ int32_t numLanes = GetProcessingSettings ().nTPCClustererLanes ;
622+ for (uint32_t lane = 0 ; lane < NSECTORS; lane++) {
621623 maxClusters = std::max (maxClusters, processors ()->tpcClusterer [lane].mNMaxClusters );
622624 }
623- for ( uint32_t lane = 0 ; lane < GetProcessingSettings (). nTPCClustererLanes ; lane++ ) {
625+ mRec -> runParallelOuterLoop (doGPU, numLanes, [&]( uint32_t lane) {
624626 nnApplications[lane].init (nn_settings);
625627 GPUTPCNNClusterizer& clustererNN = processors ()->tpcNNClusterer [lane];
626628 GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow ()->tpcNNClusterer [lane] : clustererNN;
627629
628- int32_t deviceId = -1 ;
629- if (clustererNNShadow.nnClusterizerVerbosity < 3 ) {
630- LOG (info) << " Allocating ONNX stream for lane " << lane << " and lane " << lane;
631- }
632630 if (nnApplications[lane].modelsUsed [0 ]) {
633631 SetONNXGPUStream ((nnApplications[lane].model_class ).getSessionOptions (), lane, &deviceId);
634632 (nnApplications[lane].model_class ).setDeviceId (deviceId);
@@ -644,21 +642,32 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
644642 (nnApplications[lane].model_reg_2 ).setDeviceId (deviceId);
645643 (nnApplications[lane].model_reg_2 ).initEnvironment ();
646644 }
647-
645+ if (clustererNNShadow.nnClusterizerVerbosity < 3 ) {
646+ LOG (info) << " Allocated ONNX stream for lane " << lane << " and device " << deviceId;
647+ }
648+ });
649+ mRec ->runParallelOuterLoop (doGPU, NSECTORS, [&](uint32_t sector) {
650+ GPUTPCNNClusterizer& clustererNN = processors ()->tpcNNClusterer [sector];
651+ GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow ()->tpcNNClusterer [sector] : clustererNN;
652+ int32_t lane = sector % numLanes;
648653 if (doGPU){
649654 clustererNNShadow.deviceId = deviceId;
650- clustererNNShadow.mISector = lane ;
655+ clustererNNShadow.mISector = sector ;
651656 clustererNNShadow.nnClusterizerTotalClusters = maxClusters;
652657 nnApplications[lane].initClusterizer (nn_settings, clustererNNShadow);
653658 } else {
654659 // TODO: not sure if this part is needed at all
655660 clustererNN.deviceId = deviceId;
656- clustererNN.mISector = lane ;
661+ clustererNN.mISector = sector ;
657662 clustererNN.nnClusterizerTotalClusters = maxClusters;
658663 nnApplications[lane].initClusterizer (nn_settings, clustererNN);
659664 }
660665 AllocateRegisteredMemory (clustererNN.mMemoryId );
661- }
666+ if (doGPU){
667+ WriteToConstantMemory (RecoStep::TPCClusterFinding, (char *)&clustererNN - (char *)processors (), &clustererNNShadow, sizeof (clustererNN), lane);
668+ TransferMemoryResourcesToGPU (RecoStep::TPCClusterFinding, &clustererNNShadow, lane);
669+ }
670+ });
662671 }
663672#endif
664673
@@ -934,12 +943,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
934943 GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow ()->tpcNNClusterer [lane] : clustererNN;
935944 GPUTPCNNClusterizerHost& nnApplication = nnApplications[lane];
936945
946+ LOG (info) << " clustererNNShadow.inputData32: " << clustererNNShadow.inputData32 ;
947+ LOG (info) << " clustererShadow.mPclusterInRow: " << clustererShadow.mPclusterInRow ;
948+
937949 int withMC = (doGPU && propagateMCLabels);
938- if (doGPU){
939- // SetupGPUProcessor(&clustererNN, true);
940- WriteToConstantMemory (RecoStep::TPCClusterFinding, (char *)&clustererNN - (char *)processors (), &clustererNNShadow, sizeof (clustererNN), lane);
941- TransferMemoryResourcesToGPU (RecoStep::TPCClusterFinding, &clustererNNShadow, lane);
942- }
943950
944951 if (clustererNNShadow.nnClusterizerUseCfRegression || (int )(nn_settings.nnClusterizerApplyCfDeconvolution )) {
945952 runKernel<GPUTPCCFDeconvolution>({GetGrid (clusterer.mPmemory ->counters .nPositions , lane), {iSector}});
0 commit comments