@@ -926,7 +926,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
926926 }
927927
928928 if ((clusterer.nnInternals )->nnClusterizerUseCfRegression || (int )(nn_settings.nnClusterizerApplyCfDeconvolution )) {
929- runKernel<GPUTPCCFDeconvolution>({GetGrid (clusterer.mPmemory ->counters .nPositions , lane), {iSlice }});
929+ runKernel<GPUTPCCFDeconvolution>({GetGrid (clusterer.mPmemory ->counters .nPositions , lane), {iSector }});
930930 DoDebugAndDump (RecoStep::TPCClusterFinding, 262144 << 4 , clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile , " Split Charges" );
931931 }
932932
@@ -958,23 +958,23 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
958958 }
959959
960960 auto start0 = std::chrono::high_resolution_clock::now ();
961- runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::fillInputNN>({GetGrid (iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice }}, evalDtype, 0 , batchStart); // Filling the data
961+ runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::fillInputNN>({GetGrid (iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector }}, evalDtype, 0 , batchStart); // Filling the data
962962 auto stop0 = std::chrono::high_resolution_clock::now ();
963963
964964 auto start1 = std::chrono::high_resolution_clock::now ();
965965 GPUTPCNNClusterizer::inferenceNetworkClass (clusterer, evalDtype);
966966 if ((clusterer.nnInternals )->model_class .getNumOutputNodes ()[0 ][1 ] == 1 ) {
967- runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::determineClass1Labels>({GetGrid (iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice }}, evalDtype, 0 , batchStart); // Assigning class labels
967+ runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::determineClass1Labels>({GetGrid (iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector }}, evalDtype, 0 , batchStart); // Assigning class labels
968968 } else {
969- runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::determineClass2Labels>({GetGrid (iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice }}, evalDtype, 0 , batchStart); // Assigning class labels
969+ runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::determineClass2Labels>({GetGrid (iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector }}, evalDtype, 0 , batchStart); // Assigning class labels
970970 }
971971
972972 if (!(clusterer.nnInternals )->nnClusterizerUseCfRegression ) {
973973 GPUTPCNNClusterizer::inferenceNetworkReg1 (clusterer, evalDtype);
974- runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::publishClass1Regression>({GetGrid (iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice }}, evalDtype, 0 , batchStart); // Running the NN for regression class 1
974+ runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::publishClass1Regression>({GetGrid (iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector }}, evalDtype, 0 , batchStart); // Running the NN for regression class 1
975975 if ((clusterer.nnInternals )->model_class .getNumOutputNodes ()[0 ][1 ] > 1 && reg_model_paths.size () > 1 ) {
976976 GPUTPCNNClusterizer::inferenceNetworkReg2 (clusterer, evalDtype);
977- runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::publishClass2Regression>({GetGrid (iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice }}, evalDtype, 0 , batchStart); // Running the NN for regression class 2
977+ runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::publishClass2Regression>({GetGrid (iSize, lane, GPUReconstruction::krnlDeviceType::CPU), {iSector }}, evalDtype, 0 , batchStart); // Running the NN for regression class 2
978978 }
979979 }
980980 auto stop1 = std::chrono::high_resolution_clock::now ();
@@ -985,18 +985,17 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
985985
986986 auto start1 = std::chrono::high_resolution_clock::now ();
987987 if ((clusterer.nnInternals )->nnClusterizerUseCfRegression ) {
988- runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::runCfClusterizer>({GetGrid (clusterer.mPmemory ->counters .nClusters , lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice }}, evalDtype, 0 , 0 ); // Running the CF regression kernel - no batching needed: batchStart = 0
988+ runKernel<GPUTPCNNClusterizer, GPUTPCNNClusterizer::runCfClusterizer>({GetGrid (clusterer.mPmemory ->counters .nClusters , lane, GPUReconstruction::krnlDeviceType::CPU), {iSector }}, evalDtype, 0 , 0 ); // Running the CF regression kernel - no batching needed: batchStart = 0
989989 }
990990 auto stop1 = std::chrono::high_resolution_clock::now ();
991991 time_clusterizer += std::chrono::duration_cast<std::chrono::nanoseconds>(stop1 - start1).count () / 1e9 ;
992992
993993 if ((clusterer.nnInternals )->nnClusterizerVerbosity < 3 ) {
994- LOG (info) << " [NN CF] Apply NN (fragment " << fragment.index << " , lane: " << lane << " , slice: " << iSlice << " ): filling data " << time_fill << " s ; clusterizer: " << time_clusterizer << " s ; " << clusterer.mPmemory ->counters .nClusters << " clusters --> " << clusterer.mPmemory ->counters .nClusters / (time_fill + time_clusterizer) << " clusters/s" ;
994+ LOG (info) << " [NN CF] Apply NN (fragment " << fragment.index << " , lane: " << lane << " , slice: " << iSector << " ): filling data " << time_fill << " s ; clusterizer: " << time_clusterizer << " s ; " << clusterer.mPmemory ->counters .nClusters << " clusters --> " << clusterer.mPmemory ->counters .nClusters / (time_fill + time_clusterizer) << " clusters/s" ;
995995 }
996996 } else {
997997#endif
998- DoDebugAndDump (RecoStep::TPCClusterFinding, 262144 << 4 , clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile , " Split Charges" );
999- runKernel<GPUTPCCFClusterizer>({GetGrid (clusterer.mPmemory ->counters .nClusters , lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice}}, 0 );
998+ runKernel<GPUTPCCFClusterizer>({GetGrid (clusterer.mPmemory ->counters .nClusters , lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, 0 );
1000999
10011000#ifdef GPUCA_HAS_ONNX
10021001 }
@@ -1007,7 +1006,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10071006 if (doGPU) {
10081007 SynchronizeStream (lane);
10091008 }
1010- runKernel<GPUTPCCFClusterizer>({GetGrid (clusterer.mPmemory ->counters .nClusters , lane, GPUReconstruction::krnlDeviceType::CPU), {iSlice }}, 1 ); // Computes MC labels
1009+ runKernel<GPUTPCCFClusterizer>({GetGrid (clusterer.mPmemory ->counters .nClusters , lane, GPUReconstruction::krnlDeviceType::CPU), {iSector }}, 1 ); // Computes MC labels
10111010 }
10121011
10131012 if (GetProcessingSettings ().debugLevel >= 3 ) {
0 commit comments