@@ -977,20 +977,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
977977 GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow ()->tpcNNClusterer [lane] : clustererNN;
978978 GPUTPCNNClusterizerHost& nnApplication = nnApplications[lane];
979979
980- // // bool recreateMemoryAllocator = false;
981- // if (lane == 0) {
982- // (nnApplications[lane].mModelClass).initEnvironment();
983- // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, 0);
984- // }
985- // // recreateMemoryAllocator = true;
986- // (nnApplications[lane].mModelClass).initSession();
987- // (nnApplications[lane].mModelReg1).initSession();
988-
989980 int withMC = (doGPU && propagateMCLabels);
990981
991- if (clustererNNShadow. mNnClusterizerUseCfRegression || ( int )( nn_settings.nnClusterizerApplyCfDeconvolution ) ) {
982+ if (nn_settings.nnClusterizerApplyCfDeconvolution ) {
992983 runKernel<GPUTPCCFDeconvolution>({GetGrid (clusterer.mPmemory ->counters .nPositions , lane), {iSector}}, true );
993- DoDebugAndDump (RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile , " Split Charges" );
994984 } else if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags ) {
995985 runKernel<GPUTPCCFDeconvolution>({GetGrid (clusterer.mPmemory ->counters .nPositions , lane), {iSector}}, false );
996986 }
@@ -1007,9 +997,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
1007997 runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishDeconvolutionFlags>({GetGrid (iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType , withMC, batchStart); // Filling the regression data
1008998 }
1009999
1010- // auto stop0 = std::chrono::high_resolution_clock::now();
1011- // auto start1 = std::chrono::high_resolution_clock::now();
1012-
10131000 // NN evaluations
10141001 if (clustererNNShadow.mNnInferenceInputDType == 0 ) {
10151002 if (clustererNNShadow.mNnInferenceOutputDType == 0 ) {
@@ -1055,8 +1042,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10551042 }
10561043 }
10571044
1058- // auto stopNNs = std::chrono::high_resolution_clock::now();
1059-
10601045 // Publishing kernels
10611046 if (nnApplication.mModelClass .getNumOutputNodes ()[0 ][1 ] == 1 ) {
10621047 runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid (iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType , withMC, batchStart); // Assigning class labels
@@ -1069,41 +1054,15 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10691054 runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass2Regression>({GetGrid (iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType , withMC, batchStart); // Publishing class 2 regression results
10701055 }
10711056 }
1072-
1073- // for(int i = 0; i < iSize; ++i) {
1074- // if(clustererNNShadow.mOutputDataClass[i + batchStart] > 1) {
1075- // LOG(info) << "WARNING ORT: Output of " << i + batchStart << " / " << clusterer.mPmemory->counters.nClusters << " is " << clustererNNShadow.mModelProbabilities_16[i].ToFloat() << " and " << clustererNNShadow.mOutputDataClass[i + batchStart] << " thresh " << clustererNNShadow.mNnClassThreshold << " instead of 0 or 1. Please check the model and the input data.";
1076- // // std::string input = "[";
1077- // // for(int j = 0; j < clustererNNShadow.mNnClusterizerElementSize; j++){
1078- // // input += std::to_string(clustererNNShadow.mInputData_16[i * clustererNNShadow.mNnClusterizerElementSize + j].ToFloat()) + ", ";
1079- // // }
1080- // // input += "]";
1081- // // LOG(info) << "Input is: " << input;
1082- // }
1083- // }
1084-
1085- // auto stop1 = std::chrono::high_resolution_clock::now();
1086-
1087- // time_networks += std::chrono::duration_cast<std::chrono::nanoseconds>(stopNNs - start1).count() / 1e9;
1088- // time_clusterizer += std::chrono::duration_cast<std::chrono::nanoseconds>(stop1 - start1).count() / 1e9;
1089- // time_fill += std::chrono::duration_cast<std::chrono::nanoseconds>(stop0 - start0).count() / 1e9;
10901057 }
1058+
10911059 if (clustererNNShadow.mNnClusterizerUseCfRegression ) {
1092- // auto start1 = std::chrono::high_resolution_clock::now();
1060+ if (!nn_settings.nnClusterizerApplyCfDeconvolution ) {
1061+ runKernel<GPUTPCCFDeconvolution>({GetGrid (clusterer.mPmemory ->counters .nPositions , lane), {iSector}}, true );
1062+ }
1063+ DoDebugAndDump (RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile , " Split Charges" );
10931064 runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::runCfClusterizer>({GetGrid (clusterer.mPmemory ->counters .nClusters , lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType , withMC, 0 ); // Running the CF regression kernel - no batching needed: batchStart = 0
1094- // auto stop1 = std::chrono::high_resolution_clock::now();
1095- // time_clusterizer += std::chrono::duration_cast<std::chrono::nanoseconds>(stop1 - start1).count() / 1e9;
10961065 }
1097- // if (clustererNNShadow.mNnClusterizerVerbosity < 3) {
1098- // int acceptedClusters = 0;
1099- // for (size_t i = 0; i < clusterer.mPmemory->counters.nClusters; ++i) {
1100- // if(clustererNNShadow.mOutputDataClass[i] > 1 || clustererNNShadow.mOutputDataClass[i] < 0) {
1101- // LOG(info) << "WARNING ORT 2: " << clustererNNShadow.mOutputDataClass[i] << " for index " << i << " / " << clusterer.mPmemory->counters.nClusters;
1102- // }
1103- // acceptedClusters += clustererNNShadow.mOutputDataClass[i];
1104- // }
1105- // LOG(info) << "[NN CF] Apply NN (fragment " << fragment.index << ", lane: " << lane << ", sector: " << iSector << "): filling data " << time_fill << "s ; networks: " << time_networks << "s ; clusterizer: " << time_clusterizer << "s ; " << clusterer.mPmemory->counters.nClusters << " clusters, " << acceptedClusters << " accepted. --> " << (int32_t)clusterer.mPmemory->counters.nClusters / (time_fill + time_clusterizer) << " clusters/s";
1106- // }
11071066#else
11081067 GPUFatal (" Project not compiled with neural network clusterization. Aborting." );
11091068#endif
0 commit comments