Skip to content

Commit 312ae13

Browse files
ChSonnabend authored and davidrohr committed
Cleanup + handling of deconvolution
1 parent 53b81b8 commit 312ae13

File tree

1 file changed

+6
-47
lines changed

1 file changed

+6
-47
lines changed

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 6 additions & 47 deletions
Original file line number | Diff line number | Diff line change
@@ -977,20 +977,10 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
977977
GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow()->tpcNNClusterer[lane] : clustererNN;
978978
GPUTPCNNClusterizerHost& nnApplication = nnApplications[lane];
979979

980-
// // bool recreateMemoryAllocator = false;
981-
// if (lane == 0) {
982-
// (nnApplications[lane].mModelClass).initEnvironment();
983-
// nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, 0);
984-
// }
985-
// // recreateMemoryAllocator = true;
986-
// (nnApplications[lane].mModelClass).initSession();
987-
// (nnApplications[lane].mModelReg1).initSession();
988-
989980
int withMC = (doGPU && propagateMCLabels);
990981

991-
if (clustererNNShadow.mNnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) {
982+
if (nn_settings.nnClusterizerApplyCfDeconvolution) {
992983
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true);
993-
DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
994984
} else if (clustererNNShadow.mNnClusterizerSetDeconvolutionFlags) {
995985
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, false);
996986
}
@@ -1007,9 +997,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
1007997
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishDeconvolutionFlags>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, withMC, batchStart); // Filling the regression data
1008998
}
1009999

1010-
// auto stop0 = std::chrono::high_resolution_clock::now();
1011-
// auto start1 = std::chrono::high_resolution_clock::now();
1012-
10131000
// NN evaluations
10141001
if (clustererNNShadow.mNnInferenceInputDType == 0) {
10151002
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
@@ -1055,8 +1042,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10551042
}
10561043
}
10571044

1058-
// auto stopNNs = std::chrono::high_resolution_clock::now();
1059-
10601045
// Publishing kernels
10611046
if (nnApplication.mModelClass.getNumOutputNodes()[0][1] == 1) {
10621047
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::determineClass1Labels>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, withMC, batchStart); // Assigning class labels
@@ -1069,41 +1054,15 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10691054
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::publishClass2Regression>({GetGrid(iSize, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceOutputDType, withMC, batchStart); // Publishing class 2 regression results
10701055
}
10711056
}
1072-
1073-
// for(int i = 0; i < iSize; ++i) {
1074-
// if(clustererNNShadow.mOutputDataClass[i + batchStart] > 1) {
1075-
// LOG(info) << "WARNING ORT: Output of " << i + batchStart << " / " << clusterer.mPmemory->counters.nClusters << " is " << clustererNNShadow.mModelProbabilities_16[i].ToFloat() << " and " << clustererNNShadow.mOutputDataClass[i + batchStart] << " thresh " << clustererNNShadow.mNnClassThreshold << " instead of 0 or 1. Please check the model and the input data.";
1076-
// // std::string input = "[";
1077-
// // for(int j = 0; j < clustererNNShadow.mNnClusterizerElementSize; j++){
1078-
// // input += std::to_string(clustererNNShadow.mInputData_16[i * clustererNNShadow.mNnClusterizerElementSize + j].ToFloat()) + ", ";
1079-
// // }
1080-
// // input += "]";
1081-
// // LOG(info) << "Input is: " << input;
1082-
// }
1083-
// }
1084-
1085-
// auto stop1 = std::chrono::high_resolution_clock::now();
1086-
1087-
// time_networks += std::chrono::duration_cast<std::chrono::nanoseconds>(stopNNs - start1).count() / 1e9;
1088-
// time_clusterizer += std::chrono::duration_cast<std::chrono::nanoseconds>(stop1 - start1).count() / 1e9;
1089-
// time_fill += std::chrono::duration_cast<std::chrono::nanoseconds>(stop0 - start0).count() / 1e9;
10901057
}
1058+
10911059
if (clustererNNShadow.mNnClusterizerUseCfRegression) {
1092-
// auto start1 = std::chrono::high_resolution_clock::now();
1060+
if(!nn_settings.nnClusterizerApplyCfDeconvolution) {
1061+
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}}, true);
1062+
}
1063+
DoDebugAndDump(RecoStep::TPCClusterFinding, GPUChainTrackingDebugFlags::TPCClustererChargeMap, clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile, "Split Charges");
10931064
runKernel<GPUTPCNNClusterizerKernels, GPUTPCNNClusterizerKernels::runCfClusterizer>({GetGrid(clusterer.mPmemory->counters.nClusters, lane), krnlRunRangeNone}, iSector, clustererNNShadow.mNnInferenceInputDType, withMC, 0); // Running the CF regression kernel - no batching needed: batchStart = 0
1094-
// auto stop1 = std::chrono::high_resolution_clock::now();
1095-
// time_clusterizer += std::chrono::duration_cast<std::chrono::nanoseconds>(stop1 - start1).count() / 1e9;
10961065
}
1097-
// if (clustererNNShadow.mNnClusterizerVerbosity < 3) {
1098-
// int acceptedClusters = 0;
1099-
// for (size_t i = 0; i < clusterer.mPmemory->counters.nClusters; ++i) {
1100-
// if(clustererNNShadow.mOutputDataClass[i] > 1 || clustererNNShadow.mOutputDataClass[i] < 0) {
1101-
// LOG(info) << "WARNING ORT 2: " << clustererNNShadow.mOutputDataClass[i] << " for index " << i << " / " << clusterer.mPmemory->counters.nClusters;
1102-
// }
1103-
// acceptedClusters += clustererNNShadow.mOutputDataClass[i];
1104-
// }
1105-
// LOG(info) << "[NN CF] Apply NN (fragment " << fragment.index << ", lane: " << lane << ", sector: " << iSector << "): filling data " << time_fill << "s ; networks: " << time_networks << "s ; clusterizer: " << time_clusterizer << "s ; " << clusterer.mPmemory->counters.nClusters << " clusters, " << acceptedClusters << " accepted. --> " << (int32_t)clusterer.mPmemory->counters.nClusters / (time_fill + time_clusterizer) << " clusters/s";
1106-
// }
11071066
#else
11081067
GPUFatal("Project not compiled with neural network clusterization. Aborting.");
11091068
#endif

0 commit comments

Comments
 (0)