@@ -658,7 +658,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
658658 // But environment must be valid, so we init the model environment first and use it here afterwards.
659659 // Either this is done in one environment with lane == 0 or by recreating the allocator using recreateMemoryAllocator.
660660 // TODO: Volatile allocation works for reserving, but not yet for allocations when binding the input tensor
661- // nnApplications[lane].volatileOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator);
661+ // if (lane == 0) {
662+ // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator);
663+ // }
662664 // recreateMemoryAllocator = true;
663665 (nnApplications[lane].mModelClass ).initSession ();
664666 }
@@ -670,7 +672,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
670672 }
671673 // (nnApplications[lane].mModelReg1).setEnv((nnApplications[lane].mModelClass).getEnv());
672674 (nnApplications[lane].mModelReg1 ).initEnvironment ();
673- // nnApplications[lane].volatileOrtAllocator ((nnApplications[lane].mModelReg1).getEnv(), (nnApplications[lane].mModelReg1).getMemoryInfo(), mRec, recreateMemoryAllocator);
675+ // nnApplications[lane].directOrtAllocator ((nnApplications[lane].mModelReg1).getEnv(), (nnApplications[lane].mModelReg1).getMemoryInfo(), mRec, recreateMemoryAllocator);
674676 (nnApplications[lane].mModelReg1 ).initSession ();
675677 }
676678 if (nnApplications[lane].mModelsUsed [2 ]) {
@@ -679,8 +681,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
679681 if (nnApplications[lane].mModelReg2 .getIntraOpNumThreads () > maxThreads) {
680682 nnApplications[lane].mModelReg2 .setIntraOpNumThreads (maxThreads);
681683 }
684+ // (nnApplications[lane].mModelReg2).setEnv((nnApplications[lane].mModelClass).getEnv());
682685 (nnApplications[lane].mModelReg2 ).initEnvironment ();
683- // nnApplications[lane].volatileOrtAllocator ((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator);
686+ // nnApplications[lane].directOrtAllocator ((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator);
684687 (nnApplications[lane].mModelReg2 ).initSession ();
685688 }
686689 if (nn_settings.nnClusterizerVerbosity < 3 ) {
@@ -706,8 +709,6 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
706709 if (doGPU) {
707710 WriteToConstantMemory (RecoStep::TPCClusterFinding, (char *)&processors ()->tpcNNClusterer - (char *)processors (), &processorsShadow ()->tpcNNClusterer , sizeof (GPUTPCNNClusterizer) * NSECTORS, mRec ->NStreams () - 1, &mEvents->init);
708711 }
709- LOG (info) << " Size of nnApplications[lane]: " << sizeof (nnApplications[0 ]) << " bytes" ;
710- LOG (info) << " Size of nnApplications: " << sizeof (GPUTPCNNClusterizerHost) * GetProcessingSettings ().nTPCClustererLanes << " bytes" ;
711712 }
712713#endif
713714
@@ -975,6 +976,15 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
975976 GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow ()->tpcNNClusterer [lane] : clustererNN;
976977 GPUTPCNNClusterizerHost& nnApplication = nnApplications[lane];
977978
979+ // // bool recreateMemoryAllocator = false;
980+ // if (lane == 0) {
981+ // (nnApplications[lane].mModelClass).initEnvironment();
982+ // nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, 0);
983+ // }
984+ // // recreateMemoryAllocator = true;
985+ // (nnApplications[lane].mModelClass).initSession();
986+ // (nnApplications[lane].mModelReg1).initSession();
987+
978988 int withMC = (doGPU && propagateMCLabels);
979989
980990 if (clustererNNShadow.mNnClusterizerUseCfRegression || (int )(nn_settings.nnClusterizerApplyCfDeconvolution )) {
@@ -1187,12 +1197,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
11871197 }
11881198 }
11891199 for (int32_t i = 0 ; i < GetProcessingSettings ().nTPCClustererLanes ; i++) {
1190- // if (GetProcessingSettings().nn.applyNNclusterizer) {
1191- // GPUTPCNNClusterizerHost& nnApplication = nnApplications[i];
1192- // nnApplication.mModelClass.release(GetProcessingSettings().nn.nnInferenceOrtProfiling);
1193- // nnApplication.mModelReg1.release(GetProcessingSettings().nn.nnInferenceOrtProfiling);
1194- // nnApplication.mModelReg2.release(GetProcessingSettings().nn.nnInferenceOrtProfiling);
1195- // }
1200+ if (GetProcessingSettings ().nn .applyNNclusterizer ) {
1201+ LOG (info) << " (ORT) Environment releasing..." ;
1202+ GPUTPCNNClusterizerHost& nnApplication = nnApplications[i];
1203+ nnApplication.mModelClass .release (true );
1204+ nnApplication.mModelReg1 .release (true );
1205+ nnApplication.mModelReg2 .release (true );
1206+ }
11961207 if (transferRunning[i]) {
11971208 ReleaseEvent (mEvents ->stream [i], doGPU);
11981209 }
0 commit comments