@@ -627,6 +627,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
627627 uint32_t maxClusters = 0 ;
628628 int32_t deviceId = -1 ;
629629 int32_t numLanes = GetProcessingSettings ().nTPCClustererLanes ;
630+ int32_t maxThreads = mRec ->MemoryScalers ()->nTPCdigits / 6000 ;
630631 for (uint32_t lane = 0 ; lane < NSECTORS; lane++) {
631632 maxClusters = std::max (maxClusters, processors ()->tpcClusterer [lane].mNMaxClusters );
632633 }
@@ -635,16 +636,25 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
635636 if (nnApplications[lane].modelsUsed [0 ]) {
636637 SetONNXGPUStream ((nnApplications[lane].model_class ).getSessionOptions (), lane, &deviceId);
637638 (nnApplications[lane].model_class ).setDeviceId (deviceId);
639+ if (nnApplications[lane].model_class .getIntraOpNumThreads () > maxThreads) {
640+ nnApplications[lane].model_class .setIntraOpNumThreads (maxThreads);
641+ }
638642 (nnApplications[lane].model_class ).initEnvironment ();
639643 }
640644 if (nnApplications[lane].modelsUsed [1 ]) {
641645 SetONNXGPUStream ((nnApplications[lane].model_reg_1 ).getSessionOptions (), lane, &deviceId);
642646 (nnApplications[lane].model_reg_1 ).setDeviceId (deviceId);
647+ if (nnApplications[lane].model_reg_1 .getIntraOpNumThreads () > maxThreads) {
648+ nnApplications[lane].model_reg_1 .setIntraOpNumThreads (maxThreads);
649+ }
643650 (nnApplications[lane].model_reg_1 ).initEnvironment ();
644651 }
645652 if (nnApplications[lane].modelsUsed [2 ]) {
646653 SetONNXGPUStream ((nnApplications[lane].model_reg_2 ).getSessionOptions (), lane, &deviceId);
647654 (nnApplications[lane].model_reg_2 ).setDeviceId (deviceId);
655+ if (nnApplications[lane].model_reg_2 .getIntraOpNumThreads () > maxThreads) {
656+ nnApplications[lane].model_reg_2 .setIntraOpNumThreads (maxThreads);
657+ }
648658 (nnApplications[lane].model_reg_2 ).initEnvironment ();
649659 }
650660 if (nn_settings.nnClusterizerVerbosity < 3 ) {
0 commit comments