@@ -1052,7 +1052,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10521052
10531053 // NN evaluations
10541054 if (clustererNNShadow.mNnClusterizerUseClassification ) {
1055- if (GetProcessingSettings ().debugLevel >= 1 && doGPU) { nnTimers[3 *lane]->Start (); }
1055+ if (GetProcessingSettings ().debugLevel >= 1 && ( doGPU || lane < 4 ) ) { nnTimers[3 *lane]->Start (); }
10561056 if (clustererNNShadow.mNnInferenceInputDType == 0 ) {
10571057 if (clustererNNShadow.mNnInferenceOutputDType == 0 ) {
10581058 (nnApplication.mModelClass ).inference (clustererNNShadow.mInputData_16 , iSize, clustererNNShadow.mModelProbabilities_16 );
@@ -1066,13 +1066,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10661066 (nnApplication.mModelClass ).inference (clustererNNShadow.mInputData_32 , iSize, clustererNNShadow.mModelProbabilities_32 );
10671067 }
10681068 }
1069- if (GetProcessingSettings ().debugLevel >= 1 && doGPU) { nnTimers[3 *lane]->Stop (); }
1069+ if (GetProcessingSettings ().debugLevel >= 1 && ( doGPU || lane < 4 )) { nnTimers[3 *lane]->Stop (); } // doGPU || lane<4 -> only for GPU or first 4 CPU lanes (to limit number of concurrent timers). At least gives some statistics for CPU time...
10701070 if (nn_settings.nnClusterizerVerbosity > 3 ) {
10711071 LOG (info) << " (NNCLUS, GPUChainTrackingClusterizer, this=" << this << " ) Done with NN classification inference. Loop=" << batch << " . (clustererNN=" << &clustererNN << " , clustererNNShadow=" << &clustererNNShadow << " )" ;
10721072 }
10731073 }
10741074 if (!clustererNNShadow.mNnClusterizerUseCfRegression ) {
1075- if (GetProcessingSettings ().debugLevel >= 1 && doGPU) { nnTimers[3 *lane + 1 ]->Start (); }
1075+ if (GetProcessingSettings ().debugLevel >= 1 && ( doGPU || lane < 4 ) ) { nnTimers[3 *lane + 1 ]->Start (); }
10761076 if (clustererNNShadow.mNnInferenceInputDType == 0 ) {
10771077 if (clustererNNShadow.mNnInferenceOutputDType == 0 ) {
10781078 (nnApplication.mModelReg1 ).inference (clustererNNShadow.mInputData_16 , iSize, clustererNNShadow.mOutputDataReg1_16 );
@@ -1086,9 +1086,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10861086 (nnApplication.mModelReg1 ).inference (clustererNNShadow.mInputData_32 , iSize, clustererNNShadow.mOutputDataReg1_32 );
10871087 }
10881088 }
1089- if (GetProcessingSettings ().debugLevel >= 1 && doGPU) { nnTimers[3 *lane + 1 ]->Stop (); }
1089+ if (GetProcessingSettings ().debugLevel >= 1 && ( doGPU || lane < 4 ) ) { nnTimers[3 *lane + 1 ]->Stop (); }
10901090 if (nnApplication.mModelClass .getNumOutputNodes ()[0 ][1 ] > 1 && nnApplication.mModelReg2 .isInitialized ()) {
1091- if (GetProcessingSettings ().debugLevel >= 1 && doGPU) { nnTimers[3 *lane + 2 ]->Start (); }
1091+ if (GetProcessingSettings ().debugLevel >= 1 && ( doGPU || lane < 4 ) ) { nnTimers[3 *lane + 2 ]->Start (); }
10921092 if (clustererNNShadow.mNnInferenceInputDType == 0 ) {
10931093 if (clustererNNShadow.mNnInferenceOutputDType == 0 ) {
10941094 (nnApplication.mModelReg2 ).inference (clustererNNShadow.mInputData_16 , iSize, clustererNNShadow.mOutputDataReg2_16 );
@@ -1102,7 +1102,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
11021102 (nnApplication.mModelReg2 ).inference (clustererNNShadow.mInputData_32 , iSize, clustererNNShadow.mOutputDataReg2_32 );
11031103 }
11041104 }
1105- if (GetProcessingSettings ().debugLevel >= 1 && doGPU) { nnTimers[3 *lane + 2 ]->Stop (); }
1105+ if (GetProcessingSettings ().debugLevel >= 1 && ( doGPU || lane < 4 ) ) { nnTimers[3 *lane + 2 ]->Stop (); }
11061106 }
11071107 if (nn_settings.nnClusterizerVerbosity > 3 ) {
11081108 LOG (info) << " (NNCLUS, GPUChainTrackingClusterizer, this=" << this << " ) Done with NN regression inference. Loop=" << batch << " . (clustererNN=" << &clustererNN << " , clustererNNShadow=" << &clustererNNShadow << " )" ;
0 commit comments