Skip to content

Commit b756dcc

Browse files
committed
Switch on timers on CPU for the first four lanes
1 parent 9ab3245 commit b756dcc

File tree

1 file changed

+6 -6 lines changed

1 file changed

+6 -6 lines changed

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -1052,7 +1052,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10521052

10531053
// NN evaluations
10541054
if(clustererNNShadow.mNnClusterizerUseClassification) {
1055-
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane]->Start(); }
1055+
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Start(); }
10561056
if (clustererNNShadow.mNnInferenceInputDType == 0) {
10571057
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
10581058
(nnApplication.mModelClass).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mModelProbabilities_16);
@@ -1066,13 +1066,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10661066
(nnApplication.mModelClass).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mModelProbabilities_32);
10671067
}
10681068
}
1069-
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane]->Stop(); }
1069+
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane]->Stop(); } // doGPU || lane<4 -> only for GPU or first 4 CPU lanes (to limit number of concurrent timers). At least gives some statistics for CPU time...
10701070
if (nn_settings.nnClusterizerVerbosity > 3) {
10711071
LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with NN classification inference. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";
10721072
}
10731073
}
10741074
if (!clustererNNShadow.mNnClusterizerUseCfRegression) {
1075-
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 1]->Start(); }
1075+
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Start(); }
10761076
if (clustererNNShadow.mNnInferenceInputDType == 0) {
10771077
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
10781078
(nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg1_16);
@@ -1086,9 +1086,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10861086
(nnApplication.mModelReg1).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg1_32);
10871087
}
10881088
}
1089-
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 1]->Stop(); }
1089+
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 1]->Stop(); }
10901090
if (nnApplication.mModelClass.getNumOutputNodes()[0][1] > 1 && nnApplication.mModelReg2.isInitialized()) {
1091-
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 2]->Start(); }
1091+
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Start(); }
10921092
if (clustererNNShadow.mNnInferenceInputDType == 0) {
10931093
if (clustererNNShadow.mNnInferenceOutputDType == 0) {
10941094
(nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_16, iSize, clustererNNShadow.mOutputDataReg2_16);
@@ -1102,7 +1102,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
11021102
(nnApplication.mModelReg2).inference(clustererNNShadow.mInputData_32, iSize, clustererNNShadow.mOutputDataReg2_32);
11031103
}
11041104
}
1105-
if(GetProcessingSettings().debugLevel >= 1 && doGPU) { nnTimers[3*lane + 2]->Stop(); }
1105+
if(GetProcessingSettings().debugLevel >= 1 && (doGPU || lane < 4)) { nnTimers[3*lane + 2]->Stop(); }
11061106
}
11071107
if (nn_settings.nnClusterizerVerbosity > 3) {
11081108
LOG(info) << "(NNCLUS, GPUChainTrackingClusterizer, this=" << this << ") Done with NN regression inference. Loop=" << batch << ". (clustererNN=" << &clustererNN << ", clustererNNShadow=" << &clustererNNShadow << ")";

0 commit comments

Comments
 (0)