@@ -709,19 +709,21 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
709709 LOG (info) << " (ORT) Allocated ONNX stream for lane " << lane << " and device " << deviceId;
710710 }
711711 });
712+ const int16_t maxFragmentLen = GetProcessingSettings ().overrideClusterizerFragmentLen ;
713+ const uint32_t maxAllowedTimebin = param ().par .continuousTracking ? std::max<int32_t >(param ().continuousMaxTimeBin , maxFragmentLen) : TPC_MAX_TIME_BIN_TRIGGERED;
712714 for (int32_t sector = 0 ; sector < NSECTORS; sector++) {
713715 GPUTPCNNClusterizer& clustererNN = processors ()->tpcNNClusterer [sector];
714716 GPUTPCNNClusterizer& clustererNNShadow = doGPU ? processorsShadow ()->tpcNNClusterer [sector] : clustererNN;
715717 int32_t lane = sector % numLanes;
716718 clustererNN.mDeviceId = deviceId;
717719 clustererNN.mISector = sector;
718720 clustererNN.mNnClusterizerTotalClusters = processors ()->tpcClusterer [lane].mNMaxClusters ;
719- nnApplications[lane].initClusterizer (nn_settings, clustererNN);
721+ nnApplications[lane].initClusterizer (nn_settings, clustererNN, maxFragmentLen, maxAllowedTimebin );
720722 if (doGPU) {
721723 clustererNNShadow.mDeviceId = deviceId;
722724 clustererNNShadow.mISector = sector;
723725 clustererNNShadow.mNnClusterizerTotalClusters = processors ()->tpcClusterer [lane].mNMaxClusters ;
724- nnApplications[lane].initClusterizer (nn_settings, clustererNNShadow);
726+ nnApplications[lane].initClusterizer (nn_settings, clustererNNShadow, maxFragmentLen, maxAllowedTimebin );
725727 }
726728 if (nn_settings.nnClusterizerVerbosity > 2 ) {
727729 LOG (info) << " (NNCLUS, GPUChainTrackingClusterizer, this=" << this << " ) Processor initialized. Sector " << sector << " , lane " << lane << " , max clusters " << clustererNN.mNnClusterizerTotalClusters << " (clustererNN=" << &clustererNN << " , clustererNNShadow=" << &clustererNNShadow << " )" ;
@@ -1051,7 +1053,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10511053
10521054 // NN evaluations
10531055 if (clustererNNShadow.mNnClusterizerUseClassification ) {
1054- if (GetProcessingSettings ().debugLevel >= 1 && doGPU) { nnTimers[3 *lane]->Start (); }
1056+ if (GetProcessingSettings ().debugLevel >= 1 && ( doGPU || lane < 4 ) ) { nnTimers[3 *lane]->Start (); }
10551057 if (clustererNNShadow.mNnInferenceInputDType == 0 ) {
10561058 if (clustererNNShadow.mNnInferenceOutputDType == 0 ) {
10571059 (nnApplication.mModelClass ).inference (clustererNNShadow.mInputData_16 , iSize, clustererNNShadow.mModelProbabilities_16 );
@@ -1065,13 +1067,13 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10651067 (nnApplication.mModelClass ).inference (clustererNNShadow.mInputData_32 , iSize, clustererNNShadow.mModelProbabilities_32 );
10661068 }
10671069 }
1068- if (GetProcessingSettings ().debugLevel >= 1 && doGPU) { nnTimers[3 *lane]->Stop (); }
1070+ if (GetProcessingSettings ().debugLevel >= 1 && ( doGPU || lane < 4 )) { nnTimers[3 *lane]->Stop (); } // doGPU || lane<4 -> only for GPU or first 4 CPU lanes (to limit number of concurrent timers). At least gives some statistics for CPU time...
10691071 if (nn_settings.nnClusterizerVerbosity > 3 ) {
10701072 LOG (info) << " (NNCLUS, GPUChainTrackingClusterizer, this=" << this << " ) Done with NN classification inference. Loop=" << batch << " . (clustererNN=" << &clustererNN << " , clustererNNShadow=" << &clustererNNShadow << " )" ;
10711073 }
10721074 }
10731075 if (!clustererNNShadow.mNnClusterizerUseCfRegression ) {
1074- if (GetProcessingSettings ().debugLevel >= 1 && doGPU) { nnTimers[3 *lane + 1 ]->Start (); }
1076+ if (GetProcessingSettings ().debugLevel >= 1 && ( doGPU || lane < 4 ) ) { nnTimers[3 *lane + 1 ]->Start (); }
10751077 if (clustererNNShadow.mNnInferenceInputDType == 0 ) {
10761078 if (clustererNNShadow.mNnInferenceOutputDType == 0 ) {
10771079 (nnApplication.mModelReg1 ).inference (clustererNNShadow.mInputData_16 , iSize, clustererNNShadow.mOutputDataReg1_16 );
@@ -1085,9 +1087,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
10851087 (nnApplication.mModelReg1 ).inference (clustererNNShadow.mInputData_32 , iSize, clustererNNShadow.mOutputDataReg1_32 );
10861088 }
10871089 }
1088- if (GetProcessingSettings ().debugLevel >= 1 && doGPU) { nnTimers[3 *lane + 1 ]->Stop (); }
1090+ if (GetProcessingSettings ().debugLevel >= 1 && ( doGPU || lane < 4 ) ) { nnTimers[3 *lane + 1 ]->Stop (); }
10891091 if (nnApplication.mModelClass .getNumOutputNodes ()[0 ][1 ] > 1 && nnApplication.mModelReg2 .isInitialized ()) {
1090- if (GetProcessingSettings ().debugLevel >= 1 && doGPU) { nnTimers[3 *lane + 2 ]->Start (); }
1092+ if (GetProcessingSettings ().debugLevel >= 1 && ( doGPU || lane < 4 ) ) { nnTimers[3 *lane + 2 ]->Start (); }
10911093 if (clustererNNShadow.mNnInferenceInputDType == 0 ) {
10921094 if (clustererNNShadow.mNnInferenceOutputDType == 0 ) {
10931095 (nnApplication.mModelReg2 ).inference (clustererNNShadow.mInputData_16 , iSize, clustererNNShadow.mOutputDataReg2_16 );
@@ -1101,7 +1103,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
11011103 (nnApplication.mModelReg2 ).inference (clustererNNShadow.mInputData_32 , iSize, clustererNNShadow.mOutputDataReg2_32 );
11021104 }
11031105 }
1104- if (GetProcessingSettings ().debugLevel >= 1 && doGPU) { nnTimers[3 *lane + 2 ]->Stop (); }
1106+ if (GetProcessingSettings ().debugLevel >= 1 && ( doGPU || lane < 4 ) ) { nnTimers[3 *lane + 2 ]->Stop (); }
11051107 }
11061108 if (nn_settings.nnClusterizerVerbosity > 3 ) {
11071109 LOG (info) << " (NNCLUS, GPUChainTrackingClusterizer, this=" << this << " ) Done with NN regression inference. Loop=" << batch << " . (clustererNN=" << &clustererNN << " , clustererNNShadow=" << &clustererNNShadow << " )" ;
0 commit comments