@@ -870,132 +870,132 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
870870 GPUInfo (" Processing time bins [%d, %d) for sectors %d to %d" , fragment.start , fragment.last (), iSectorBase, iSectorBase + GetProcessingSettings ().nTPCClustererLanes - 1 );
871871 }
872872 mRec ->runParallelOuterLoop (doGPU, maxLane, [&](uint32_t lane)
873- {
874- if (doGPU && fragment.index != 0 )
875- {
876- SynchronizeStream (lane); // Don't overwrite charge map from previous iteration until cluster computation is finished
877- }
878-
879- uint32_t iSector = iSectorBase + lane;
880- GPUTPCClusterFinder &clusterer = processors ()->tpcClusterer [iSector];
881- GPUTPCClusterFinder &clustererShadow = doGPU ? processorsShadow ()->tpcClusterer [iSector] : clusterer;
882- clusterer.mPmemory ->counters .nPeaks = clusterer.mPmemory ->counters .nClusters = 0 ;
883- clusterer.mPmemory ->fragment = fragment;
884-
885- if (mIOPtrs .tpcPackedDigits )
886- {
887- bool setDigitsOnGPU = doGPU && not mIOPtrs .tpcZS ;
888- bool setDigitsOnHost = (not doGPU && not mIOPtrs .tpcZS ) || propagateMCLabels;
889- auto *inDigits = mIOPtrs .tpcPackedDigits ;
890- size_t numDigits = inDigits->nTPCDigits [iSector];
891- if (setDigitsOnGPU)
892- {
893- GPUMemCpy (RecoStep::TPCClusterFinding, clustererShadow.mPdigits , inDigits->tpcDigits [iSector], sizeof (clustererShadow.mPdigits [0 ]) * numDigits, lane, true );
894- }
895- if (setDigitsOnHost)
896- {
897- clusterer.mPdigits = const_cast <o2::tpc::Digit *>(inDigits->tpcDigits [iSector]); // TODO: Needs fixing (invalid const cast)
898- }
899- clusterer.mPmemory ->counters .nDigits = numDigits;
900- }
901-
902- if (mIOPtrs .tpcZS )
903- {
904- if (mCFContext ->nPagesSector [iSector] && mCFContext ->zsVersion != -1 )
905- {
906- clusterer.mPmemory ->counters .nPositions = mCFContext ->nextPos [iSector].first ;
907- clusterer.mPmemory ->counters .nPagesSubsector = mCFContext ->nextPos [iSector].second ;
908- }
909- else
910- {
911- clusterer.mPmemory ->counters .nPositions = clusterer.mPmemory ->counters .nPagesSubsector = 0 ;
912- }
913- }
914- TransferMemoryResourceLinkToGPU (RecoStep::TPCClusterFinding, clusterer.mMemoryId , lane);
915-
916- using ChargeMapType = decltype (*clustererShadow.mPchargeMap );
917- using PeakMapType = decltype (*clustererShadow.mPpeakMap );
918- runKernel<GPUMemClean16>({GetGridAutoStep (lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap , TPCMapMemoryLayout<ChargeMapType>::items (GetProcessingSettings ().overrideClusterizerFragmentLen ) * sizeof (ChargeMapType)); // TODO: Not working in OpenCL2!!!
919- runKernel<GPUMemClean16>({GetGridAutoStep (lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpeakMap , TPCMapMemoryLayout<PeakMapType>::items (GetProcessingSettings ().overrideClusterizerFragmentLen ) * sizeof (PeakMapType));
920- if (fragment.index == 0 )
921- {
922- runKernel<GPUMemClean16>({GetGridAutoStep (lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy , TPC_PADS_IN_SECTOR * sizeof (*clustererShadow.mPpadIsNoisy ));
923- }
924- DoDebugAndDump (RecoStep::TPCClusterFinding, 262144 , clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile , " Zeroed Charges" );
925-
926- if (doGPU)
927- {
928- if (mIOPtrs .tpcZS && mCFContext ->nPagesSector [iSector] && mCFContext ->zsVersion != -1 )
929- {
930- TransferMemoryResourceLinkToGPU (RecoStep::TPCClusterFinding, mInputsHost ->mResourceZS , lane);
931- SynchronizeStream (GetProcessingSettings ().nTPCClustererLanes + lane);
932- }
933- SynchronizeStream (mRec ->NStreams () - 1); // Wait for copying to constant memory
934- }
935-
936- if (mIOPtrs .tpcZS && (mCFContext ->abandonTimeframe || !mCFContext ->nPagesSector [iSector] || mCFContext ->zsVersion == -1 ))
937- {
938- clusterer.mPmemory ->counters .nPositions = 0 ;
939- return ;
940- }
941- if (!mIOPtrs .tpcZS && mIOPtrs .tpcPackedDigits ->nTPCDigits [iSector] == 0 )
942- {
943- clusterer.mPmemory ->counters .nPositions = 0 ;
944- return ;
945- }
946-
947- if (propagateMCLabels && fragment.index == 0 )
948- {
949- clusterer.PrepareMC ();
950- clusterer.mPinputLabels = digitsMC->v [iSector];
951- if (clusterer.mPinputLabels == nullptr )
952- {
953- GPUFatal (" MC label container missing, sector %d" , iSector);
954- }
955- if (clusterer.mPinputLabels ->getIndexedSize () != mIOPtrs .tpcPackedDigits ->nTPCDigits [iSector])
956- {
957- GPUFatal (" MC label container has incorrect number of entries: %d expected, has %d\n " , (int32_t )mIOPtrs .tpcPackedDigits ->nTPCDigits [iSector], (int32_t )clusterer.mPinputLabels ->getIndexedSize ());
958- }
959- }
960-
961- if (GetProcessingSettings ().tpcSingleSector == -1 || GetProcessingSettings ().tpcSingleSector == (int32_t )iSector)
962- {
963- if (not mIOPtrs .tpcZS )
964- {
965- runKernel<GPUTPCCFChargeMapFiller, GPUTPCCFChargeMapFiller::findFragmentStart>({GetGrid (1 , lane), {iSector}}, mIOPtrs .tpcZS == nullptr );
966- TransferMemoryResourceLinkToHost (RecoStep::TPCClusterFinding, clusterer.mMemoryId , lane);
967- }
968- else if (propagateMCLabels)
969- {
970- runKernel<GPUTPCCFChargeMapFiller, GPUTPCCFChargeMapFiller::findFragmentStart>({GetGrid (1 , lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, mIOPtrs .tpcZS == nullptr );
971- TransferMemoryResourceLinkToGPU (RecoStep::TPCClusterFinding, clusterer.mMemoryId , lane);
972- }
973- }
974-
975- if (mIOPtrs .tpcZS )
976- {
977- int32_t firstHBF = (mIOPtrs .settingsTF && mIOPtrs .settingsTF ->hasTfStartOrbit ) ? mIOPtrs .settingsTF ->tfStartOrbit : ((mIOPtrs .tpcZS ->sector [iSector].count [0 ] && mIOPtrs .tpcZS ->sector [iSector].nZSPtr [0 ][0 ]) ? o2::raw::RDHUtils::getHeartBeatOrbit (*(const o2::header::RAWDataHeader *)mIOPtrs .tpcZS ->sector [iSector].zsPtr [0 ][0 ]) : 0 );
978- uint32_t nBlocks = doGPU ? clusterer.mPmemory ->counters .nPagesSubsector : GPUTrackingInOutZS::NENDPOINTS;
979-
980- (void )tpcTimeBinCut; // TODO: To be used in decoding kernels
981- switch (mCFContext ->zsVersion )
982- {
983- default :
984- GPUFatal (" Data with invalid TPC ZS mode (%d) received" , mCFContext ->zsVersion );
985- break ;
986- case ZSVersionRowBased10BitADC:
987- case ZSVersionRowBased12BitADC:
988- runKernel<GPUTPCCFDecodeZS>({GetGridBlk (nBlocks, lane), {iSector}}, firstHBF);
989- break ;
990- case ZSVersionLinkBasedWithMeta:
991- runKernel<GPUTPCCFDecodeZSLink>({GetGridBlk (nBlocks, lane), {iSector}}, firstHBF);
992- break ;
993- case ZSVersionDenseLinkBased:
994- runKernel<GPUTPCCFDecodeZSDenseLink>({GetGridBlk (nBlocks, lane), {iSector}}, firstHBF);
995- break ;
996- }
997- TransferMemoryResourceLinkToHost (RecoStep::TPCClusterFinding, clusterer.mMemoryId , lane);
998- } // clang-format off
873+ {
874+ if (doGPU && fragment.index != 0 )
875+ {
876+ SynchronizeStream (lane); // Don't overwrite charge map from previous iteration until cluster computation is finished
877+ }
878+
879+ uint32_t iSector = iSectorBase + lane;
880+ GPUTPCClusterFinder &clusterer = processors ()->tpcClusterer [iSector];
881+ GPUTPCClusterFinder &clustererShadow = doGPU ? processorsShadow ()->tpcClusterer [iSector] : clusterer;
882+ clusterer.mPmemory ->counters .nPeaks = clusterer.mPmemory ->counters .nClusters = 0 ;
883+ clusterer.mPmemory ->fragment = fragment;
884+
885+ if (mIOPtrs .tpcPackedDigits )
886+ {
887+ bool setDigitsOnGPU = doGPU && not mIOPtrs .tpcZS ;
888+ bool setDigitsOnHost = (not doGPU && not mIOPtrs .tpcZS ) || propagateMCLabels;
889+ auto *inDigits = mIOPtrs .tpcPackedDigits ;
890+ size_t numDigits = inDigits->nTPCDigits [iSector];
891+ if (setDigitsOnGPU)
892+ {
893+ GPUMemCpy (RecoStep::TPCClusterFinding, clustererShadow.mPdigits , inDigits->tpcDigits [iSector], sizeof (clustererShadow.mPdigits [0 ]) * numDigits, lane, true );
894+ }
895+ if (setDigitsOnHost)
896+ {
897+ clusterer.mPdigits = const_cast <o2::tpc::Digit *>(inDigits->tpcDigits [iSector]); // TODO: Needs fixing (invalid const cast)
898+ }
899+ clusterer.mPmemory ->counters .nDigits = numDigits;
900+ }
901+
902+ if (mIOPtrs .tpcZS )
903+ {
904+ if (mCFContext ->nPagesSector [iSector] && mCFContext ->zsVersion != -1 )
905+ {
906+ clusterer.mPmemory ->counters .nPositions = mCFContext ->nextPos [iSector].first ;
907+ clusterer.mPmemory ->counters .nPagesSubsector = mCFContext ->nextPos [iSector].second ;
908+ }
909+ else
910+ {
911+ clusterer.mPmemory ->counters .nPositions = clusterer.mPmemory ->counters .nPagesSubsector = 0 ;
912+ }
913+ }
914+ TransferMemoryResourceLinkToGPU (RecoStep::TPCClusterFinding, clusterer.mMemoryId , lane);
915+
916+ using ChargeMapType = decltype (*clustererShadow.mPchargeMap );
917+ using PeakMapType = decltype (*clustererShadow.mPpeakMap );
918+ runKernel<GPUMemClean16>({GetGridAutoStep (lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPchargeMap , TPCMapMemoryLayout<ChargeMapType>::items (GetProcessingSettings ().overrideClusterizerFragmentLen ) * sizeof (ChargeMapType)); // TODO: Not working in OpenCL2!!!
919+ runKernel<GPUMemClean16>({GetGridAutoStep (lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpeakMap , TPCMapMemoryLayout<PeakMapType>::items (GetProcessingSettings ().overrideClusterizerFragmentLen ) * sizeof (PeakMapType));
920+ if (fragment.index == 0 )
921+ {
922+ runKernel<GPUMemClean16>({GetGridAutoStep (lane, RecoStep::TPCClusterFinding)}, clustererShadow.mPpadIsNoisy , TPC_PADS_IN_SECTOR * sizeof (*clustererShadow.mPpadIsNoisy ));
923+ }
924+ DoDebugAndDump (RecoStep::TPCClusterFinding, 262144 , clusterer, &GPUTPCClusterFinder::DumpChargeMap, *mDebugFile , " Zeroed Charges" );
925+
926+ if (doGPU)
927+ {
928+ if (mIOPtrs .tpcZS && mCFContext ->nPagesSector [iSector] && mCFContext ->zsVersion != -1 )
929+ {
930+ TransferMemoryResourceLinkToGPU (RecoStep::TPCClusterFinding, mInputsHost ->mResourceZS , lane);
931+ SynchronizeStream (GetProcessingSettings ().nTPCClustererLanes + lane);
932+ }
933+ SynchronizeStream (mRec ->NStreams () - 1); // Wait for copying to constant memory
934+ }
935+
936+ if (mIOPtrs .tpcZS && (mCFContext ->abandonTimeframe || !mCFContext ->nPagesSector [iSector] || mCFContext ->zsVersion == -1 ))
937+ {
938+ clusterer.mPmemory ->counters .nPositions = 0 ;
939+ return ;
940+ }
941+ if (!mIOPtrs .tpcZS && mIOPtrs .tpcPackedDigits ->nTPCDigits [iSector] == 0 )
942+ {
943+ clusterer.mPmemory ->counters .nPositions = 0 ;
944+ return ;
945+ }
946+
947+ if (propagateMCLabels && fragment.index == 0 )
948+ {
949+ clusterer.PrepareMC ();
950+ clusterer.mPinputLabels = digitsMC->v [iSector];
951+ if (clusterer.mPinputLabels == nullptr )
952+ {
953+ GPUFatal (" MC label container missing, sector %d" , iSector);
954+ }
955+ if (clusterer.mPinputLabels ->getIndexedSize () != mIOPtrs .tpcPackedDigits ->nTPCDigits [iSector])
956+ {
957+ GPUFatal (" MC label container has incorrect number of entries: %d expected, has %d\n " , (int32_t )mIOPtrs .tpcPackedDigits ->nTPCDigits [iSector], (int32_t )clusterer.mPinputLabels ->getIndexedSize ());
958+ }
959+ }
960+
961+ if (GetProcessingSettings ().tpcSingleSector == -1 || GetProcessingSettings ().tpcSingleSector == (int32_t )iSector)
962+ {
963+ if (not mIOPtrs .tpcZS )
964+ {
965+ runKernel<GPUTPCCFChargeMapFiller, GPUTPCCFChargeMapFiller::findFragmentStart>({GetGrid (1 , lane), {iSector}}, mIOPtrs .tpcZS == nullptr );
966+ TransferMemoryResourceLinkToHost (RecoStep::TPCClusterFinding, clusterer.mMemoryId , lane);
967+ }
968+ else if (propagateMCLabels)
969+ {
970+ runKernel<GPUTPCCFChargeMapFiller, GPUTPCCFChargeMapFiller::findFragmentStart>({GetGrid (1 , lane, GPUReconstruction::krnlDeviceType::CPU), {iSector}}, mIOPtrs .tpcZS == nullptr );
971+ TransferMemoryResourceLinkToGPU (RecoStep::TPCClusterFinding, clusterer.mMemoryId , lane);
972+ }
973+ }
974+
975+ if (mIOPtrs .tpcZS )
976+ {
977+ int32_t firstHBF = (mIOPtrs .settingsTF && mIOPtrs .settingsTF ->hasTfStartOrbit ) ? mIOPtrs .settingsTF ->tfStartOrbit : ((mIOPtrs .tpcZS ->sector [iSector].count [0 ] && mIOPtrs .tpcZS ->sector [iSector].nZSPtr [0 ][0 ]) ? o2::raw::RDHUtils::getHeartBeatOrbit (*(const o2::header::RAWDataHeader *)mIOPtrs .tpcZS ->sector [iSector].zsPtr [0 ][0 ]) : 0 );
978+ uint32_t nBlocks = doGPU ? clusterer.mPmemory ->counters .nPagesSubsector : GPUTrackingInOutZS::NENDPOINTS;
979+
980+ (void )tpcTimeBinCut; // TODO: To be used in decoding kernels
981+ switch (mCFContext ->zsVersion )
982+ {
983+ default :
984+ GPUFatal (" Data with invalid TPC ZS mode (%d) received" , mCFContext ->zsVersion );
985+ break ;
986+ case ZSVersionRowBased10BitADC:
987+ case ZSVersionRowBased12BitADC:
988+ runKernel<GPUTPCCFDecodeZS>({GetGridBlk (nBlocks, lane), {iSector}}, firstHBF);
989+ break ;
990+ case ZSVersionLinkBasedWithMeta:
991+ runKernel<GPUTPCCFDecodeZSLink>({GetGridBlk (nBlocks, lane), {iSector}}, firstHBF);
992+ break ;
993+ case ZSVersionDenseLinkBased:
994+ runKernel<GPUTPCCFDecodeZSDenseLink>({GetGridBlk (nBlocks, lane), {iSector}}, firstHBF);
995+ break ;
996+ }
997+ TransferMemoryResourceLinkToHost (RecoStep::TPCClusterFinding, clusterer.mMemoryId , lane);
998+ } // clang-format off
999999 });
10001000 mRec ->runParallelOuterLoop (doGPU, maxLane, [&](uint32_t lane) {
10011001 uint32_t iSector = iSectorBase + lane;
0 commit comments