@@ -93,6 +93,8 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
9393 }
9494
9595 bool streamInit[GPUCA_MAX_STREAMS] = {false };
96+ int32_t streamInitAndOccMap = mRec ->NStreams () - 1;
97+
9698 if (doGPU) {
9799 for (uint32_t iSector = 0 ; iSector < NSECTORS; iSector++) {
98100 processorsShadow ()->tpcTrackers [iSector].GPUParametersConst ()->gpumem = (char *)mRec ->DeviceMemoryBase ();
@@ -113,18 +115,12 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
113115 return 2 ;
114116 }
115117
116- WriteToConstantMemory (RecoStep::TPCSectorTracking, (char *)processors ()->tpcTrackers - (char *)processors (), processorsShadow ()->tpcTrackers , sizeof (GPUTPCTracker) * NSECTORS, mRec -> NStreams () - 1 , &mEvents->init);
118+ WriteToConstantMemory (RecoStep::TPCSectorTracking, (char *)processors ()->tpcTrackers - (char *)processors (), processorsShadow ()->tpcTrackers , sizeof (GPUTPCTracker) * NSECTORS, streamInitAndOccMap , &mEvents ->init );
117119
118- for (int32_t i = 0 ; i < mRec ->NStreams () - 1; i++) {
119- streamInit[i] = false ;
120- }
121- streamInit[mRec ->NStreams () - 1] = true;
122- }
123- if (GPUDebug (" Initialization (1)" , 0 )) {
124- return (2 );
120+ std::fill (streamInit, streamInit + mRec ->NStreams (), false);
121+ streamInit[streamInitAndOccMap] = true ;
125122 }
126123
127- int32_t streamOccMap = mRec ->NStreams () - 1;
128124 if (param ().rec .tpc .occupancyMapTimeBins || param ().rec .tpc .sysClusErrorC12Norm ) {
129125 AllocateRegisteredMemory (mInputsHost ->mResourceOccupancyMap , mSubOutputControls [GPUTrackingOutputs::getIndex (&GPUTrackingOutputs::tpcOccupancyMap)]);
130126 }
@@ -134,21 +130,21 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
134130 }
135131 uint32_t * ptr = doGPU ? mInputsShadow ->mTPCClusterOccupancyMap : mInputsHost ->mTPCClusterOccupancyMap ;
136132 auto * ptrTmp = (GPUTPCClusterOccupancyMapBin*)mRec ->AllocateVolatileMemory (GPUTPCClusterOccupancyMapBin::getTotalSize (param ()), doGPU);
137- runKernel<GPUMemClean16>(GetGridAutoStep (streamOccMap , RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize (param ()));
138- runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fill>(GetGridBlk (GPUCA_NSECTORS * GPUCA_ROW_COUNT, streamOccMap ), ptrTmp);
139- runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fold>(GetGridBlk (GPUTPCClusterOccupancyMapBin::getNBins (param ()), streamOccMap ), ptrTmp, ptr + 2 );
133+ runKernel<GPUMemClean16>(GetGridAutoStep (streamInitAndOccMap , RecoStep::TPCSectorTracking), ptrTmp, GPUTPCClusterOccupancyMapBin::getTotalSize (param ()));
134+ runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fill>(GetGridBlk (GPUCA_NSECTORS * GPUCA_ROW_COUNT, streamInitAndOccMap ), ptrTmp);
135+ runKernel<GPUTPCCreateOccupancyMap, GPUTPCCreateOccupancyMap::fold>(GetGridBlk (GPUTPCClusterOccupancyMapBin::getNBins (param ()), streamInitAndOccMap ), ptrTmp, ptr + 2 );
140136 mRec ->ReturnVolatileMemory ();
141137 mInputsHost ->mTPCClusterOccupancyMap [1 ] = param ().rec .tpc .occupancyMapTimeBins * 0x10000 + param ().rec .tpc .occupancyMapTimeBinsAverage ;
142138 if (doGPU) {
143- GPUMemCpy (RecoStep::TPCSectorTracking, mInputsHost ->mTPCClusterOccupancyMap + 2 , mInputsShadow ->mTPCClusterOccupancyMap + 2 , sizeof (*ptr) * GPUTPCClusterOccupancyMapBin::getNBins (mRec ->GetParam ()), streamOccMap , false , &mEvents ->init );
139+ GPUMemCpy (RecoStep::TPCSectorTracking, mInputsHost ->mTPCClusterOccupancyMap + 2 , mInputsShadow ->mTPCClusterOccupancyMap + 2 , sizeof (*ptr) * GPUTPCClusterOccupancyMapBin::getNBins (mRec ->GetParam ()), streamInitAndOccMap , false , &mEvents ->init );
144140 } else {
145- TransferMemoryResourceLinkToGPU (RecoStep::TPCSectorTracking, mInputsHost ->mResourceOccupancyMap , streamOccMap , &mEvents ->init );
141+ TransferMemoryResourceLinkToGPU (RecoStep::TPCSectorTracking, mInputsHost ->mResourceOccupancyMap , streamInitAndOccMap , &mEvents ->init );
146142 }
147143 }
148144 if (param ().rec .tpc .occupancyMapTimeBins || param ().rec .tpc .sysClusErrorC12Norm ) {
149145 uint32_t & occupancyTotal = *mInputsHost ->mTPCClusterOccupancyMap ;
150146 occupancyTotal = CAMath::Float2UIntRn (mRec ->MemoryScalers ()->nTPCHits / (mIOPtrs .settingsTF && mIOPtrs .settingsTF ->hasNHBFPerTF ? mIOPtrs .settingsTF ->nHBFPerTF : 128 ));
151- mRec ->UpdateParamOccupancyMap (param ().rec .tpc .occupancyMapTimeBins ? mInputsHost ->mTPCClusterOccupancyMap + 2 : nullptr , param ().rec .tpc .occupancyMapTimeBins ? mInputsShadow ->mTPCClusterOccupancyMap + 2 : nullptr , occupancyTotal, streamOccMap );
147+ mRec ->UpdateParamOccupancyMap (param ().rec .tpc .occupancyMapTimeBins ? mInputsHost ->mTPCClusterOccupancyMap + 2 : nullptr , param ().rec .tpc .occupancyMapTimeBins ? mInputsShadow ->mTPCClusterOccupancyMap + 2 : nullptr , occupancyTotal, streamInitAndOccMap );
152148 }
153149
154150 int32_t streamMap[NSECTORS];
@@ -190,19 +186,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal()
190186 }
191187 }
192188
193- // Initialize temporary memory where needed
194- if (GetProcessingSettings ().debugLevel >= 3 ) {
195- GPUInfo (" Copying Sector Data to GPU and initializing temporary memory" );
196- }
197189 runKernel<GPUMemClean16>(GetGridAutoStep (useStream, RecoStep::TPCSectorTracking), trkShadow.Data ().HitWeights (), trkShadow.Data ().NumberOfHitsPlusAlign () * sizeof (*trkShadow.Data ().HitWeights ()));
198-
199- if (!doGPU) {
200- TransferMemoryResourcesToGPU (RecoStep::TPCSectorTracking, &trk, useStream); // Copy Data to GPU Global Memory
201- }
202- if (GPUDebug (" Initialization (3)" , useStream)) {
203- throw std::runtime_error (" memcpy failure" );
204- }
205-
206190 runKernel<GPUTPCNeighboursFinder>({GetGridBlk (GPUCA_ROW_COUNT, useStream), {iSector}, {nullptr , streamInit[useStream] ? nullptr : &mEvents ->init }});
207191 streamInit[useStream] = true ;
208192
0 commit comments