|
24 | 24 |
|
25 | 25 | using namespace o2::gpu; |
26 | 26 |
|
27 | | -int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSector, int32_t threadId, bool synchronizeOutput) |
| 27 | +uint32_t GPUChainTracking::StreamForSector(uint32_t sector) const |
28 | 28 | { |
29 | | - runKernel<GPUTPCExtrapolationTracking>({GetGridBlk(256, iSector % mRec->NStreams()), {iSector}}); |
30 | | - TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[iSector].MemoryResCommon(), iSector % mRec->NStreams()); |
31 | | - if (synchronizeOutput) { |
32 | | - SynchronizeStream(iSector % mRec->NStreams()); |
| 29 | + return sector % mRec->NStreams(); |
| 30 | +} |
| 31 | + |
| 32 | +int32_t GPUChainTracking::ExtrapolationTracking(uint32_t iSector, bool blocking) |
| 33 | +{ |
| 34 | + const uint32_t stream = StreamForSector(iSector); |
| 35 | + runKernel<GPUTPCExtrapolationTracking>({GetGridBlk(256, stream), {iSector}}); |
| 36 | + TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[iSector].MemoryResCommon(), stream); |
| 37 | + if (blocking) { |
| 38 | + SynchronizeStream(stream); |
33 | 39 | } |
34 | 40 | return (0); |
35 | 41 | } |
@@ -153,7 +159,7 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() |
153 | 159 | mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) { |
154 | 160 | GPUTPCTracker& trk = processors()->tpcTrackers[iSector]; |
155 | 161 | GPUTPCTracker& trkShadow = doGPU ? processorsShadow()->tpcTrackers[iSector] : trk; |
156 | | - int32_t useStream = (iSector % mRec->NStreams()); |
| 162 | + int32_t useStream = StreamForSector(iSector); |
157 | 163 |
|
158 | 164 | if (GetProcessingSettings().debugLevel >= 3) { |
159 | 165 | GPUInfo("Creating Sector Data (Sector %d)", iSector); |
@@ -234,102 +240,38 @@ int32_t GPUChainTracking::RunTPCTrackingSectors_internal() |
234 | 240 | } |
235 | 241 |
|
236 | 242 | if (doGPU || GetProcessingSettings().debugLevel >= 1) { |
237 | | - if (doGPU) { |
238 | | - ReleaseEvent(mEvents->init); |
239 | | - } |
240 | | - |
241 | | - mSectorSelectorReady = 0; |
242 | | - |
243 | | - std::array<bool, NSECTORS> transferRunning; |
244 | | - transferRunning.fill(true); |
245 | | - if (doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging)) { // TODO: This seems pretty obsolete code path, can probably be removed. |
246 | | - if (param().rec.tpc.extrapolationTracking) { |
247 | | - mExtrapolationTrackingDone.fill(0); |
248 | | - } |
249 | | - |
250 | | - uint32_t tmpSector = 0; |
251 | | - for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { |
252 | | - if (GetProcessingSettings().debugLevel >= 3) { |
253 | | - GPUInfo("Transfering Tracks from GPU to Host"); |
254 | | - } |
255 | | - |
256 | | - if (tmpSector == iSector) { |
257 | | - SynchronizeEvents(&mEvents->sector[iSector]); |
258 | | - } |
259 | | - while (tmpSector < NSECTORS && (tmpSector == iSector || IsEventDone(&mEvents->sector[tmpSector]))) { |
260 | | - ReleaseEvent(mEvents->sector[tmpSector]); |
261 | | - if (*processors()->tpcTrackers[tmpSector].NTracks() > 0) { |
262 | | - TransferMemoryResourceLinkToHost(RecoStep::TPCSectorTracking, processors()->tpcTrackers[tmpSector].MemoryResOutput(), streamMap[tmpSector], &mEvents->sector[tmpSector]); |
263 | | - } else { |
264 | | - transferRunning[tmpSector] = false; |
265 | | - } |
266 | | - tmpSector++; |
267 | | - } |
268 | | - |
269 | | - if (GetProcessingSettings().keepAllMemory) { |
270 | | - TransferMemoryResourcesToHost(RecoStep::TPCSectorTracking, &processors()->tpcTrackers[iSector], -1, true); |
271 | | - } |
272 | | - |
273 | | - if (transferRunning[iSector]) { |
274 | | - SynchronizeEvents(&mEvents->sector[iSector]); |
275 | | - } |
276 | | - if (GetProcessingSettings().debugLevel >= 3) { |
277 | | - GPUInfo("Tracks Transfered: %d / %d", *processors()->tpcTrackers[iSector].NTracks(), *processors()->tpcTrackers[iSector].NTrackHits()); |
278 | | - } |
279 | | - |
280 | | - if (GetProcessingSettings().debugLevel >= 3) { |
281 | | - GPUInfo("Data ready for sector %d", iSector); |
282 | | - } |
283 | | - mSectorSelectorReady = iSector; |
284 | | - |
285 | | - if (param().rec.tpc.extrapolationTracking) { |
286 | | - for (uint32_t tmpSector2a = 0; tmpSector2a <= iSector; tmpSector2a++) { |
287 | | - uint32_t tmpSector2 = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(tmpSector2a); |
288 | | - uint32_t sectorLeft, sectorRight; |
289 | | - GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector2, sectorLeft, sectorRight); |
290 | | - |
291 | | - if (tmpSector2 <= iSector && sectorLeft <= iSector && sectorRight <= iSector && mExtrapolationTrackingDone[tmpSector2] == 0) { |
292 | | - ExtrapolationTracking(tmpSector2, 0); |
293 | | - mExtrapolationTrackingDone[tmpSector2] = 1; |
294 | | - } |
295 | | - } |
296 | | - } |
297 | | - } |
298 | | - } |
299 | 243 | if (param().rec.tpc.extrapolationTracking) { |
300 | 244 | std::vector<bool> blocking(NSECTORS * mRec->NStreams()); |
301 | | - for (int32_t i = 0; i < NSECTORS; i++) { |
302 | | - for (int32_t j = 0; j < mRec->NStreams(); j++) { |
303 | | - blocking[i * mRec->NStreams() + j] = i % mRec->NStreams() == j; |
| 245 | + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { |
| 246 | + for (uint32_t iStream = 0; iStream < mRec->NStreams(); iStream++) { |
| 247 | + blocking[iSector * mRec->NStreams() + iStream] = StreamForSector(iSector) == iStream; |
304 | 248 | } |
305 | 249 | } |
306 | 250 | for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { |
307 | 251 | uint32_t tmpSector = GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorOrder(iSector); |
308 | | - if (!(doGPU && !(GetRecoStepsGPU() & RecoStep::TPCMerging))) { |
309 | | - uint32_t sectorLeft, sectorRight; |
310 | | - GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight); |
311 | | - if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()]) { |
312 | | - StreamWaitForEvents(tmpSector % mRec->NStreams(), &mEvents->sector[sectorLeft]); |
313 | | - blocking[tmpSector * mRec->NStreams() + sectorLeft % mRec->NStreams()] = true; |
314 | | - } |
315 | | - if (doGPU && !blocking[tmpSector * mRec->NStreams() + sectorRight % mRec->NStreams()]) { |
316 | | - StreamWaitForEvents(tmpSector % mRec->NStreams(), &mEvents->sector[sectorRight]); |
317 | | - blocking[tmpSector * mRec->NStreams() + sectorRight % mRec->NStreams()] = true; |
318 | | - } |
| 252 | + uint32_t sectorLeft, sectorRight; |
| 253 | + GPUTPCExtrapolationTracking::ExtrapolationTrackingSectorLeftRight(tmpSector, sectorLeft, sectorRight); |
| 254 | + if (doGPU && !blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorLeft)]) { |
| 255 | + StreamWaitForEvents(StreamForSector(tmpSector), &mEvents->sector[sectorLeft]); |
| 256 | + blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorLeft)] = true; |
| 257 | + } |
| 258 | + if (doGPU && !blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorRight)]) { |
| 259 | + StreamWaitForEvents(StreamForSector(tmpSector), &mEvents->sector[sectorRight]); |
| 260 | + blocking[tmpSector * mRec->NStreams() + StreamForSector(sectorRight)] = true; |
319 | 261 | } |
320 | | - ExtrapolationTracking(tmpSector, 0, false); |
| 262 | + ExtrapolationTracking(tmpSector, false); |
321 | 263 | } |
322 | 264 | } |
323 | | - for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { |
324 | | - if (doGPU && transferRunning[iSector]) { |
| 265 | + if (doGPU) { |
| 266 | + ReleaseEvent(mEvents->init); |
| 267 | + for (uint32_t iSector = 0; iSector < NSECTORS; iSector++) { |
325 | 268 | ReleaseEvent(mEvents->sector[iSector]); |
326 | 269 | } |
327 | 270 | } |
328 | 271 | } else { |
329 | | - mSectorSelectorReady = NSECTORS; |
330 | 272 | mRec->runParallelOuterLoop(doGPU, NSECTORS, [&](uint32_t iSector) { |
331 | 273 | if (param().rec.tpc.extrapolationTracking) { |
332 | | - ExtrapolationTracking(iSector, 0); |
| 274 | + ExtrapolationTracking(iSector, true); |
333 | 275 | } |
334 | 276 | }); |
335 | 277 | mRec->SetNActiveThreadsOuterLoop(1); |
|
0 commit comments