Skip to content

Commit 2fda3aa

Browse files
committed
ITS: do final refit on GPU
1 parent e45e4cf commit 2fda3aa

8 files changed

Lines changed: 349 additions & 7 deletions

File tree

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -88,8 +88,10 @@ class TimeFrameGPU : public TimeFrame<NLayers>
8888
void loadTrackExtensionStartStatesDevice();
8989
void createTrackExtensionCandidatesDevice(const size_t);
9090
void createTrackExtensionScratchDevice(const size_t, const int);
91+
void createTrackExtensionResultsDevice(const size_t);
9192
void downloadTrackITSExtDevice();
9293
void downloadTrackExtensionCandidatesDevice();
94+
void downloadTrackExtensionResultsDevice();
9395
void downloadCellsNeighboursDevice(std::vector<bounded_vector<std::pair<int, int>>>&, const int);
9496
void downloadNeighboursLUTDevice(bounded_vector<int>&, const int);
9597
void downloadCellsDevice();
@@ -117,6 +119,7 @@ class TimeFrameGPU : public TimeFrame<NLayers>
117119
int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
118120
auto& getTrackITSExt() { return mTrackITSExt; }
119121
auto& getTrackExtensionCandidates() { return mTrackExtensionCandidates; }
122+
auto& getTrackExtensionResults() { return mTrackExtensionResults; }
120123
Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; }
121124
int* getDeviceROFramesPV() { return mROFramesPVDevice; }
122125
unsigned char* getDeviceUsedClusters(const int);
@@ -126,8 +129,10 @@ class TimeFrameGPU : public TimeFrame<NLayers>
126129
TrackITSExt* getDeviceTrackITSExt() { return mTrackITSExtDevice; }
127130
TrackExtensionStartState<NLayers>* getDeviceTrackExtensionStartStates() { return mTrackExtensionStartStatesDevice; }
128131
TrackExtensionCandidate<NLayers>* getDeviceTrackExtensionCandidates() { return mTrackExtensionCandidatesDevice; }
132+
int* getDeviceTrackExtensionCandidateOffsets() { return mTrackExtensionCandidateOffsetsDevice; }
129133
TrackExtensionHypothesis<NLayers>* getDeviceActiveTrackExtensionHypotheses() { return mActiveTrackExtensionHypothesesDevice; }
130134
TrackExtensionHypothesis<NLayers>* getDeviceNextTrackExtensionHypotheses() { return mNextTrackExtensionHypothesesDevice; }
135+
TrackExtensionResult<NLayers>* getDeviceTrackExtensionResults() { return mTrackExtensionResultsDevice; }
131136
int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
132137
gsl::span<int*> getDeviceNeighboursLUTs() { return mNeighboursLUTDevice; }
133138
gpuPair<int, int>* getDeviceNeighbourPairs(const int layer) { return mNeighbourPairsDevice[layer]; }
@@ -227,8 +232,11 @@ class TimeFrameGPU : public TimeFrame<NLayers>
227232
TrackITSExt* mTrackITSExtDevice;
228233
TrackExtensionStartState<NLayers>* mTrackExtensionStartStatesDevice{nullptr};
229234
TrackExtensionCandidate<NLayers>* mTrackExtensionCandidatesDevice{nullptr};
235+
int* mTrackExtensionCandidateOffsetsDevice{nullptr};
230236
TrackExtensionHypothesis<NLayers>* mActiveTrackExtensionHypothesesDevice{nullptr};
231237
TrackExtensionHypothesis<NLayers>* mNextTrackExtensionHypothesesDevice{nullptr};
238+
TrackExtensionResult<NLayers>* mTrackExtensionResultsDevice{nullptr};
239+
unsigned int mNTrackExtensionResults{0};
232240
std::array<gpuPair<int, int>*, NLayers - 2> mNeighbourPairsDevice;
233241
std::array<int*, NLayers - 2> mNeighboursDevice;
234242
std::array<TrackingFrameInfo*, NLayers> mTrackingFrameInfoDevice;
@@ -249,6 +257,8 @@ class TimeFrameGPU : public TimeFrame<NLayers>
249257
bounded_vector<TrackExtensionStartState<NLayers>> mTrackExtensionStartStates;
250258
// Temporary buffer for compact track extension proposals from GPU tracking
251259
bounded_vector<TrackExtensionCandidate<NLayers>> mTrackExtensionCandidates;
260+
// Temporary buffer for fitted track extension proposals from GPU tracking
261+
bounded_vector<TrackExtensionResult<NLayers>> mTrackExtensionResults;
252262
};
253263

254264
template <int NLayers>

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ class TrackerTraitsGPU final : public TrackerTraits<NLayers>
5151
bool hasTrackFollower(const int iteration) const;
5252

5353
void buildTrackExtensionCandidates(const int iteration, typename TrackerTraits<NLayers>::TrackExtensionCandidates& candidatesPerTrack) final;
54+
bool materializeTrackExtensionCandidate(TrackITSExt& track, const typename TrackerTraits<NLayers>::TrackExtensionCandidateN& candidate, const int iteration) final;
5455

5556
IndexTableUtilsN* mDeviceIndexTableUtils;
5657
gpu::TimeFrameGPU<NLayers>* mTimeFrameGPU;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ void computeTrackExtensionCandidatesHandler(const TrackExtensionStartState<NLaye
4747
const int** ROFClusters,
4848
const TrackingFrameInfo** trackingFrameInfo,
4949
TrackExtensionCandidate<NLayers>* candidates,
50+
int* candidateOffsets,
5051
TrackExtensionHypothesis<NLayers>* activeHypotheses,
5152
TrackExtensionHypothesis<NLayers>* nextHypotheses,
5253
const std::vector<float>& layerRadiiHost,
@@ -66,6 +67,23 @@ void computeTrackExtensionCandidatesHandler(const TrackExtensionStartState<NLaye
6667
const o2::base::PropagatorF::MatCorrType matCorrType,
6768
gpu::Stream& stream);
6869

70+
template <int NLayers>
71+
void computeTrackExtensionResultsHandler(const TrackExtensionStartState<NLayers>* tracks,
72+
const TrackExtensionCandidate<NLayers>* candidates,
73+
const int* candidateOffsets,
74+
TrackExtensionResult<NLayers>* results,
75+
const TrackingFrameInfo** trackingFrameInfo,
76+
const std::vector<float>& layerxX0Host,
77+
const int nTracks,
78+
const int nLayers,
79+
const float bz,
80+
const float maxChi2ClusterAttachment,
81+
const float maxChi2NDF,
82+
const o2::base::Propagator* propagator,
83+
const o2::base::PropagatorF::MatCorrType matCorrType,
84+
const bool shiftRefToCluster,
85+
gpu::Stream& stream);
86+
6987
template <int NLayers>
7088
void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
7189
const typename ROFMaskTable<NLayers>::View& rofMask,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -569,10 +569,12 @@ void TimeFrameGPU<NLayers>::createTrackExtensionCandidatesDevice(const size_t nT
569569
GPULog("gpu-allocation: reserving {} track extension candidates, for {:.2f} MB.", nCandidates, nCandidates * sizeof(o2::its::TrackExtensionCandidate<NLayers>) / constants::MB);
570570
mTrackExtensionCandidates = bounded_vector<TrackExtensionCandidate<NLayers>>(nCandidates, {}, this->getMemoryPool().get());
571571
mTrackExtensionCandidatesDevice = nullptr;
572+
mTrackExtensionCandidateOffsetsDevice = nullptr;
572573
if (mTrackExtensionCandidates.empty()) {
573574
return;
574575
}
575576
allocMem(reinterpret_cast<void**>(&mTrackExtensionCandidatesDevice), nCandidates * sizeof(o2::its::TrackExtensionCandidate<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
577+
allocMem(reinterpret_cast<void**>(&mTrackExtensionCandidateOffsetsDevice), (nTracks + 1) * sizeof(int), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
576578
}
577579

578580
template <int NLayers>
@@ -590,6 +592,28 @@ void TimeFrameGPU<NLayers>::createTrackExtensionScratchDevice(const size_t nTrac
590592
allocMem(reinterpret_cast<void**>(&mNextTrackExtensionHypothesesDevice), nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
591593
}
592594

595+
template <int NLayers>
596+
void TimeFrameGPU<NLayers>::createTrackExtensionResultsDevice(const size_t nTracks)
597+
{
598+
GPUTimer timer("reserving fitted track extension results");
599+
mNTrackExtensionResults = 0;
600+
if (nTracks == 0 || mTrackExtensionCandidateOffsetsDevice == nullptr) {
601+
mTrackExtensionResults = bounded_vector<TrackExtensionResult<NLayers>>(0, {}, this->getMemoryPool().get());
602+
mTrackExtensionResultsDevice = nullptr;
603+
return;
604+
}
605+
int nResults{0};
606+
GPUChkErrS(cudaMemcpy(&nResults, mTrackExtensionCandidateOffsetsDevice + nTracks, sizeof(int), cudaMemcpyDeviceToHost));
607+
mNTrackExtensionResults = nResults;
608+
GPULog("gpu-allocation: reserving {} fitted track extension results, for {:.2f} MB.", mNTrackExtensionResults, mNTrackExtensionResults * sizeof(o2::its::TrackExtensionResult<NLayers>) / constants::MB);
609+
mTrackExtensionResults = bounded_vector<TrackExtensionResult<NLayers>>(mNTrackExtensionResults, {}, this->getMemoryPool().get());
610+
mTrackExtensionResultsDevice = nullptr;
611+
if (mTrackExtensionResults.empty()) {
612+
return;
613+
}
614+
allocMem(reinterpret_cast<void**>(&mTrackExtensionResultsDevice), mNTrackExtensionResults * sizeof(o2::its::TrackExtensionResult<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
615+
}
616+
593617
template <int NLayers>
594618
void TimeFrameGPU<NLayers>::downloadCellsDevice()
595619
{
@@ -647,6 +671,17 @@ void TimeFrameGPU<NLayers>::downloadTrackExtensionCandidatesDevice()
647671
GPUChkErrS(cudaMemcpy(mTrackExtensionCandidates.data(), mTrackExtensionCandidatesDevice, mTrackExtensionCandidates.size() * sizeof(o2::its::TrackExtensionCandidate<NLayers>), cudaMemcpyDeviceToHost));
648672
}
649673

674+
template <int NLayers>
675+
void TimeFrameGPU<NLayers>::downloadTrackExtensionResultsDevice()
676+
{
677+
GPUTimer timer("downloading fitted track extension results");
678+
GPULog("gpu-transfer: downloading {} fitted track extension results, for {:.2f} MB.", mTrackExtensionResults.size(), mTrackExtensionResults.size() * sizeof(o2::its::TrackExtensionResult<NLayers>) / constants::MB);
679+
if (mTrackExtensionResults.empty()) {
680+
return;
681+
}
682+
GPUChkErrS(cudaMemcpy(mTrackExtensionResults.data(), mTrackExtensionResultsDevice, mTrackExtensionResults.size() * sizeof(o2::its::TrackExtensionResult<NLayers>), cudaMemcpyDeviceToHost));
683+
}
684+
650685
template <int NLayers>
651686
void TimeFrameGPU<NLayers>::unregisterHostMemory(const int maxLayers)
652687
{

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 51 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -409,6 +409,7 @@ void TrackerTraitsGPU<NLayers>::buildTrackExtensionCandidates(const int iteratio
409409
mTimeFrameGPU->getDeviceROFrameClusters(),
410410
mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
411411
mTimeFrameGPU->getDeviceTrackExtensionCandidates(),
412+
mTimeFrameGPU->getDeviceTrackExtensionCandidateOffsets(),
412413
mTimeFrameGPU->getDeviceActiveTrackExtensionHypotheses(),
413414
mTimeFrameGPU->getDeviceNextTrackExtensionHypotheses(),
414415
this->mTrkParams[iteration].LayerRadii,
@@ -427,12 +428,57 @@ void TrackerTraitsGPU<NLayers>::buildTrackExtensionCandidates(const int iteratio
427428
mTimeFrameGPU->getDevicePropagator(),
428429
this->mTrkParams[iteration].CorrType,
429430
mTimeFrameGPU->getStream(0));
430-
mTimeFrameGPU->downloadTrackExtensionCandidatesDevice();
431-
this->importFlatTrackExtensionCandidates(mTimeFrameGPU->getTrackExtensionCandidates(), candidatesPerTrack);
432-
if (std::any_of(candidatesPerTrack.begin(), candidatesPerTrack.end(), [](const auto& candidates) { return !candidates.empty(); })) {
433-
return;
431+
mTimeFrameGPU->createTrackExtensionResultsDevice(nTracks);
432+
computeTrackExtensionResultsHandler<NLayers>(mTimeFrameGPU->getDeviceTrackExtensionStartStates(),
433+
mTimeFrameGPU->getDeviceTrackExtensionCandidates(),
434+
mTimeFrameGPU->getDeviceTrackExtensionCandidateOffsets(),
435+
mTimeFrameGPU->getDeviceTrackExtensionResults(),
436+
mTimeFrameGPU->getDeviceArrayTrackingFrameInfo(),
437+
this->mTrkParams[iteration].LayerxX0,
438+
static_cast<int>(nTracks),
439+
this->mTrkParams[iteration].NLayers,
440+
this->mBz,
441+
this->mTrkParams[iteration].MaxChi2ClusterAttachment,
442+
this->mTrkParams[iteration].MaxChi2NDF,
443+
mTimeFrameGPU->getDevicePropagator(),
444+
this->mTrkParams[iteration].CorrType,
445+
this->mTrkParams[iteration].ShiftRefToCluster,
446+
mTimeFrameGPU->getStream(0));
447+
mTimeFrameGPU->downloadTrackExtensionResultsDevice();
448+
449+
const auto& results = mTimeFrameGPU->getTrackExtensionResults();
450+
for (int iResult{0}; iResult < static_cast<int>(results.size()); ++iResult) {
451+
const auto& result = results[iResult];
452+
if (!result.isValid()) {
453+
continue;
454+
}
455+
auto candidate = result.candidate;
456+
candidate.fittedTrackIndex = iResult;
457+
candidatesPerTrack[result.trackIndex].push_back(candidate);
458+
}
459+
}
460+
461+
template <int NLayers>
462+
bool TrackerTraitsGPU<NLayers>::materializeTrackExtensionCandidate(TrackITSExt& track, const typename TrackerTraits<NLayers>::TrackExtensionCandidateN& candidate, const int iteration)
463+
{
464+
const auto& results = mTimeFrameGPU->getTrackExtensionResults();
465+
if (candidate.fittedTrackIndex < 0 || candidate.fittedTrackIndex >= static_cast<int>(results.size())) {
466+
return TrackerTraits<NLayers>::materializeTrackExtensionCandidate(track, candidate, iteration);
467+
}
468+
const auto& result = results[candidate.fittedTrackIndex];
469+
if (!result.isValid() || result.trackIndex != candidate.trackIndex) {
470+
return false;
471+
}
472+
track = result.track;
473+
this->updateExtendedTrackTimeStamp(track, iteration);
474+
uint32_t diff{0};
475+
for (int iLayer{0}; iLayer < this->mTrkParams[iteration].NLayers; ++iLayer) {
476+
if (candidate.addedClusters[iLayer] != constants::UnusedIndex) {
477+
diff |= (0x1u << iLayer);
478+
}
434479
}
435-
TrackerTraits<NLayers>::buildTrackExtensionCandidates(iteration, candidatesPerTrack);
480+
track.setPattern(track.getPattern() | (diff << 24));
481+
return true;
436482
}
437483

438484
template <int NLayers>

0 commit comments

Comments
 (0)