Skip to content

Commit 2a21aa4

Browse files
committed
ITS: re-enable the possibility of extending tracks
1 parent 231abca commit 2a21aa4

16 files changed

Lines changed: 1640 additions & 6 deletions

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,7 @@
1919
#include "ITStracking/BoundedAllocator.h"
2020
#include "ITStracking/TimeFrame.h"
2121
#include "ITStracking/Configuration.h"
22+
#include "ITStracking/TrackExtensionCandidate.h"
2223
#include "ITStrackingGPU/Utils.h"
2324

2425
namespace o2::its::gpu
@@ -90,8 +91,14 @@ class TimeFrameGPU : public TimeFrame<NLayers>
9091
void createNeighboursDevice(const unsigned int layer);
9192
void createNeighboursLUTDevice(const int, const unsigned int);
9293
void createTrackITSExtDevice(const size_t);
94+
void loadTrackExtensionStartStatesDevice();
95+
void createTrackExtensionCandidatesDevice(const size_t);
96+
void createTrackExtensionScratchDevice(const int nThreads, const int beamWidth);
97+
void createTrackExtensionResultsDevice(const size_t);
9398
void downloadTrackITSExtDevice();
9499
void downloadCellsNeighboursDevice(std::vector<bounded_vector<CellNeighbour>>&, const int);
100+
void downloadTrackExtensionCandidatesDevice();
101+
void downloadTrackExtensionResultsDevice();
95102
void downloadNeighboursLUTDevice(bounded_vector<int>&, const int);
96103
void downloadCellsDevice();
97104
void downloadCellsLUTDevice();
@@ -118,13 +125,21 @@ class TimeFrameGPU : public TimeFrame<NLayers>
118125
const auto getDeviceTrackingTopologyView() const { return mDeviceTrackingTopologyView; }
119126
int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
120127
auto& getTrackITSExt() { return mTrackITSExt; }
128+
auto& getTrackExtensionCandidates() { return mTrackExtensionCandidates; }
129+
auto& getTrackExtensionResults() { return mTrackExtensionResults; }
121130
Vertex* getDeviceVertices() { return mPrimaryVerticesDevice; }
122131
int* getDeviceROFramesPV() { return mROFramesPVDevice; }
123132
unsigned char* getDeviceUsedClusters(const int);
124133
const o2::base::Propagator* getChainPropagator();
125134

126135
// Hybrid
127136
TrackITSExt* getDeviceTrackITSExt() { return mTrackITSExtDevice; }
137+
TrackExtensionStartState<NLayers>* getDeviceTrackExtensionStartStates() { return mTrackExtensionStartStatesDevice; }
138+
TrackExtensionCandidate<NLayers>* getDeviceTrackExtensionCandidates() { return mTrackExtensionCandidatesDevice; }
139+
int* getDeviceTrackExtensionCandidateOffsets() { return mTrackExtensionCandidateOffsetsDevice; }
140+
TrackExtensionHypothesis<NLayers>* getDeviceActiveTrackExtensionHypotheses() { return mActiveTrackExtensionHypothesesDevice; }
141+
TrackExtensionHypothesis<NLayers>* getDeviceNextTrackExtensionHypotheses() { return mNextTrackExtensionHypothesesDevice; }
142+
TrackExtensionResult<NLayers>* getDeviceTrackExtensionResults() { return mTrackExtensionResultsDevice; }
128143
int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
129144
gsl::span<int*> getDeviceNeighboursLUTs() { return mNeighboursLUTDevice; }
130145
CellNeighbour** getDeviceArrayNeighbours() { return mNeighboursDeviceArray; }
@@ -222,6 +237,13 @@ class TimeFrameGPU : public TimeFrame<NLayers>
222237
float** mCellSeedsChi2DeviceArray;
223238

224239
TrackITSExt* mTrackITSExtDevice;
240+
TrackExtensionStartState<NLayers>* mTrackExtensionStartStatesDevice{nullptr};
241+
TrackExtensionCandidate<NLayers>* mTrackExtensionCandidatesDevice{nullptr};
242+
int* mTrackExtensionCandidateOffsetsDevice{nullptr};
243+
TrackExtensionHypothesis<NLayers>* mActiveTrackExtensionHypothesesDevice{nullptr};
244+
TrackExtensionHypothesis<NLayers>* mNextTrackExtensionHypothesesDevice{nullptr};
245+
TrackExtensionResult<NLayers>* mTrackExtensionResultsDevice{nullptr};
246+
unsigned int mNTrackExtensionResults{0};
225247
std::array<CellNeighbour*, MaxCells> mNeighboursDevice{};
226248
CellNeighbour** mNeighboursDeviceArray{nullptr};
227249
std::array<TrackingFrameInfo*, NLayers> mTrackingFrameInfoDevice;
@@ -238,6 +260,12 @@ class TimeFrameGPU : public TimeFrame<NLayers>
238260

239261
// Temporary buffer for storing output tracks from GPU tracking
240262
bounded_vector<TrackITSExt> mTrackITSExt;
263+
// Temporary buffer for compact track states used by GPU track extension
264+
bounded_vector<TrackExtensionStartState<NLayers>> mTrackExtensionStartStates;
265+
// Temporary buffer for compact track extension proposals from GPU tracking
266+
bounded_vector<TrackExtensionCandidate<NLayers>> mTrackExtensionCandidates;
267+
// Temporary buffer for fitted track extension proposals from GPU tracking
268+
bounded_vector<TrackExtensionResult<NLayers>> mTrackExtensionResults;
241269
};
242270

243271
template <int NLayers>

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ class TrackerTraitsGPU final : public TrackerTraits<NLayers>
3535
void computeLayerCells(const int iteration) final;
3636
void findCellsNeighbours(const int iteration) final;
3737
void findRoads(const int iteration) final;
38+
void extendTracks(const int iteration) final;
3839

3940
void setBz(float) final;
4041

@@ -47,6 +48,11 @@ class TrackerTraitsGPU final : public TrackerTraits<NLayers>
4748
int getTFNumberOfCells() const override;
4849

4950
private:
51+
bool hasTrackFollower(const int iteration) const;
52+
53+
void buildTrackExtensionCandidates(const int iteration, typename TrackerTraits<NLayers>::TrackExtensionCandidates& candidatesPerTrack) final;
54+
bool materializeTrackExtensionCandidate(TrackITSExt& track, const typename TrackerTraits<NLayers>::TrackExtensionCandidateN& candidate, const int iteration) final;
55+
5056
IndexTableUtilsN* mDeviceIndexTableUtils;
5157
gpu::TimeFrameGPU<NLayers>* mTimeFrameGPU;
5258
};

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 54 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,13 @@
1313
#ifndef ITSTRACKINGGPU_TRACKINGKERNELS_H_
1414
#define ITSTRACKINGGPU_TRACKINGKERNELS_H_
1515

16+
#include <array>
1617
#include <gsl/gsl>
1718

1819
#include "ITStracking/BoundedAllocator.h"
1920
#include "ITStracking/ROFLookupTables.h"
2021
#include "ITStracking/TrackingTopology.h"
22+
#include "ITStracking/TrackExtensionCandidate.h"
2123
#include "ITStrackingGPU/Utils.h"
2224
#include "DetectorsBase/Propagator.h"
2325

@@ -35,6 +37,58 @@ class Cluster;
3537
class TrackITSExt;
3638
class ExternalAllocator;
3739

40+
inline constexpr int kTrackExtensionLaunchBlocks = 60;
41+
inline constexpr int kTrackExtensionLaunchThreadsPerBlock = 256;
42+
inline constexpr int kTrackExtensionLaunchThreads = kTrackExtensionLaunchBlocks * kTrackExtensionLaunchThreadsPerBlock;
43+
44+
template <int NLayers>
45+
void computeTrackExtensionCandidatesHandler(const TrackExtensionStartState<NLayers>* tracks,
46+
const IndexTableUtils<NLayers>* utils,
47+
const typename ROFMaskTable<NLayers>::View& rofMask,
48+
const typename ROFOverlapTable<NLayers>::View& rofOverlaps,
49+
const Cluster** clusters,
50+
const unsigned char** usedClusters,
51+
const int** clustersIndexTables,
52+
const int** ROFClusters,
53+
const TrackingFrameInfo** trackingFrameInfo,
54+
TrackExtensionCandidate<NLayers>* candidates,
55+
int* candidateOffsets,
56+
TrackExtensionHypothesis<NLayers>* activeHypotheses,
57+
TrackExtensionHypothesis<NLayers>* nextHypotheses,
58+
const std::array<float, NLayers> layerRadii,
59+
const std::array<float, NLayers> layerxX0,
60+
const int nTracks,
61+
const int nLayers,
62+
const int phiBins,
63+
const int beamWidth,
64+
const bool extendTop,
65+
const bool extendBot,
66+
const float bz,
67+
const float maxChi2ClusterAttachment,
68+
const float maxChi2NDF,
69+
const float nSigmaCutPhi,
70+
const float nSigmaCutZ,
71+
const o2::base::Propagator* propagator,
72+
const o2::base::PropagatorF::MatCorrType matCorrType,
73+
gpu::Stream& stream);
74+
75+
template <int NLayers>
76+
void computeTrackExtensionResultsHandler(const TrackExtensionStartState<NLayers>* tracks,
77+
const TrackExtensionCandidate<NLayers>* candidates,
78+
const int* candidateOffsets,
79+
TrackExtensionResult<NLayers>* results,
80+
const TrackingFrameInfo** trackingFrameInfo,
81+
const std::array<float, NLayers> layerxX0,
82+
const int nTracks,
83+
const int nLayers,
84+
const float bz,
85+
const float maxChi2ClusterAttachment,
86+
const float maxChi2NDF,
87+
const o2::base::Propagator* propagator,
88+
const o2::base::PropagatorF::MatCorrType matCorrType,
89+
const bool shiftRefToCluster,
90+
gpu::Stream& stream);
91+
3892
template <int NLayers>
3993
void countTrackletsInROFsHandler(const IndexTableUtils<NLayers>* utils,
4094
const typename ROFMaskTable<NLayers>::View& rofMask,

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212

1313
#include <cuda_runtime.h>
1414

15+
#include <algorithm>
1516
#include <unistd.h>
1617
#include <vector>
1718

@@ -581,6 +582,87 @@ void TimeFrameGPU<NLayers>::createTrackITSExtDevice(const size_t nSeeds)
581582
GPUChkErrS(cudaMemset(mTrackITSExtDevice, 0, mNTracks * sizeof(o2::its::TrackITSExt)));
582583
}
583584

585+
template <int NLayers>
586+
void TimeFrameGPU<NLayers>::loadTrackExtensionStartStatesDevice()
587+
{
588+
GPUTimer timer("loading track extension start states");
589+
GPULog("gpu-transfer: loading {} track extension start states, for {:.2f} MB.", this->mTracks.size(), this->mTracks.size() * sizeof(o2::its::TrackExtensionStartState<NLayers>) / constants::MB);
590+
mTrackExtensionStartStatesDevice = nullptr;
591+
mTrackExtensionStartStates = bounded_vector<TrackExtensionStartState<NLayers>>(this->mTracks.size(), {}, this->getMemoryPool().get());
592+
if (this->mTracks.empty()) {
593+
return;
594+
}
595+
for (size_t iTrack{0}; iTrack < this->mTracks.size(); ++iTrack) {
596+
const auto& track = this->mTracks[iTrack];
597+
auto& state = mTrackExtensionStartStates[iTrack];
598+
state.paramIn = track.getParamIn();
599+
state.paramOut = track.getParamOut();
600+
state.time = track.getTimeStamp();
601+
state.chi2 = track.getChi2();
602+
state.nClusters = track.getNClusters();
603+
state.firstClusterLayer = static_cast<int>(track.getFirstClusterLayer());
604+
state.lastClusterLayer = static_cast<int>(track.getLastClusterLayer());
605+
for (int iLayer{0}; iLayer < NLayers; ++iLayer) {
606+
state.clusters[iLayer] = track.getClusterIndex(iLayer);
607+
}
608+
}
609+
allocMem(reinterpret_cast<void**>(&mTrackExtensionStartStatesDevice), mTrackExtensionStartStates.size() * sizeof(o2::its::TrackExtensionStartState<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
610+
GPUChkErrS(cudaMemcpy(mTrackExtensionStartStatesDevice, mTrackExtensionStartStates.data(), mTrackExtensionStartStates.size() * sizeof(o2::its::TrackExtensionStartState<NLayers>), cudaMemcpyHostToDevice));
611+
}
612+
613+
template <int NLayers>
614+
void TimeFrameGPU<NLayers>::createTrackExtensionCandidatesDevice(const size_t nTracks)
615+
{
616+
GPUTimer timer("reserving track extension candidates");
617+
const size_t nCandidates = nTracks * MaxTrackExtensionCandidatesPerTrack;
618+
GPULog("gpu-allocation: reserving {} track extension candidates, for {:.2f} MB.", nCandidates, nCandidates * sizeof(o2::its::TrackExtensionCandidate<NLayers>) / constants::MB);
619+
mTrackExtensionCandidates = bounded_vector<TrackExtensionCandidate<NLayers>>(nCandidates, {}, this->getMemoryPool().get());
620+
mTrackExtensionCandidatesDevice = nullptr;
621+
mTrackExtensionCandidateOffsetsDevice = nullptr;
622+
if (mTrackExtensionCandidates.empty()) {
623+
return;
624+
}
625+
allocMem(reinterpret_cast<void**>(&mTrackExtensionCandidatesDevice), nCandidates * sizeof(o2::its::TrackExtensionCandidate<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
626+
allocMem(reinterpret_cast<void**>(&mTrackExtensionCandidateOffsetsDevice), (nTracks + 1) * sizeof(int), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
627+
}
628+
629+
template <int NLayers>
630+
void TimeFrameGPU<NLayers>::createTrackExtensionScratchDevice(const int nThreads, const int beamWidth)
631+
{
632+
GPUTimer timer("reserving track extension scratch");
633+
const size_t nHypotheses = static_cast<size_t>(std::max(1, nThreads)) * std::max(1, beamWidth);
634+
GPULog("gpu-allocation: reserving {} track extension hypotheses per scratch buffer, for {:.2f} MB each.", nHypotheses, nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>) / constants::MB);
635+
mActiveTrackExtensionHypothesesDevice = nullptr;
636+
mNextTrackExtensionHypothesesDevice = nullptr;
637+
if (nHypotheses == 0) {
638+
return;
639+
}
640+
allocMem(reinterpret_cast<void**>(&mActiveTrackExtensionHypothesesDevice), nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
641+
allocMem(reinterpret_cast<void**>(&mNextTrackExtensionHypothesesDevice), nHypotheses * sizeof(o2::its::TrackExtensionHypothesis<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
642+
}
643+
644+
template <int NLayers>
645+
void TimeFrameGPU<NLayers>::createTrackExtensionResultsDevice(const size_t nTracks)
646+
{
647+
GPUTimer timer("reserving fitted track extension results");
648+
mNTrackExtensionResults = 0;
649+
if (nTracks == 0 || mTrackExtensionCandidateOffsetsDevice == nullptr) {
650+
mTrackExtensionResults = bounded_vector<TrackExtensionResult<NLayers>>(0, {}, this->getMemoryPool().get());
651+
mTrackExtensionResultsDevice = nullptr;
652+
return;
653+
}
654+
int nResults{0};
655+
GPUChkErrS(cudaMemcpy(&nResults, mTrackExtensionCandidateOffsetsDevice + nTracks, sizeof(int), cudaMemcpyDeviceToHost));
656+
mNTrackExtensionResults = nResults;
657+
GPULog("gpu-allocation: reserving {} fitted track extension results, for {:.2f} MB.", mNTrackExtensionResults, mNTrackExtensionResults * sizeof(o2::its::TrackExtensionResult<NLayers>) / constants::MB);
658+
mTrackExtensionResults = bounded_vector<TrackExtensionResult<NLayers>>(mNTrackExtensionResults, {}, this->getMemoryPool().get());
659+
mTrackExtensionResultsDevice = nullptr;
660+
if (mTrackExtensionResults.empty()) {
661+
return;
662+
}
663+
allocMem(reinterpret_cast<void**>(&mTrackExtensionResultsDevice), mNTrackExtensionResults * sizeof(o2::its::TrackExtensionResult<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
664+
}
665+
584666
template <int NLayers>
585667
void TimeFrameGPU<NLayers>::downloadCellsDevice()
586668
{
@@ -627,6 +709,28 @@ void TimeFrameGPU<NLayers>::downloadTrackITSExtDevice()
627709
GPUChkErrS(cudaMemcpy(mTrackITSExt.data(), mTrackITSExtDevice, mTrackITSExt.size() * sizeof(o2::its::TrackITSExt), cudaMemcpyDeviceToHost));
628710
}
629711

712+
template <int NLayers>
713+
void TimeFrameGPU<NLayers>::downloadTrackExtensionCandidatesDevice()
714+
{
715+
GPUTimer timer("downloading track extension candidates");
716+
GPULog("gpu-transfer: downloading {} track extension candidates, for {:.2f} MB.", mTrackExtensionCandidates.size(), mTrackExtensionCandidates.size() * sizeof(o2::its::TrackExtensionCandidate<NLayers>) / constants::MB);
717+
if (mTrackExtensionCandidates.empty()) {
718+
return;
719+
}
720+
GPUChkErrS(cudaMemcpy(mTrackExtensionCandidates.data(), mTrackExtensionCandidatesDevice, mTrackExtensionCandidates.size() * sizeof(o2::its::TrackExtensionCandidate<NLayers>), cudaMemcpyDeviceToHost));
721+
}
722+
723+
template <int NLayers>
724+
void TimeFrameGPU<NLayers>::downloadTrackExtensionResultsDevice()
725+
{
726+
GPUTimer timer("downloading fitted track extension results");
727+
GPULog("gpu-transfer: downloading {} fitted track extension results, for {:.2f} MB.", mTrackExtensionResults.size(), mTrackExtensionResults.size() * sizeof(o2::its::TrackExtensionResult<NLayers>) / constants::MB);
728+
if (mTrackExtensionResults.empty()) {
729+
return;
730+
}
731+
GPUChkErrS(cudaMemcpy(mTrackExtensionResults.data(), mTrackExtensionResultsDevice, mTrackExtensionResults.size() * sizeof(o2::its::TrackExtensionResult<NLayers>), cudaMemcpyDeviceToHost));
732+
}
733+
630734
template <int NLayers>
631735
void TimeFrameGPU<NLayers>::unregisterHostMemory(const int maxLayers)
632736
{

0 commit comments

Comments
 (0)