Skip to content

Commit 816e4e0

Browse files
committed
ITS: continue with GPU integration
1 parent 3267bbb commit 816e4e0

6 files changed

Lines changed: 74 additions & 9 deletions

File tree

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -85,6 +85,7 @@ class TimeFrameGPU : public TimeFrame<NLayers>
8585
void createNeighboursDevice(const unsigned int layer);
8686
void createNeighboursLUTDevice(const int, const unsigned int);
8787
void createTrackITSExtDevice(const size_t);
88+
void loadTrackExtensionStartStatesDevice();
8889
void createTrackExtensionCandidatesDevice(const size_t);
8990
void downloadTrackITSExtDevice();
9091
void downloadTrackExtensionCandidatesDevice();
@@ -122,6 +123,7 @@ class TimeFrameGPU : public TimeFrame<NLayers>
122123

123124
// Hybrid
124125
TrackITSExt* getDeviceTrackITSExt() { return mTrackITSExtDevice; }
126+
TrackExtensionStartState<NLayers>* getDeviceTrackExtensionStartStates() { return mTrackExtensionStartStatesDevice; }
125127
TrackExtensionCandidate<NLayers>* getDeviceTrackExtensionCandidates() { return mTrackExtensionCandidatesDevice; }
126128
int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
127129
gsl::span<int*> getDeviceNeighboursLUTs() { return mNeighboursLUTDevice; }
@@ -220,6 +222,7 @@ class TimeFrameGPU : public TimeFrame<NLayers>
220222
float** mCellSeedsChi2DeviceArray;
221223

222224
TrackITSExt* mTrackITSExtDevice;
225+
TrackExtensionStartState<NLayers>* mTrackExtensionStartStatesDevice{nullptr};
223226
TrackExtensionCandidate<NLayers>* mTrackExtensionCandidatesDevice{nullptr};
224227
std::array<gpuPair<int, int>*, NLayers - 2> mNeighbourPairsDevice;
225228
std::array<int*, NLayers - 2> mNeighboursDevice;
@@ -237,6 +240,8 @@ class TimeFrameGPU : public TimeFrame<NLayers>
237240

238241
// Temporary buffer for storing output tracks from GPU tracking
239242
bounded_vector<TrackITSExt> mTrackITSExt;
243+
// Temporary buffer for compact track states used by GPU track extension
244+
bounded_vector<TrackExtensionStartState<NLayers>> mTrackExtensionStartStates;
240245
// Temporary buffer for compact track extension proposals from GPU tracking
241246
bounded_vector<TrackExtensionCandidate<NLayers>> mTrackExtensionCandidates;
242247
};

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,8 @@ class TrackITSExt;
3737
class ExternalAllocator;
3838

3939
template <int NLayers>
40-
void computeTrackExtensionCandidatesHandler(TrackExtensionCandidate<NLayers>* candidates,
40+
void computeTrackExtensionCandidatesHandler(const TrackExtensionStartState<NLayers>* tracks,
41+
TrackExtensionCandidate<NLayers>* candidates,
4142
const int nTracks,
4243
gpu::Stream& stream);
4344

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TimeFrameGPU.cu

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -532,13 +532,45 @@ void TimeFrameGPU<NLayers>::createTrackITSExtDevice(const size_t nSeeds)
532532
GPUChkErrS(cudaMemset(mTrackITSExtDevice, 0, mNTracks * sizeof(o2::its::TrackITSExt)));
533533
}
534534

535+
template <int NLayers>
536+
void TimeFrameGPU<NLayers>::loadTrackExtensionStartStatesDevice()
537+
{
538+
GPUTimer timer("loading track extension start states");
539+
GPULog("gpu-transfer: loading {} track extension start states, for {:.2f} MB.", this->mTracks.size(), this->mTracks.size() * sizeof(o2::its::TrackExtensionStartState<NLayers>) / constants::MB);
540+
mTrackExtensionStartStatesDevice = nullptr;
541+
mTrackExtensionStartStates = bounded_vector<TrackExtensionStartState<NLayers>>(this->mTracks.size(), {}, this->getMemoryPool().get());
542+
if (this->mTracks.empty()) {
543+
return;
544+
}
545+
for (size_t iTrack{0}; iTrack < this->mTracks.size(); ++iTrack) {
546+
const auto& track = this->mTracks[iTrack];
547+
auto& state = mTrackExtensionStartStates[iTrack];
548+
state.paramIn = track.getParamIn();
549+
state.paramOut = track.getParamOut();
550+
state.time = track.getTimeStamp();
551+
state.chi2 = track.getChi2();
552+
state.nClusters = track.getNClusters();
553+
state.firstClusterLayer = static_cast<int>(track.getFirstClusterLayer());
554+
state.lastClusterLayer = static_cast<int>(track.getLastClusterLayer());
555+
for (int iLayer{0}; iLayer < NLayers; ++iLayer) {
556+
state.clusters[iLayer] = track.getClusterIndex(iLayer);
557+
}
558+
}
559+
allocMem(reinterpret_cast<void**>(&mTrackExtensionStartStatesDevice), mTrackExtensionStartStates.size() * sizeof(o2::its::TrackExtensionStartState<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
560+
GPUChkErrS(cudaMemcpy(mTrackExtensionStartStatesDevice, mTrackExtensionStartStates.data(), mTrackExtensionStartStates.size() * sizeof(o2::its::TrackExtensionStartState<NLayers>), cudaMemcpyHostToDevice));
561+
}
562+
535563
template <int NLayers>
536564
void TimeFrameGPU<NLayers>::createTrackExtensionCandidatesDevice(const size_t nTracks)
537565
{
538566
GPUTimer timer("reserving track extension candidates");
539567
const size_t nCandidates = nTracks * MaxTrackExtensionCandidatesPerTrack;
540568
GPULog("gpu-allocation: reserving {} track extension candidates, for {:.2f} MB.", nCandidates, nCandidates * sizeof(o2::its::TrackExtensionCandidate<NLayers>) / constants::MB);
541569
mTrackExtensionCandidates = bounded_vector<TrackExtensionCandidate<NLayers>>(nCandidates, {}, this->getMemoryPool().get());
570+
mTrackExtensionCandidatesDevice = nullptr;
571+
if (mTrackExtensionCandidates.empty()) {
572+
return;
573+
}
542574
allocMem(reinterpret_cast<void**>(&mTrackExtensionCandidatesDevice), nCandidates * sizeof(o2::its::TrackExtensionCandidate<NLayers>), this->hasFrameworkAllocator(), (o2::gpu::GPUMemoryResource::MEMORY_GPU | o2::gpu::GPUMemoryResource::MEMORY_STACK));
543575
}
544576

@@ -593,6 +625,9 @@ void TimeFrameGPU<NLayers>::downloadTrackExtensionCandidatesDevice()
593625
{
594626
GPUTimer timer("downloading track extension candidates");
595627
GPULog("gpu-transfer: downloading {} track extension candidates, for {:.2f} MB.", mTrackExtensionCandidates.size(), mTrackExtensionCandidates.size() * sizeof(o2::its::TrackExtensionCandidate<NLayers>) / constants::MB);
628+
if (mTrackExtensionCandidates.empty()) {
629+
return;
630+
}
596631
GPUChkErrS(cudaMemcpy(mTrackExtensionCandidates.data(), mTrackExtensionCandidatesDevice, mTrackExtensionCandidates.size() * sizeof(o2::its::TrackExtensionCandidate<NLayers>), cudaMemcpyDeviceToHost));
597632
}
598633

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackerTraitsGPU.cxx

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -393,9 +393,12 @@ bool TrackerTraitsGPU<NLayers>::hasTrackFollower(const int iteration) const
393393
template <int NLayers>
394394
void TrackerTraitsGPU<NLayers>::buildTrackExtensionCandidates(const int iteration, typename TrackerTraits<NLayers>::TrackExtensionCandidates& candidatesPerTrack)
395395
{
396-
mTimeFrameGPU->createTrackExtensionCandidatesDevice(this->mTimeFrame->getTracks().size());
397-
computeTrackExtensionCandidatesHandler<NLayers>(mTimeFrameGPU->getDeviceTrackExtensionCandidates(),
398-
static_cast<int>(this->mTimeFrame->getTracks().size()),
396+
const auto nTracks = this->mTimeFrame->getTracks().size();
397+
mTimeFrameGPU->loadTrackExtensionStartStatesDevice();
398+
mTimeFrameGPU->createTrackExtensionCandidatesDevice(nTracks);
399+
computeTrackExtensionCandidatesHandler<NLayers>(mTimeFrameGPU->getDeviceTrackExtensionStartStates(),
400+
mTimeFrameGPU->getDeviceTrackExtensionCandidates(),
401+
static_cast<int>(nTracks),
399402
mTimeFrameGPU->getStream(0));
400403
mTimeFrameGPU->downloadTrackExtensionCandidatesDevice();
401404
this->importFlatTrackExtensionCandidates(mTimeFrameGPU->getTrackExtensionCandidates(), candidatesPerTrack);

Detectors/ITSMFT/ITS/tracking/GPU/cuda/TrackingKernels.cu

Lines changed: 12 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -104,8 +104,9 @@ struct compare_track_chi2 {
104104
};
105105

106106
template <int NLayers>
107-
GPUg() void __launch_bounds__(256, 1) resetTrackExtensionCandidatesKernel(TrackExtensionCandidate<NLayers>* candidates, const int nTracks)
107+
GPUg() void __launch_bounds__(256, 1) resetTrackExtensionCandidatesKernel(const TrackExtensionStartState<NLayers>* tracks, TrackExtensionCandidate<NLayers>* candidates, const int nTracks)
108108
{
109+
(void)tracks;
109110
const int nCandidates = nTracks * MaxTrackExtensionCandidatesPerTrack;
110111
for (int iCandidate = blockIdx.x * blockDim.x + threadIdx.x; iCandidate < nCandidates; iCandidate += blockDim.x * gridDim.x) {
111112
candidates[iCandidate].reset();
@@ -570,11 +571,15 @@ GPUg() void __launch_bounds__(256, 1) processNeighboursKernel(
570571
} // namespace gpu
571572

572573
template <int NLayers>
573-
void computeTrackExtensionCandidatesHandler(TrackExtensionCandidate<NLayers>* candidates,
574+
void computeTrackExtensionCandidatesHandler(const TrackExtensionStartState<NLayers>* tracks,
575+
TrackExtensionCandidate<NLayers>* candidates,
574576
const int nTracks,
575577
gpu::Stream& stream)
576578
{
577-
gpu::resetTrackExtensionCandidatesKernel<NLayers><<<60, 256, 0, stream.get()>>>(candidates, nTracks);
579+
if (nTracks <= 0 || candidates == nullptr) {
580+
return;
581+
}
582+
gpu::resetTrackExtensionCandidatesKernel<NLayers><<<60, 256, 0, stream.get()>>>(tracks, candidates, nTracks);
578583
}
579584

580585
template <int NLayers>
@@ -1096,7 +1101,8 @@ void computeTrackSeedHandler(TrackSeed<NLayers>* trackSeeds,
10961101
}
10971102

10981103
/// Explicit instantiation of ITS2 handlers
1099-
template void computeTrackExtensionCandidatesHandler<7>(TrackExtensionCandidate<7>* candidates,
1104+
template void computeTrackExtensionCandidatesHandler<7>(const TrackExtensionStartState<7>* tracks,
1105+
TrackExtensionCandidate<7>* candidates,
11001106
const int nTracks,
11011107
gpu::Stream& stream);
11021108

@@ -1281,7 +1287,8 @@ template void computeTrackSeedHandler(TrackSeed<7>* trackSeeds,
12811287

12821288
/// Explicit instantiation of ALICE3 handlers
12831289
#ifdef ENABLE_UPGRADES
1284-
template void computeTrackExtensionCandidatesHandler<11>(TrackExtensionCandidate<11>* candidates,
1290+
template void computeTrackExtensionCandidatesHandler<11>(const TrackExtensionStartState<11>* tracks,
1291+
TrackExtensionCandidate<11>* candidates,
12851292
const int nTracks,
12861293
gpu::Stream& stream);
12871294

Detectors/ITSMFT/ITS/tracking/include/ITStracking/TrackExtensionCandidate.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,11 +16,25 @@
1616
#include <cstddef>
1717

1818
#include "GPUCommonDef.h"
19+
#include "DataFormatsITS/TimeEstBC.h"
1920
#include "ITStracking/Constants.h"
21+
#include "ReconstructionDataFormats/Track.h"
2022

2123
namespace o2::its
2224
{
2325

26+
template <int NLayers>
27+
struct TrackExtensionStartState {
28+
o2::track::TrackParCov paramIn;
29+
o2::track::TrackParCov paramOut;
30+
std::array<int, NLayers> clusters{};
31+
TimeStamp time;
32+
float chi2{0.f};
33+
int nClusters{0};
34+
int firstClusterLayer{constants::UnusedIndex};
35+
int lastClusterLayer{constants::UnusedIndex};
36+
};
37+
2438
template <int NLayers>
2539
struct TrackExtensionCandidate {
2640
static constexpr float InvalidChi2 = 1.e20f;

0 commit comments

Comments
 (0)