Skip to content

Commit 0364324

Browse files
authored
ITS: GPU: prepare to lazy loading of data (#14585)
1 parent 7262c36 commit 0364324

File tree

8 files changed

+708
-692
lines changed

8 files changed

+708
-692
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 52 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -13,13 +13,14 @@
1313
#ifndef TRACKINGITSGPU_INCLUDE_TIMEFRAMEGPU_H
1414
#define TRACKINGITSGPU_INCLUDE_TIMEFRAMEGPU_H
1515

16+
#include <gsl/gsl>
17+
#include <bitset>
18+
1619
#include "ITStracking/BoundedAllocator.h"
1720
#include "ITStracking/TimeFrame.h"
1821
#include "ITStracking/Configuration.h"
1922
#include "ITStrackingGPU/Utils.h"
2023

21-
#include <gsl/gsl>
22-
2324
namespace o2::its::gpu
2425
{
2526

@@ -28,7 +29,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
2829
{
2930
public:
3031
TimeFrameGPU();
31-
~TimeFrameGPU();
32+
~TimeFrameGPU() = default;
3233

3334
/// Most relevant operations
3435
void registerHostMemory(const int);
@@ -37,18 +38,25 @@ class TimeFrameGPU : public TimeFrame<nLayers>
3738
void initDevice(IndexTableUtils*, const TrackingParameters& trkParam, const TimeFrameGPUParameters&, const int, const int);
3839
void initDeviceSAFitting();
3940
void loadIndexTableUtils(const int);
40-
void loadTrackingFrameInfoDevice(const int);
41-
void loadUnsortedClustersDevice(const int);
42-
void loadClustersDevice(const int);
43-
void loadClustersIndexTables(const int iteration);
44-
void createUsedClustersDevice(const int);
41+
void loadTrackingFrameInfoDevice(const int, const int);
42+
void createTrackingFrameInfoDeviceArray(const int);
43+
void loadUnsortedClustersDevice(const int, const int);
44+
void createUnsortedClustersDeviceArray(const int);
45+
void loadClustersDevice(const int, const int);
46+
void createClustersDeviceArray(const int);
47+
void loadClustersIndexTables(const int, const int);
48+
void createClustersIndexTablesArray(const int iteration);
49+
void createUsedClustersDevice(const int, const int);
50+
void createUsedClustersDeviceArray(const int);
4551
void loadUsedClustersDevice();
46-
void loadROframeClustersDevice(const int);
52+
void loadROFrameClustersDevice(const int, const int);
53+
void createROFrameClustersDeviceArray(const int);
4754
void loadMultiplicityCutMask(const int);
4855
void loadVertices(const int);
4956

5057
///
51-
void createTrackletsLUTDevice(const int);
58+
void createTrackletsLUTDevice(const int, const int);
59+
void createTrackletsLUTDeviceArray(const int);
5260
void loadTrackletsDevice();
5361
void loadTrackletsLUTDevice();
5462
void loadCellsDevice();
@@ -57,11 +65,14 @@ class TimeFrameGPU : public TimeFrame<nLayers>
5765
void loadTrackSeedsChi2Device();
5866
void loadRoadsDevice();
5967
void loadTrackSeedsDevice(bounded_vector<CellSeed>&);
60-
void createTrackletsBuffers();
68+
void createTrackletsBuffers(const int);
69+
void createTrackletsBuffersArray(const int);
6170
void createCellsBuffers(const int);
71+
void createCellsBuffersArray(const int);
6272
void createCellsDevice();
63-
void createCellsLUTDevice();
64-
void createNeighboursIndexTablesDevice();
73+
void createCellsLUTDevice(const int);
74+
void createCellsLUTDeviceArray(const int);
75+
void createNeighboursIndexTablesDevice(const int);
6576
void createNeighboursDevice(const unsigned int layer);
6677
void createNeighboursLUTDevice(const int, const unsigned int);
6778
void createTrackITSExtDevice(bounded_vector<CellSeed>&);
@@ -70,10 +81,17 @@ class TimeFrameGPU : public TimeFrame<nLayers>
7081
void downloadNeighboursLUTDevice(bounded_vector<int>&, const int);
7182
void downloadCellsDevice();
7283
void downloadCellsLUTDevice();
84+
85+
/// synchronization
7386
auto& getStream(const size_t stream) { return mGpuStreams[stream]; }
7487
auto& getStreams() { return mGpuStreams; }
7588
void syncStream(const size_t stream);
76-
void syncStreams();
89+
void syncStreams(const bool = true);
90+
void waitEvent(const int, const int);
91+
void recordEvent(const int);
92+
void recordEvents(const int = 0, const int = nLayers);
93+
94+
/// cleanup
7795
virtual void wipe() final;
7896

7997
/// interface
@@ -102,19 +120,19 @@ class TimeFrameGPU : public TimeFrame<nLayers>
102120
const int** getDeviceArrayClustersIndexTables() const { return mClustersIndexTablesDeviceArray; }
103121
std::vector<unsigned int> getClusterSizes();
104122
const unsigned char** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
105-
const int** getDeviceROframeClusters() const { return mROFrameClustersDeviceArray; }
106-
Tracklet** getDeviceArrayTracklets() { return mTrackletsDevice.data(); }
123+
const int** getDeviceROFrameClusters() const { return mROFramesClustersDeviceArray; }
124+
Tracklet** getDeviceArrayTracklets() { return mTrackletsDeviceArray; }
107125
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
108126
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
109127
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
110-
CellSeed** getDeviceArrayCells() { return mCellsDevice.data(); }
128+
CellSeed** getDeviceArrayCells() { return mCellsDeviceArray; }
111129
CellSeed* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
112130
o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
113131
float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; }
114132
int* getDeviceNeighboursIndexTables(const int layer) { return mNeighboursIndexTablesDevice[layer]; }
115133
uint8_t* getDeviceMultCutMask() { return mMultMaskDevice; }
116134

117-
void setDevicePropagator(const o2::base::PropagatorImpl<float>*) override;
135+
void setDevicePropagator(const o2::base::PropagatorImpl<float>* p) final { this->mPropagatorDevice = p; }
118136

119137
// Host-specific getters
120138
gsl::span<int, nLayers - 1> getNTracklets() { return mNTracklets; }
@@ -126,7 +144,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
126144
// Host-available device getters
127145
gsl::span<int*> getDeviceTrackletsLUTs() { return mTrackletsLUTDevice; }
128146
gsl::span<int*> getDeviceCellLUTs() { return mCellsLUTDevice; }
129-
gsl::span<Tracklet*> getDeviceTracklet() { return mTrackletsDevice; }
147+
gsl::span<Tracklet*> getDeviceTracklets() { return mTrackletsDevice; }
130148
gsl::span<CellSeed*> getDeviceCells() { return mCellsDevice; }
131149

132150
// Overridden getters
@@ -137,7 +155,6 @@ class TimeFrameGPU : public TimeFrame<nLayers>
137155
private:
138156
void allocMemAsync(void**, size_t, Stream&, bool); // Abstract owned and unowned memory allocations on specific stream
139157
void allocMem(void**, size_t, bool); // Abstract owned and unowned memory allocations on default stream
140-
bool mHostRegistered = false;
141158
TimeFrameGPUParameters mGpuParams;
142159

143160
// Host-available device buffer sizes
@@ -161,19 +178,21 @@ class TimeFrameGPU : public TimeFrame<nLayers>
161178
const Cluster** mUnsortedClustersDeviceArray;
162179
const int** mClustersIndexTablesDeviceArray;
163180
const unsigned char** mUsedClustersDeviceArray;
164-
const int** mROFrameClustersDeviceArray;
181+
const int** mROFramesClustersDeviceArray;
165182
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
166183
std::array<int*, nLayers - 1> mTrackletsLUTDevice;
167184
std::array<int*, nLayers - 2> mCellsLUTDevice;
168185
std::array<int*, nLayers - 3> mNeighboursLUTDevice;
169186

170-
int** mCellsLUTDeviceArray;
171-
int** mNeighboursCellDeviceArray;
172-
int** mNeighboursCellLUTDeviceArray;
173-
int** mTrackletsLUTDeviceArray;
187+
Tracklet** mTrackletsDeviceArray{nullptr};
188+
int** mCellsLUTDeviceArray{nullptr};
189+
int** mNeighboursCellDeviceArray{nullptr};
190+
int** mNeighboursCellLUTDeviceArray{nullptr};
191+
int** mTrackletsLUTDeviceArray{nullptr};
174192
std::array<CellSeed*, nLayers - 2> mCellsDevice;
175-
std::array<int*, nLayers - 2> mNeighboursIndexTablesDevice;
176-
CellSeed* mTrackSeedsDevice;
193+
CellSeed** mCellsDeviceArray;
194+
std::array<int*, nLayers - 3> mNeighboursIndexTablesDevice;
195+
CellSeed* mTrackSeedsDevice{nullptr};
177196
std::array<o2::track::TrackParCovF*, nLayers - 2> mCellSeedsDevice;
178197
o2::track::TrackParCovF** mCellSeedsDeviceArray;
179198
std::array<float*, nLayers - 2> mCellSeedsChi2Device;
@@ -188,6 +207,12 @@ class TimeFrameGPU : public TimeFrame<nLayers>
188207

189208
// State
190209
Streams mGpuStreams;
210+
std::bitset<nLayers + 1> mPinnedUnsortedClusters{0};
211+
std::bitset<nLayers + 1> mPinnedClusters{0};
212+
std::bitset<nLayers + 1> mPinnedClustersIndexTables{0};
213+
std::bitset<nLayers + 1> mPinnedUsedClusters{0};
214+
std::bitset<nLayers + 1> mPinnedROFramesClusters{0};
215+
std::bitset<nLayers + 1> mPinnedTrackingFrameInfo{0};
191216

192217
// Temporary buffer for storing output tracks from GPU tracking
193218
bounded_vector<TrackITSExt> mTrackITSExt;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackerTraitsGPU.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ class TrackerTraitsGPU final : public TrackerTraits<nLayers>
2424
{
2525
public:
2626
TrackerTraitsGPU() = default;
27-
~TrackerTraitsGPU() override = default;
27+
~TrackerTraitsGPU() final = default;
2828

2929
void adoptTimeFrame(TimeFrame<nLayers>* tf) final;
3030
void initialiseTimeFrame(const int iteration) final;

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 30 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,10 @@
1313
#ifndef ITSTRACKINGGPU_TRACKINGKERNELS_H_
1414
#define ITSTRACKINGGPU_TRACKINGKERNELS_H_
1515

16+
#include <gsl/gsl>
17+
18+
#include "ITStracking/BoundedAllocator.h"
19+
#include "ITStrackingGPU/Utils.h"
1620
#include "DetectorsBase/Propagator.h"
1721
#include "GPUCommonDef.h"
1822

@@ -25,43 +29,43 @@ namespace gpu
2529

2630
#ifdef GPUCA_GPUCODE // GPUg() global kernels must only be declared when compiled by the GPU compiler
2731

28-
GPUdi() int4 getEmptyBinsRect()
32+
GPUdii() int4 getEmptyBinsRect()
2933
{
3034
return int4{0, 0, 0, 0};
3135
}
3236

33-
GPUd() bool fitTrack(TrackITSExt& track,
34-
int start,
35-
int end,
36-
int step,
37-
float chi2clcut,
38-
float chi2ndfcut,
39-
float maxQoverPt,
40-
int nCl,
41-
float Bz,
42-
TrackingFrameInfo** tfInfos,
43-
const o2::base::Propagator* prop,
44-
o2::base::PropagatorF::MatCorrType matCorrType = o2::base::PropagatorImpl<float>::MatCorrType::USEMatCorrNONE);
37+
GPUdii() bool fitTrack(TrackITSExt& track,
38+
int start,
39+
int end,
40+
int step,
41+
float chi2clcut,
42+
float chi2ndfcut,
43+
float maxQoverPt,
44+
int nCl,
45+
float Bz,
46+
TrackingFrameInfo** tfInfos,
47+
const o2::base::Propagator* prop,
48+
o2::base::PropagatorF::MatCorrType matCorrType = o2::base::PropagatorImpl<float>::MatCorrType::USEMatCorrNONE);
4549

4650
template <int nLayers = 7>
47-
GPUg() void fitTrackSeedsKernel(
48-
CellSeed* trackSeeds,
49-
const TrackingFrameInfo** foundTrackingFrameInfo,
50-
o2::its::TrackITSExt* tracks,
51-
const float* minPts,
52-
const unsigned int nSeeds,
53-
const float Bz,
54-
const int startLevel,
55-
float maxChi2ClusterAttachment,
56-
float maxChi2NDF,
57-
const o2::base::Propagator* propagator,
58-
const o2::base::PropagatorF::MatCorrType matCorrType = o2::base::PropagatorF::MatCorrType::USEMatCorrLUT);
51+
GPUg() void fitTrackSeedsKernel(CellSeed* trackSeeds,
52+
const TrackingFrameInfo** foundTrackingFrameInfo,
53+
o2::its::TrackITSExt* tracks,
54+
const float* minPts,
55+
const unsigned int nSeeds,
56+
const float Bz,
57+
const int startLevel,
58+
float maxChi2ClusterAttachment,
59+
float maxChi2NDF,
60+
const o2::base::Propagator* propagator,
61+
const o2::base::PropagatorF::MatCorrType matCorrType = o2::base::PropagatorF::MatCorrType::USEMatCorrLUT);
5962
#endif
6063
} // namespace gpu
6164

6265
template <int nLayers = 7>
6366
void countTrackletsInROFsHandler(const IndexTableUtils* utils,
6467
const uint8_t* multMask,
68+
const int layer,
6569
const int startROF,
6670
const int endROF,
6771
const int maxROF,
@@ -94,6 +98,7 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils,
9498
template <int nLayers = 7>
9599
void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
96100
const uint8_t* multMask,
101+
const int layer,
97102
const int startROF,
98103
const int endROF,
99104
const int maxROF,

0 commit comments

Comments
 (0)