1313#ifndef TRACKINGITSGPU_INCLUDE_TIMEFRAMEGPU_H
1414#define TRACKINGITSGPU_INCLUDE_TIMEFRAMEGPU_H
1515
16+ #include < gsl/gsl>
17+ #include < bitset>
18+
1619#include " ITStracking/BoundedAllocator.h"
1720#include " ITStracking/TimeFrame.h"
1821#include " ITStracking/Configuration.h"
1922#include " ITStrackingGPU/Utils.h"
2023
21- #include < gsl/gsl>
22-
2324namespace o2 ::its::gpu
2425{
2526
@@ -28,7 +29,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
2829{
2930 public:
3031 TimeFrameGPU ();
31- ~TimeFrameGPU ();
32+ ~TimeFrameGPU () = default ;
3233
3334 /// Most relevant operations
3435 void registerHostMemory (const int );
@@ -37,18 +38,25 @@ class TimeFrameGPU : public TimeFrame<nLayers>
3738 void initDevice (IndexTableUtils*, const TrackingParameters& trkParam, const TimeFrameGPUParameters&, const int , const int );
3839 void initDeviceSAFitting ();
3940 void loadIndexTableUtils (const int );
40- void loadTrackingFrameInfoDevice (const int );
41- void loadUnsortedClustersDevice (const int );
42- void loadClustersDevice (const int );
43- void loadClustersIndexTables (const int iteration);
44- void createUsedClustersDevice (const int );
// Declarations only; no bodies are visible in this part of the header.
// Each two-argument load*/create* member below is immediately followed by a
// one-argument *Array companion.
// NOTE(review): every parameter except `iteration` is unnamed, so the meaning of the
// int arguments cannot be read from the header — presumably (iteration, layer);
// confirm against the definitions and consider naming them here.
41+ void loadTrackingFrameInfoDevice (const int , const int );
42+ void createTrackingFrameInfoDeviceArray (const int );
43+ void loadUnsortedClustersDevice (const int , const int );
44+ void createUnsortedClustersDeviceArray (const int );
45+ void loadClustersDevice (const int , const int );
46+ void createClustersDeviceArray (const int );
47+ void loadClustersIndexTables (const int , const int );
48+ void createClustersIndexTablesArray (const int iteration);
49+ void createUsedClustersDevice (const int , const int );
50+ void createUsedClustersDeviceArray (const int );
4551 void loadUsedClustersDevice ();
46- void loadROframeClustersDevice (const int );
52+ void loadROFrameClustersDevice (const int , const int );
53+ void createROFrameClustersDeviceArray (const int );
4754 void loadMultiplicityCutMask (const int );
4855 void loadVertices (const int );
4956
5057 ///
51- void createTrackletsLUTDevice (const int );
58+ void createTrackletsLUTDevice (const int , const int );
59+ void createTrackletsLUTDeviceArray (const int );
5260 void loadTrackletsDevice ();
5361 void loadTrackletsLUTDevice ();
5462 void loadCellsDevice ();
@@ -57,11 +65,14 @@ class TimeFrameGPU : public TimeFrame<nLayers>
5765 void loadTrackSeedsChi2Device ();
5866 void loadRoadsDevice ();
5967 void loadTrackSeedsDevice (bounded_vector<CellSeed>&);
60- void createTrackletsBuffers ();
68+ void createTrackletsBuffers (const int );
69+ void createTrackletsBuffersArray (const int );
6170 void createCellsBuffers (const int );
71+ void createCellsBuffersArray (const int );
6272 void createCellsDevice ();
63- void createCellsLUTDevice ();
64- void createNeighboursIndexTablesDevice ();
73+ void createCellsLUTDevice (const int );
74+ void createCellsLUTDeviceArray (const int );
75+ void createNeighboursIndexTablesDevice (const int );
6576 void createNeighboursDevice (const unsigned int layer);
6677 void createNeighboursLUTDevice (const int , const unsigned int );
6778 void createTrackITSExtDevice (bounded_vector<CellSeed>&);
@@ -70,10 +81,17 @@ class TimeFrameGPU : public TimeFrame<nLayers>
7081 void downloadNeighboursLUTDevice (bounded_vector<int >&, const int );
7182 void downloadCellsDevice ();
7283 void downloadCellsLUTDevice ();
84+
85+ /// synchronization
// Returns a reference to element `stream` of mGpuStreams.
7386 auto & getStream (const size_t stream) { return mGpuStreams [stream]; }
// Returns a reference to the whole mGpuStreams container.
7487 auto & getStreams () { return mGpuStreams ; }
// Declared here, defined elsewhere.
7588 void syncStream (const size_t stream);
76- void syncStreams ();
// The unnamed bool defaults to true, so existing zero-argument calls keep compiling.
// NOTE(review): what the flag selects is not visible in this header — confirm in the definition.
89+ void syncStreams (const bool = true );
// Event helpers; declarations only. The int parameters are unnamed.
// NOTE(review): presumably stream/layer indices — confirm against the definitions.
90+ void waitEvent (const int , const int );
91+ void recordEvent (const int );
// Defaults are 0 and nLayers.
// NOTE(review): presumably a half-open [first, last) range — confirm.
92+ void recordEvents (const int = 0 , const int = nLayers);
93+
94+ /// cleanup
7795 virtual void wipe () final ;
7896
7997 /// interface
@@ -102,19 +120,19 @@ class TimeFrameGPU : public TimeFrame<nLayers>
102120 const int ** getDeviceArrayClustersIndexTables () const { return mClustersIndexTablesDeviceArray ; }
103121 std::vector<unsigned int > getClusterSizes ();
104122 const unsigned char ** getDeviceArrayUsedClusters () const { return mUsedClustersDeviceArray ; }
105- const int ** getDeviceROframeClusters () const { return mROFrameClustersDeviceArray ; }
106- Tracklet** getDeviceArrayTracklets () { return mTrackletsDevice . data () ; }
// Returns the mROFramesClustersDeviceArray pointer member unchanged.
123+ const int ** getDeviceROFrameClusters () const { return mROFramesClustersDeviceArray ; }
// Returns the dedicated Tracklet** member mTrackletsDeviceArray,
// not the storage of the mTrackletsDevice std::array.
124+ Tracklet** getDeviceArrayTracklets () { return mTrackletsDeviceArray ; }
107125 int ** getDeviceArrayTrackletsLUT () const { return mTrackletsLUTDeviceArray ; }
108126 int ** getDeviceArrayCellsLUT () const { return mCellsLUTDeviceArray ; }
109127 int ** getDeviceArrayNeighboursCellLUT () const { return mNeighboursCellLUTDeviceArray ; }
110- CellSeed** getDeviceArrayCells () { return mCellsDevice . data () ; }
// Returns the mCellsDeviceArray pointer member as-is
// (a separate member from the mCellsDevice std::array).
128+ CellSeed** getDeviceArrayCells () { return mCellsDeviceArray ; }
111129 CellSeed* getDeviceTrackSeeds () { return mTrackSeedsDevice ; }
112130 o2::track::TrackParCovF** getDeviceArrayTrackSeeds () { return mCellSeedsDeviceArray ; }
113131 float ** getDeviceArrayTrackSeedsChi2 () { return mCellSeedsChi2DeviceArray ; }
// Returns entry `layer` of mNeighboursIndexTablesDevice.
// The index is not range-checked (std::array::operator[]); the array is declared
// with nLayers - 3 entries, so callers must keep 0 <= layer < nLayers - 3.
114132 int * getDeviceNeighboursIndexTables (const int layer) { return mNeighboursIndexTablesDevice [layer]; }
115133 uint8_t * getDeviceMultCutMask () { return mMultMaskDevice ; }
116134
117- void setDevicePropagator (const o2::base::PropagatorImpl<float >*) override ;
// Assigns the pointer `p` to mPropagatorDevice; nothing else happens here.
// Marked `final`, so classes deriving from TimeFrameGPU cannot override it.
// NOTE(review): `this->` is presumably required because mPropagatorDevice lives in the
// dependent base TimeFrame<nLayers> — confirm.
135+ void setDevicePropagator (const o2::base::PropagatorImpl<float >* p) final { this -> mPropagatorDevice = p; }
118136
119137 // Host-specific getters
// Returns a fixed-extent (nLayers - 1) span viewing mNTracklets; the span does not own the data.
120138 gsl::span<int , nLayers - 1 > getNTracklets () { return mNTracklets ; }
@@ -126,7 +144,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
126144 // Host-available device getters
127145 gsl::span<int *> getDeviceTrackletsLUTs () { return mTrackletsLUTDevice ; }
128146 gsl::span<int *> getDeviceCellLUTs () { return mCellsLUTDevice ; }
129- gsl::span<Tracklet*> getDeviceTracklet () { return mTrackletsDevice ; }
// Returns a dynamic-extent span viewing mTrackletsDevice,
// the std::array of nLayers - 1 Tracklet pointers.
147+ gsl::span<Tracklet*> getDeviceTracklets () { return mTrackletsDevice ; }
130148 gsl::span<CellSeed*> getDeviceCells () { return mCellsDevice ; }
131149
132150 // Overridden getters
@@ -137,7 +155,6 @@ class TimeFrameGPU : public TimeFrame<nLayers>
137155 private:
138156 void allocMemAsync (void **, size_t , Stream&, bool ); // Abstract owned and unowned memory allocations on specific stream
139157 void allocMem (void **, size_t , bool ); // Abstract owned and unowned memory allocations on default stream
140- bool mHostRegistered = false ;
141158 TimeFrameGPUParameters mGpuParams ;
142159
143160 // Host-available device buffer sizes
@@ -161,19 +178,21 @@ class TimeFrameGPU : public TimeFrame<nLayers>
161178 const Cluster** mUnsortedClustersDeviceArray ;
162179 const int ** mClustersIndexTablesDeviceArray ;
163180 const unsigned char ** mUsedClustersDeviceArray ;
164- const int ** mROFrameClustersDeviceArray ;
181+ const int ** mROFramesClustersDeviceArray ;
165182 std::array<Tracklet*, nLayers - 1 > mTrackletsDevice ;
166183 std::array<int *, nLayers - 1 > mTrackletsLUTDevice ;
167184 std::array<int *, nLayers - 2 > mCellsLUTDevice ;
168185 std::array<int *, nLayers - 3 > mNeighboursLUTDevice ;
169186
170- int ** mCellsLUTDeviceArray ;
171- int ** mNeighboursCellDeviceArray ;
172- int ** mNeighboursCellLUTDeviceArray ;
173- int ** mTrackletsLUTDeviceArray ;
187+ Tracklet** mTrackletsDeviceArray {nullptr };
188+ int ** mCellsLUTDeviceArray {nullptr };
189+ int ** mNeighboursCellDeviceArray {nullptr };
190+ int ** mNeighboursCellLUTDeviceArray {nullptr };
191+ int ** mTrackletsLUTDeviceArray {nullptr };
174192 std::array<CellSeed*, nLayers - 2 > mCellsDevice ;
175- std::array<int *, nLayers - 2 > mNeighboursIndexTablesDevice ;
176- CellSeed* mTrackSeedsDevice ;
// Pointer handed out by getDeviceArrayCells().
// Null-initialised like the neighbouring *DeviceArray pointer members, so the getter
// never returns an indeterminate value before the pointer has been assigned.
193+ CellSeed** mCellsDeviceArray {nullptr };
194+ std::array<int *, nLayers - 3 > mNeighboursIndexTablesDevice ;
195+ CellSeed* mTrackSeedsDevice {nullptr };
177196 std::array<o2::track::TrackParCovF*, nLayers - 2 > mCellSeedsDevice ;
178197 o2::track::TrackParCovF** mCellSeedsDeviceArray ;
179198 std::array<float *, nLayers - 2 > mCellSeedsChi2Device ;
@@ -188,6 +207,12 @@ class TimeFrameGPU : public TimeFrame<nLayers>
188207
189208 // State
190209 Streams mGpuStreams ;
// Six flag sets, each nLayers + 1 bits wide and fully cleared at construction.
// NOTE(review): presumably one bit per layer marking the matching host buffer as
// pinned/registered; the purpose of the extra (nLayers + 1)-th bit is not visible
// here — confirm against the code that sets them.
210+ std::bitset<nLayers + 1 > mPinnedUnsortedClusters {0 };
211+ std::bitset<nLayers + 1 > mPinnedClusters {0 };
212+ std::bitset<nLayers + 1 > mPinnedClustersIndexTables {0 };
213+ std::bitset<nLayers + 1 > mPinnedUsedClusters {0 };
214+ std::bitset<nLayers + 1 > mPinnedROFramesClusters {0 };
215+ std::bitset<nLayers + 1 > mPinnedTrackingFrameInfo {0 };
191216
192217 // Temporary buffer for storing output tracks from GPU tracking
193218 bounded_vector<TrackITSExt> mTrackITSExt ;
0 commit comments