Skip to content

Commit 03fd79a

Browse files
authored
ITSGPU: Port findNeighbours on GPU (#13636)
1 parent 6a2cc7e commit 03fd79a

File tree

11 files changed

+893
-558
lines changed

11 files changed

+893
-558
lines changed
Lines changed: 150 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,150 @@
1+
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2+
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3+
// All rights not expressly granted are reserved.
4+
//
5+
// This software is distributed under the terms of the GNU General Public
6+
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7+
//
8+
// In applying this license CERN does not waive the privileges and immunities
9+
// granted to it by virtue of its status as an Intergovernmental Organization
10+
// or submit itself to any jurisdiction.
11+
///
12+
13+
#ifndef TRACKINGITSGPU_INCLUDE_TIMEFRAMECHUNKGPU_H
14+
#define TRACKINGITSGPU_INCLUDE_TIMEFRAMECHUNKGPU_H
15+
16+
#include "ITStracking/Configuration.h"
17+
#include "ITStracking/TimeFrame.h"
18+
19+
#include "ITStrackingGPU/ClusterLinesGPU.h"
20+
#include "ITStrackingGPU/Array.h"
21+
#include "ITStrackingGPU/Vector.h"
22+
#include "ITStrackingGPU/Stream.h"
23+
24+
#include <gsl/gsl>
25+
26+
namespace o2::its::gpu
27+
{
28+
/// Plain struct holding a copy of eight fields taken from TrackingParameters.
/// The template argument nLayers is used only as the default value of MinTrackLength.
template <int nLayers>
29+
struct StaticTrackingParameters {
30+
/// Member-wise copy assignment, explicitly defaulted.
StaticTrackingParameters<nLayers>& operator=(const StaticTrackingParameters<nLayers>& t) = default;
31+
/// Overwrites every data member of this struct with the same-named field of pars.
/// No other field of TrackingParameters is read here.
void set(const TrackingParameters& pars)
32+
{
33+
ClusterSharing = pars.ClusterSharing;
34+
MinTrackLength = pars.MinTrackLength;
35+
NSigmaCut = pars.NSigmaCut;
36+
PVres = pars.PVres;
37+
DeltaROF = pars.DeltaROF;
38+
ZBins = pars.ZBins;
39+
PhiBins = pars.PhiBins;
40+
CellDeltaTanLambdaSigma = pars.CellDeltaTanLambdaSigma;
41+
}
42+
43+
/// General parameters
// The initializers below are the values in effect until set() is called.
44+
int ClusterSharing = 0;
45+
int MinTrackLength = nLayers;
46+
float NSigmaCut = 5;
47+
float PVres = 1.e-2f;
48+
int DeltaROF = 0;
49+
int ZBins{256};
50+
int PhiBins{128};
51+
52+
/// Cell finding cuts
53+
float CellDeltaTanLambdaSigma = 0.007f;
54+
};
55+
56+
/// Holder of host-side spans and raw device-side pointers for one chunk of a time frame,
/// with separate members for tracker and vertexer data.
/// Only the inline members are defined in this header; every other member function is
/// declared here and defined elsewhere.
/// NOTE(review): presumably one chunk covers a group of readout frames (ROFs), judging by
/// computeRofPerChunk and mNRof — confirm against the implementation.
template <int nLayers>
57+
class GpuTimeFrameChunk
58+
{
59+
public:
// Static size helpers (definitions not visible here).
// NOTE(review): the names suggest byte counts and a ROF count per chunk — confirm the
// meaning of the unnamed int / size_t arguments in the implementation.
60+
static size_t computeScalingSizeBytes(const int, const TimeFrameGPUParameters&);
61+
static size_t computeFixedSizeBytes(const TimeFrameGPUParameters&);
62+
static size_t computeRofPerChunk(const TimeFrameGPUParameters&, const size_t);
63+
64+
// A chunk cannot be created without a time frame and a parameter set.
GpuTimeFrameChunk() = delete;
65+
/// Keeps the pointer tf and the address of conf; neither object is copied, so conf must
/// stay valid for as long as the pointer from getTimeFrameGPUParameters() is used.
/// NOTE(review): the raw pointer members and std::array-of-pointer members declared in the
/// private section have no default initializer and are not assigned here, so their values
/// are indeterminate until some other member sets them.
GpuTimeFrameChunk(o2::its::TimeFrame* tf, TimeFrameGPUParameters& conf)
66+
{
67+
mTimeFramePtr = tf;
68+
mTFGPUParams = &conf;
69+
}
70+
~GpuTimeFrameChunk();
71+
72+
/// Most relevant operations
73+
void allocate(const size_t, Stream&);
74+
void reset(const Task, Stream&);
75+
size_t loadDataOnDevice(const size_t, const size_t, const int, Stream&);
76+
77+
/// Interface
// Getters taking an int are only declared here.
// NOTE(review): presumably the int argument is a layer index — confirm in the implementation.
78+
Cluster* getDeviceClusters(const int);
79+
int* getDeviceClusterExternalIndices(const int);
80+
int* getDeviceIndexTables(const int);
81+
Tracklet* getDeviceTracklets(const int);
82+
int* getDeviceTrackletsLookupTables(const int);
83+
CellSeed* getDeviceCells(const int);
84+
int* getDeviceCellsLookupTables(const int);
85+
int* getDeviceRoadsLookupTables(const int);
86+
// Returns the address of the conf object passed to the constructor.
TimeFrameGPUParameters* getTimeFrameGPUParameters() const { return mTFGPUParams; }
87+
88+
// The inline getters below return the stored raw pointer unchanged.
int* getDeviceCUBTmpBuffer() { return mCUBTmpBufferDevice; }
89+
int* getDeviceFoundTracklets() { return mFoundTrackletsDevice; }
90+
int* getDeviceNFoundCells() { return mNFoundCellsDevice; }
91+
// The spelling "Neigbours" is part of the declared name; callers must use it as written.
int* getDeviceCellNeigboursLookupTables(const int);
92+
int* getDeviceCellNeighbours(const int);
93+
CellSeed** getDeviceArrayCells() const { return mCellsDeviceArray; }
94+
int** getDeviceArrayNeighboursCell() const { return mNeighboursCellDeviceArray; }
95+
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLookupTablesDeviceArray; }
96+
97+
/// Vertexer only
98+
// combid indexes a two-element std::array through operator[], which performs no bounds check.
int* getDeviceNTrackletCluster(const int combid) { return mNTrackletsPerClusterDevice[combid]; }
99+
// The ';' after the function body is a redundant empty declaration.
Line* getDeviceLines() { return mLinesDevice; };
100+
int* getDeviceNFoundLines() { return mNFoundLinesDevice; }
101+
int* getDeviceNExclusiveFoundLines() { return mNExclusiveFoundLinesDevice; }
102+
unsigned char* getDeviceUsedTracklets() { return mUsedTrackletsDevice; }
103+
int* getDeviceClusteredLines() { return mClusteredLinesDevice; }
104+
size_t getNPopulatedRof() const { return mNPopulatedRof; }
105+
106+
private:
107+
/// Host
// Read-only views (gsl::span over const elements), one entry per layer.
108+
std::array<gsl::span<const Cluster>, nLayers> mHostClusters;
109+
std::array<gsl::span<const int>, nLayers> mHostIndexTables;
110+
111+
/// Device
// Arrays of raw pointers with extents nLayers, nLayers - 1, nLayers - 2 and nLayers - 3.
112+
std::array<Cluster*, nLayers> mClustersDevice;
113+
std::array<int*, nLayers> mClusterExternalIndicesDevice;
114+
std::array<int*, nLayers> mIndexTablesDevice;
115+
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
116+
std::array<int*, nLayers - 1> mTrackletsLookupTablesDevice;
117+
std::array<CellSeed*, nLayers - 2> mCellsDevice;
118+
// Road<nLayers - 2>* mRoadsDevice;
119+
std::array<int*, nLayers - 2> mCellsLookupTablesDevice;
120+
std::array<int*, nLayers - 3> mNeighboursCellDevice;
121+
std::array<int*, nLayers - 3> mNeighboursCellLookupTablesDevice;
122+
std::array<int*, nLayers - 2> mRoadsLookupTablesDevice;
123+
124+
// These are to make them accessible using layer index
// Pointer-to-pointer members returned by the getDeviceArray*() getters above.
125+
CellSeed** mCellsDeviceArray;
126+
int** mNeighboursCellDeviceArray;
127+
int** mNeighboursCellLookupTablesDeviceArray;
128+
129+
// Small accessory buffers
130+
int* mCUBTmpBufferDevice;
131+
int* mFoundTrackletsDevice;
132+
int* mNFoundCellsDevice;
133+
134+
/// Vertexer only
135+
Line* mLinesDevice;
136+
int* mNFoundLinesDevice;
137+
int* mNExclusiveFoundLinesDevice;
138+
unsigned char* mUsedTrackletsDevice;
139+
std::array<int*, 2> mNTrackletsPerClusterDevice;
140+
int* mClusteredLinesDevice;
141+
142+
/// State and configuration
// Unlike the pointers above, these members all carry default initializers.
143+
bool mAllocated = false;
144+
size_t mNRof = 0;
145+
size_t mNPopulatedRof = 0;
146+
o2::its::TimeFrame* mTimeFramePtr = nullptr;
147+
TimeFrameGPUParameters* mTFGPUParams = nullptr;
148+
};
149+
} // namespace o2::its::gpu
150+
#endif

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 19 additions & 137 deletions
Original file line numberDiff line numberDiff line change
@@ -20,156 +20,21 @@
2020
#include "ITStrackingGPU/Array.h"
2121
#include "ITStrackingGPU/Vector.h"
2222
#include "ITStrackingGPU/Stream.h"
23+
#include "ITStrackingGPU/TimeFrameChunk.h"
2324

2425
#include <gsl/gsl>
2526

2627
namespace o2
2728
{
28-
namespace gpu
29-
{
30-
class GPUChainITS;
31-
}
3229
namespace its
3330
{
34-
template <typename T1, typename T2>
35-
struct gpuPair {
36-
T1 first;
37-
T2 second;
38-
};
39-
4031
namespace gpu
4132
{
4233

4334
/// Allocator type deriving publicly from ExternalAllocator.
/// allocate() overrides the base-class virtual and is only declared here; because no access
/// specifier precedes it in this class, the override is private in DefaultGPUAllocator.
class DefaultGPUAllocator : public ExternalAllocator
4435
{
4536
void* allocate(size_t size) override;
4637
};
47-
template <int nLayers>
48-
struct StaticTrackingParameters {
49-
StaticTrackingParameters<nLayers>& operator=(const StaticTrackingParameters<nLayers>& t) = default;
50-
void set(const TrackingParameters& pars)
51-
{
52-
ClusterSharing = pars.ClusterSharing;
53-
MinTrackLength = pars.MinTrackLength;
54-
NSigmaCut = pars.NSigmaCut;
55-
PVres = pars.PVres;
56-
DeltaROF = pars.DeltaROF;
57-
ZBins = pars.ZBins;
58-
PhiBins = pars.PhiBins;
59-
CellDeltaTanLambdaSigma = pars.CellDeltaTanLambdaSigma;
60-
}
61-
62-
/// General parameters
63-
int ClusterSharing = 0;
64-
int MinTrackLength = nLayers;
65-
float NSigmaCut = 5;
66-
float PVres = 1.e-2f;
67-
int DeltaROF = 0;
68-
int ZBins{256};
69-
int PhiBins{128};
70-
71-
/// Cell finding cuts
72-
float CellDeltaTanLambdaSigma = 0.007f;
73-
};
74-
75-
enum class Task {
76-
Tracker = 0,
77-
Vertexer = 1
78-
};
79-
80-
template <int nLayers>
81-
class GpuTimeFrameChunk
82-
{
83-
public:
84-
static size_t computeScalingSizeBytes(const int, const TimeFrameGPUParameters&);
85-
static size_t computeFixedSizeBytes(const TimeFrameGPUParameters&);
86-
static size_t computeRofPerChunk(const TimeFrameGPUParameters&, const size_t);
87-
88-
GpuTimeFrameChunk() = delete;
89-
GpuTimeFrameChunk(o2::its::TimeFrame* tf, TimeFrameGPUParameters& conf)
90-
{
91-
mTimeFramePtr = tf;
92-
mTFGPUParams = &conf;
93-
}
94-
~GpuTimeFrameChunk();
95-
96-
/// Most relevant operations
97-
void allocate(const size_t, Stream&);
98-
void reset(const Task, Stream&);
99-
size_t loadDataOnDevice(const size_t, const size_t, const int, Stream&);
100-
101-
/// Interface
102-
Cluster* getDeviceClusters(const int);
103-
int* getDeviceClusterExternalIndices(const int);
104-
int* getDeviceIndexTables(const int);
105-
Tracklet* getDeviceTracklets(const int);
106-
int* getDeviceTrackletsLookupTables(const int);
107-
CellSeed* getDeviceCells(const int);
108-
int* getDeviceCellsLookupTables(const int);
109-
int* getDeviceRoadsLookupTables(const int);
110-
TimeFrameGPUParameters* getTimeFrameGPUParameters() const { return mTFGPUParams; }
111-
112-
int* getDeviceCUBTmpBuffer() { return mCUBTmpBufferDevice; }
113-
int* getDeviceFoundTracklets() { return mFoundTrackletsDevice; }
114-
int* getDeviceNFoundCells() { return mNFoundCellsDevice; }
115-
int* getDeviceCellNeigboursLookupTables(const int);
116-
int* getDeviceCellNeighbours(const int);
117-
CellSeed** getDeviceArrayCells() const { return mCellsDeviceArray; }
118-
int** getDeviceArrayNeighboursCell() const { return mNeighboursCellDeviceArray; }
119-
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLookupTablesDeviceArray; }
120-
121-
/// Vertexer only
122-
int* getDeviceNTrackletCluster(const int combid) { return mNTrackletsPerClusterDevice[combid]; }
123-
Line* getDeviceLines() { return mLinesDevice; };
124-
int* getDeviceNFoundLines() { return mNFoundLinesDevice; }
125-
int* getDeviceNExclusiveFoundLines() { return mNExclusiveFoundLinesDevice; }
126-
unsigned char* getDeviceUsedTracklets() { return mUsedTrackletsDevice; }
127-
int* getDeviceClusteredLines() { return mClusteredLinesDevice; }
128-
size_t getNPopulatedRof() const { return mNPopulatedRof; }
129-
130-
private:
131-
/// Host
132-
std::array<gsl::span<const Cluster>, nLayers> mHostClusters;
133-
std::array<gsl::span<const int>, nLayers> mHostIndexTables;
134-
135-
/// Device
136-
std::array<Cluster*, nLayers> mClustersDevice;
137-
std::array<int*, nLayers> mClusterExternalIndicesDevice;
138-
std::array<int*, nLayers> mIndexTablesDevice;
139-
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
140-
std::array<int*, nLayers - 1> mTrackletsLookupTablesDevice;
141-
std::array<CellSeed*, nLayers - 2> mCellsDevice;
142-
// Road<nLayers - 2>* mRoadsDevice;
143-
std::array<int*, nLayers - 2> mCellsLookupTablesDevice;
144-
std::array<int*, nLayers - 3> mNeighboursCellDevice;
145-
std::array<int*, nLayers - 3> mNeighboursCellLookupTablesDevice;
146-
std::array<int*, nLayers - 2> mRoadsLookupTablesDevice;
147-
148-
// These are to make them accessible using layer index
149-
CellSeed** mCellsDeviceArray;
150-
int** mNeighboursCellDeviceArray;
151-
int** mNeighboursCellLookupTablesDeviceArray;
152-
153-
// Small accessory buffers
154-
int* mCUBTmpBufferDevice;
155-
int* mFoundTrackletsDevice;
156-
int* mNFoundCellsDevice;
157-
158-
/// Vertexer only
159-
Line* mLinesDevice;
160-
int* mNFoundLinesDevice;
161-
int* mNExclusiveFoundLinesDevice;
162-
unsigned char* mUsedTrackletsDevice;
163-
std::array<int*, 2> mNTrackletsPerClusterDevice;
164-
int* mClusteredLinesDevice;
165-
166-
/// State and configuration
167-
bool mAllocated = false;
168-
size_t mNRof = 0;
169-
size_t mNPopulatedRof = 0;
170-
o2::its::TimeFrame* mTimeFramePtr = nullptr;
171-
TimeFrameGPUParameters* mTFGPUParams = nullptr;
172-
};
17338

17439
template <int nLayers = 7>
17540
class TimeFrameGPU : public TimeFrame
@@ -191,13 +56,19 @@ class TimeFrameGPU : public TimeFrame
19156
void loadClustersDevice();
19257
void loadTrackletsDevice();
19358
void loadCellsDevice();
59+
void loadCellsLUT();
19460
void loadTrackSeedsDevice();
19561
void loadTrackSeedsChi2Device();
19662
void loadRoadsDevice();
19763
void loadTrackSeedsDevice(std::vector<CellSeed>&);
198-
void createCellNeighboursDevice(const unsigned int& layer, std::vector<std::pair<int, int>>& neighbours);
64+
void createNeighboursDevice(const unsigned int& layer, std::vector<std::pair<int, int>>& neighbours);
65+
void createNeighboursLUTDevice(const int, const unsigned int);
19966
void createTrackITSExtDevice(std::vector<CellSeed>&);
20067
void downloadTrackITSExtDevice(std::vector<CellSeed>&);
68+
void downloadCellsNeighbours(std::vector<std::vector<std::pair<int, int>>>&, const int);
69+
void downloadNeighboursLUT(std::vector<int>&, const int);
70+
void downloadCellsDevice(const int);
71+
void unregisterRest();
20172
void initDeviceChunks(const int, const int);
20273
template <Task task>
20374
size_t loadChunkData(const size_t, const size_t, const size_t);
@@ -224,17 +95,22 @@ class TimeFrameGPU : public TimeFrame
22495
// Hybrid
22596
Road<nLayers - 2>* getDeviceRoads() { return mRoadsDevice; }
22697
TrackITSExt* getDeviceTrackITSExt() { return mTrackITSExtDevice; }
98+
int* getDeviceNeighboursLUT(const int layer) { return mNeighboursLUTDevice[layer]; }
22799
gpuPair<int, int>* getDeviceNeighbours(const int layer) { return mNeighboursDevice[layer]; }
228100
TrackingFrameInfo* getDeviceTrackingFrameInfo(const int);
229101
// TrackingFrameInfo** getDeviceArrayTrackingFrameInfo() { return mTrackingFrameInfoDeviceArray; }
230102
const TrackingFrameInfo** getDeviceArrayTrackingFrameInfo() const { return mTrackingFrameInfoDeviceArray; }
231103
Cluster** getDeviceArrayClusters() const { return mClustersDeviceArray; }
232104
Cluster** getDeviceArrayUnsortedClusters() const { return mUnsortedClustersDeviceArray; }
233105
Tracklet** getDeviceArrayTracklets() const { return mTrackletsDeviceArray; }
106+
int** getDeviceArrayCellsLUT() const { return mCellsLUTDeviceArray; }
107+
int** getDeviceArrayNeighboursCellLUT() const { return mNeighboursCellLUTDeviceArray; }
234108
CellSeed** getDeviceArrayCells() const { return mCellsDeviceArray; }
235109
CellSeed* getDeviceTrackSeeds() { return mTrackSeedsDevice; }
236110
o2::track::TrackParCovF** getDeviceArrayTrackSeeds() { return mCellSeedsDeviceArray; }
237111
float** getDeviceArrayTrackSeedsChi2() { return mCellSeedsChi2DeviceArray; }
112+
int* getDeviceNeighboursIndexTables(const int layer) { return mNeighboursIndexTablesDevice[layer]; }
113+
238114
void setDevicePropagator(const o2::base::PropagatorImpl<float>*) override;
239115

240116
// Host-specific getters
@@ -263,7 +139,13 @@ class TimeFrameGPU : public TimeFrame
263139
Cluster** mUnsortedClustersDeviceArray;
264140
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
265141
Tracklet** mTrackletsDeviceArray;
142+
std::array<int*, nLayers - 2> mCellsLUTDevice;
143+
std::array<int*, nLayers - 3> mNeighboursLUTDevice;
144+
int** mCellsLUTDeviceArray;
145+
int** mNeighboursCellDeviceArray;
146+
int** mNeighboursCellLUTDeviceArray;
266147
std::array<CellSeed*, nLayers - 2> mCellsDevice;
148+
std::array<int*, nLayers - 2> mNeighboursIndexTablesDevice;
267149
CellSeed* mTrackSeedsDevice;
268150
CellSeed** mCellsDeviceArray;
269151
std::array<o2::track::TrackParCovF*, nLayers - 2> mCellSeedsDevice;

0 commit comments

Comments
 (0)