Skip to content

Commit 9defabd

Browse files
Felix Schlepperf3sch
authored andcommitted
ITS: GPU: partial Vertexer implementation
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent e04d84f commit 9defabd

20 files changed

+1009
-713
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 41 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -25,13 +25,13 @@ namespace o2::its::gpu
2525
{
2626

2727
template <int nLayers = 7>
28-
class TimeFrameGPU : public TimeFrame<nLayers>
28+
class TimeFrameGPU final : public TimeFrame<nLayers>
2929
{
3030
using typename TimeFrame<nLayers>::CellSeedN;
3131
using typename TimeFrame<nLayers>::IndexTableUtilsN;
3232

3333
public:
34-
TimeFrameGPU();
34+
TimeFrameGPU() = default;
3535
~TimeFrameGPU() = default;
3636

3737
/// Most relevant operations
@@ -44,13 +44,13 @@ class TimeFrameGPU : public TimeFrame<nLayers>
4444
void loadTrackingFrameInfoDevice(const int, const int);
4545
void createTrackingFrameInfoDeviceArray(const int);
4646
void loadUnsortedClustersDevice(const int, const int);
47-
void createUnsortedClustersDeviceArray(const int);
47+
void createUnsortedClustersDeviceArray(const int, const int = nLayers);
4848
void loadClustersDevice(const int, const int);
49-
void createClustersDeviceArray(const int);
49+
void createClustersDeviceArray(const int, const int = nLayers);
5050
void loadClustersIndexTables(const int, const int);
51-
void createClustersIndexTablesArray(const int iteration);
51+
void createClustersIndexTablesArray(const int);
5252
void createUsedClustersDevice(const int, const int);
53-
void createUsedClustersDeviceArray(const int);
53+
void createUsedClustersDeviceArray(const int, const int = nLayers);
5454
void loadUsedClustersDevice();
5555
void loadROFrameClustersDevice(const int, const int);
5656
void createROFrameClustersDeviceArray(const int);
@@ -85,6 +85,12 @@ class TimeFrameGPU : public TimeFrame<nLayers>
8585
void downloadCellsDevice();
8686
void downloadCellsLUTDevice();
8787

88+
/// Vertexer
89+
void createVtxTrackletsLUTDevice(const int32_t);
90+
void createVtxTrackletsBuffers(const int32_t);
91+
void createVtxLinesLUTDevice(const int32_t);
92+
void createVtxLinesBuffer(const int32_t);
93+
8894
/// synchronization
8995
auto& getStream(const size_t stream) { return mGpuStreams[stream]; }
9096
auto& getStreams() { return mGpuStreams; }
@@ -98,6 +104,8 @@ class TimeFrameGPU : public TimeFrame<nLayers>
98104
virtual void wipe() final;
99105

100106
/// interface
107+
virtual bool isGPU() const noexcept final { return true; }
108+
virtual const char* getName() const noexcept { return "GPU"; }
101109
int getNClustersInRofSpan(const int, const int, const int) const;
102110
IndexTableUtilsN* getDeviceIndexTableUtils() { return mIndexTableUtilsDevice; }
103111
int* getDeviceROFramesClusters(const int layer) { return mROFramesClustersDevice[layer]; }
@@ -122,7 +130,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
122130
const Cluster** getDeviceArrayUnsortedClusters() const { return mUnsortedClustersDeviceArray; }
123131
const int** getDeviceArrayClustersIndexTables() const { return mClustersIndexTablesDeviceArray; }
124132
std::vector<unsigned int> getClusterSizes();
125-
const unsigned char** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
133+
uint8_t** getDeviceArrayUsedClusters() const { return mUsedClustersDeviceArray; }
126134
const int** getDeviceROFrameClusters() const { return mROFramesClustersDeviceArray; }
127135
Tracklet** getDeviceArrayTracklets() { return mTrackletsDeviceArray; }
128136
int** getDeviceArrayTrackletsLUT() const { return mTrackletsLUTDeviceArray; }
@@ -135,6 +143,19 @@ class TimeFrameGPU : public TimeFrame<nLayers>
135143
int* getDeviceNeighboursIndexTables(const int layer) { return mNeighboursIndexTablesDevice[layer]; }
136144
uint8_t* getDeviceMultCutMask() { return mMultMaskDevice; }
137145

146+
// Vertexer
147+
auto& getDeviceNTrackletsPerROF() const noexcept { return mNTrackletsPerROFDevice; }
148+
auto& getDeviceNTrackletsPerCluster() const noexcept { return mNTrackletsPerClusterDevice; }
149+
auto& getDeviceNTrackletsPerClusterSum() const noexcept { return mNTrackletsPerClusterSumDevice; }
150+
int32_t** getDeviceArrayNTrackletsPerROF() const noexcept { return mNTrackletsPerROFDeviceArray; }
151+
int32_t** getDeviceArrayNTrackletsPerCluster() const noexcept { return mNTrackletsPerClusterDeviceArray; }
152+
int32_t** getDeviceArrayNTrackletsPerClusterSum() const noexcept { return mNTrackletsPerClusterSumDeviceArray; }
153+
uint8_t* getDeviceUsedTracklets() const noexcept { return mUsedTrackletsDevice; }
154+
int32_t* getDeviceNLinesPerCluster() const noexcept { return mNLinesPerClusterDevice; }
155+
int32_t* getDeviceNLinesPerClusterSum() const noexcept { return mNLinesPerClusterSumDevice; }
156+
Line* getDeviceLines() const noexcept { return mLinesDevice; }
157+
gsl::span<int*> getDeviceTrackletsPerROFs() { return mNTrackletsPerROFDevice; }
158+
138159
void setDevicePropagator(const o2::base::PropagatorImpl<float>* p) final { this->mPropagatorDevice = p; }
139160

140161
// Host-specific getters
@@ -180,7 +201,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
180201
const Cluster** mClustersDeviceArray;
181202
const Cluster** mUnsortedClustersDeviceArray;
182203
const int** mClustersIndexTablesDeviceArray;
183-
const unsigned char** mUsedClustersDeviceArray;
204+
uint8_t** mUsedClustersDeviceArray;
184205
const int** mROFramesClustersDeviceArray;
185206
std::array<Tracklet*, nLayers - 1> mTrackletsDevice;
186207
std::array<int*, nLayers - 1> mTrackletsLUTDevice;
@@ -208,6 +229,18 @@ class TimeFrameGPU : public TimeFrame<nLayers>
208229
std::array<TrackingFrameInfo*, nLayers> mTrackingFrameInfoDevice;
209230
const TrackingFrameInfo** mTrackingFrameInfoDeviceArray;
210231

232+
/// Vertexer
233+
std::array<int32_t*, 2> mNTrackletsPerROFDevice;
234+
std::array<int32_t*, 2> mNTrackletsPerClusterDevice;
235+
std::array<int32_t*, 2> mNTrackletsPerClusterSumDevice;
236+
uint8_t* mUsedTrackletsDevice;
237+
int32_t* mNLinesPerClusterDevice;
238+
int32_t* mNLinesPerClusterSumDevice;
239+
int32_t** mNTrackletsPerROFDeviceArray;
240+
int32_t** mNTrackletsPerClusterDeviceArray;
241+
int32_t** mNTrackletsPerClusterSumDeviceArray;
242+
Line* mLinesDevice;
243+
211244
// State
212245
Streams mGpuStreams;
213246
std::bitset<nLayers + 1> mPinnedUnsortedClusters{0};

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h

Lines changed: 189 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,15 +20,22 @@
2020
#include <string>
2121
#include <tuple>
2222

23+
#include "ITStracking/MathUtils.h"
24+
#include "ITStracking/ExternalAllocator.h"
25+
2326
#include "GPUCommonDef.h"
2427
#include "GPUCommonHelpers.h"
2528
#include "GPUCommonLogger.h"
29+
#include "GPUCommonDefAPI.h"
2630

31+
#ifdef GPUCA_GPUCODE
32+
#include <thrust/device_ptr.h>
2733
#ifndef __HIPCC__
2834
#define THRUST_NAMESPACE thrust::cuda
2935
#else
3036
#define THRUST_NAMESPACE thrust::hip
3137
#endif
38+
#endif
3239

3340
#ifdef ITS_GPU_LOG
3441
#define GPULog(...) LOGP(info, __VA_ARGS__)
@@ -38,6 +45,10 @@
3845

3946
namespace o2::its
4047
{
48+
// FWD declarations
49+
template <int>
50+
class IndexTableUtils;
51+
class Tracklet;
4152

4253
template <typename T1, typename T2>
4354
using gpuPair = std::pair<T1, T2>;
@@ -282,6 +293,184 @@ class GPUTimer
282293
}
283294
};
284295
#endif
296+
297+
#ifdef GPUCA_GPUCODE
298+
template <typename T>
299+
struct TypedAllocator {
300+
using value_type = T;
301+
using pointer = thrust::device_ptr<T>;
302+
using const_pointer = thrust::device_ptr<const T>;
303+
using size_type = std::size_t;
304+
using difference_type = std::ptrdiff_t;
305+
306+
TypedAllocator() noexcept : mInternalAllocator(nullptr) {}
307+
explicit TypedAllocator(ExternalAllocator* a) noexcept : mInternalAllocator(a) {}
308+
309+
template <typename U>
310+
TypedAllocator(const TypedAllocator<U>& o) noexcept : mInternalAllocator(o.mInternalAllocator)
311+
{
312+
}
313+
314+
pointer allocate(size_type n)
315+
{
316+
void* raw = mInternalAllocator->allocate(n * sizeof(T));
317+
return thrust::device_pointer_cast(static_cast<T*>(raw));
318+
}
319+
320+
void deallocate(pointer p, size_type n) noexcept
321+
{
322+
if (!p) {
323+
return;
324+
}
325+
void* raw = thrust::raw_pointer_cast(p);
326+
mInternalAllocator->deallocate(static_cast<char*>(raw), n * sizeof(T));
327+
}
328+
329+
bool operator==(TypedAllocator const& o) const noexcept
330+
{
331+
return mInternalAllocator == o.mInternalAllocator;
332+
}
333+
bool operator!=(TypedAllocator const& o) const noexcept
334+
{
335+
return !(*this == o);
336+
}
337+
338+
private:
339+
ExternalAllocator* mInternalAllocator;
340+
};
341+
342+
template <int nLayers>
343+
GPUdii() const int4 getBinsRect(const Cluster& currentCluster, const int layerIndex,
344+
const o2::its::IndexTableUtils<nLayers>* utils,
345+
const float z1, const float z2, float maxdeltaz, float maxdeltaphi)
346+
{
347+
const float zRangeMin = o2::gpu::CAMath::Min(z1, z2) - maxdeltaz;
348+
const float phiRangeMin = (maxdeltaphi > o2::constants::math::PI) ? 0.f : currentCluster.phi - maxdeltaphi;
349+
const float zRangeMax = o2::gpu::CAMath::Max(z1, z2) + maxdeltaz;
350+
const float phiRangeMax = (maxdeltaphi > o2::constants::math::PI) ? o2::constants::math::TwoPI : currentCluster.phi + maxdeltaphi;
351+
352+
if (zRangeMax < -utils->getLayerZ(layerIndex) ||
353+
zRangeMin > utils->getLayerZ(layerIndex) || zRangeMin > zRangeMax) {
354+
return {};
355+
}
356+
357+
return int4{o2::gpu::CAMath::Max(0, utils->getZBinIndex(layerIndex, zRangeMin)),
358+
utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMin)),
359+
o2::gpu::CAMath::Min(utils->getNzBins() - 1, utils->getZBinIndex(layerIndex, zRangeMax)),
360+
utils->getPhiBinIndex(math_utils::getNormalizedPhi(phiRangeMax))};
361+
}
362+
363+
GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int rof,
364+
const int* roframesPV,
365+
const int nROF,
366+
const uint8_t* mask,
367+
const Vertex* vertices)
368+
{
369+
const int start_pv_id = roframesPV[rof];
370+
const int stop_rof = rof >= nROF - 1 ? nROF : rof + 1;
371+
size_t delta = mask[rof] ? roframesPV[stop_rof] - start_pv_id : 0; // return empty span if ROF is excluded
372+
return gpuSpan<const Vertex>(&vertices[start_pv_id], delta);
373+
};
374+
375+
GPUdii() gpuSpan<const Vertex> getPrimaryVertices(const int romin,
376+
const int romax,
377+
const int* roframesPV,
378+
const int nROF,
379+
const Vertex* vertices)
380+
{
381+
const int start_pv_id = roframesPV[romin];
382+
const int stop_rof = romax >= nROF - 1 ? nROF : romax + 1;
383+
return gpuSpan<const Vertex>(&vertices[start_pv_id], roframesPV[stop_rof] - roframesPV[romin]);
384+
};
385+
386+
GPUdii() gpuSpan<const Cluster> getClustersOnLayer(const int rof,
387+
const int totROFs,
388+
const int layer,
389+
const int** roframesClus,
390+
const Cluster** clusters)
391+
{
392+
if (rof < 0 || rof >= totROFs) {
393+
return gpuSpan<const Cluster>();
394+
}
395+
const int start_clus_id{roframesClus[layer][rof]};
396+
const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
397+
const unsigned int delta = roframesClus[layer][stop_rof] - start_clus_id;
398+
return gpuSpan<const Cluster>(&(clusters[layer][start_clus_id]), delta);
399+
}
400+
401+
GPUdii() gpuSpan<const Tracklet> getTrackletsPerCluster(const int rof,
402+
const int totROFs,
403+
const int mode,
404+
const int** roframesClus,
405+
const Tracklet** tracklets)
406+
{
407+
if (rof < 0 || rof >= totROFs) {
408+
return gpuSpan<const Tracklet>();
409+
}
410+
const int start_clus_id{roframesClus[1][rof]};
411+
const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
412+
const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
413+
return gpuSpan<const Tracklet>(&(tracklets[mode][start_clus_id]), delta);
414+
}
415+
416+
GPUdii() gpuSpan<int> getNTrackletsPerCluster(const int rof,
417+
const int totROFs,
418+
const int mode,
419+
const int** roframesClus,
420+
int** ntracklets)
421+
{
422+
if (rof < 0 || rof >= totROFs) {
423+
return gpuSpan<int>();
424+
}
425+
const int start_clus_id{roframesClus[1][rof]};
426+
const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
427+
const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
428+
return gpuSpan<int>(&(ntracklets[mode][start_clus_id]), delta);
429+
}
430+
431+
GPUdii() gpuSpan<const int> getNTrackletsPerCluster(const int rof,
432+
const int totROFs,
433+
const int mode,
434+
const int** roframesClus,
435+
const int** ntracklets)
436+
{
437+
if (rof < 0 || rof >= totROFs) {
438+
return gpuSpan<const int>();
439+
}
440+
const int start_clus_id{roframesClus[1][rof]};
441+
const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
442+
const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
443+
return gpuSpan<const int>(&(ntracklets[mode][start_clus_id]), delta);
444+
}
445+
446+
GPUdii() gpuSpan<int> getNLinesPerCluster(const int rof,
447+
const int totROFs,
448+
const int** roframesClus,
449+
int* nlines)
450+
{
451+
if (rof < 0 || rof >= totROFs) {
452+
return gpuSpan<int>();
453+
}
454+
const int start_clus_id{roframesClus[1][rof]};
455+
const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
456+
const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
457+
return gpuSpan<int>(&(nlines[start_clus_id]), delta);
458+
}
459+
460+
GPUdii() gpuSpan<const int> getNLinesPerCluster(const int rof,
461+
const int totROFs,
462+
const int** roframesClus,
463+
const int* nlines)
464+
{
465+
if (rof < 0 || rof >= totROFs) {
466+
return gpuSpan<const int>();
467+
}
468+
const int start_clus_id{roframesClus[1][rof]};
469+
const int stop_rof = rof >= totROFs - 1 ? totROFs : rof + 1;
470+
const unsigned int delta = roframesClus[1][stop_rof] - start_clus_id;
471+
return gpuSpan<const int>(&(nlines[start_clus_id]), delta);
472+
}
473+
#endif
285474
} // namespace gpu
286475
} // namespace o2::its
287476

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/VertexerTraitsGPU.h

Lines changed: 4 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -31,32 +31,25 @@
3131
namespace o2::its
3232
{
3333

34-
class VertexerTraitsGPU final : public VertexerTraits
34+
template <int nLayers>
35+
class VertexerTraitsGPU final : public VertexerTraits<nLayers>
3536
{
3637
public:
3738
void initialise(const TrackingParameters&, const int iteration = 0) final;
38-
void adoptTimeFrame(TimeFrame<7>*) noexcept final;
39+
void adoptTimeFrame(TimeFrame<nLayers>* tf) noexcept final;
3940
void computeTracklets(const int iteration = 0) final;
4041
void computeTrackletMatching(const int iteration = 0) final;
4142
void computeVertices(const int iteration = 0) final;
4243
void updateVertexingParameters(const std::vector<VertexingParameters>&, const TimeFrameGPUParameters&) final;
43-
void computeVerticesHist();
4444

4545
bool isGPU() const noexcept final { return true; }
4646
const char* getName() const noexcept final { return "GPU"; }
4747

4848
protected:
49-
IndexTableUtils* mDeviceIndexTableUtils;
50-
gpu::TimeFrameGPU<7>* mTimeFrameGPU;
49+
gpu::TimeFrameGPU<nLayers>* mTimeFrameGPU;
5150
TimeFrameGPUParameters mTfGPUParams;
5251
};
5352

54-
inline void VertexerTraitsGPU::adoptTimeFrame(TimeFrame<7>* tf) noexcept
55-
{
56-
mTimeFrameGPU = static_cast<gpu::TimeFrameGPU<7>*>(tf);
57-
mTimeFrame = static_cast<TimeFrame<7>*>(tf);
58-
}
59-
6053
} // namespace o2::its
6154

6255
#endif

0 commit comments

Comments
 (0)