Skip to content

Commit e34c924

Browse files
committed
ITS: GPU use ms for tracklets
Signed-off-by: Felix Schlepper <felix.schlepper@cern.ch>
1 parent bb8f947 commit e34c924

File tree

6 files changed

+268
-236
lines changed

6 files changed

+268
-236
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,6 @@
2323
namespace o2::its::gpu
2424
{
2525

26-
class Stream;
27-
2826
class DefaultGPUAllocator : public ExternalAllocator
2927
{
3028
void* allocate(size_t size) override;
@@ -81,10 +79,11 @@ class TimeFrameGPU : public TimeFrame<nLayers>
8179
void downloadCellsLUTDevice();
8280
void unregisterRest();
8381
template <Task task>
84-
Stream& getStream(const size_t stream)
82+
auto& getStream(const size_t stream)
8583
{
86-
return *mGpuStreams[stream];
84+
return mGpuStreams[stream];
8785
}
86+
auto& getStreams() { return mGpuStreams; }
8887
void wipe(const int);
8988

9089
/// interface
@@ -146,7 +145,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
146145
int getNumberOfNeighbours() const final;
147146

148147
private:
149-
void allocMemAsync(void**, size_t, Stream*, bool); // Abstract owned and unowned memory allocations
148+
void allocMemAsync(void**, size_t, Stream&, bool); // Abstract owned and unowned memory allocations
150149
bool mHostRegistered = false;
151150
TimeFrameGPUParameters mGpuParams;
152151

@@ -200,7 +199,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
200199
const TrackingFrameInfo** mTrackingFrameInfoDeviceArray;
201200

202201
// State
203-
std::vector<Stream*> mGpuStreams;
202+
Streams mGpuStreams;
204203
size_t mAvailMemGB;
205204
bool mFirstInit = true;
206205

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,8 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils,
8484
std::vector<float>& radii,
8585
bounded_vector<float>& mulScatAng,
8686
const int nBlocks,
87-
const int nThreads);
87+
const int nThreads,
88+
gpu::Streams& streams);
8889

8990
template <int nLayers = 7>
9091
void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
@@ -117,7 +118,8 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
117118
std::vector<float>& radii,
118119
bounded_vector<float>& mulScatAng,
119120
const int nBlocks,
120-
const int nThreads);
121+
const int nThreads,
122+
gpu::Streams& streams);
121123

122124
void countCellsHandler(const Cluster** sortedClusters,
123125
const Cluster** unsortedClusters,

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h

Lines changed: 71 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,12 +16,14 @@
1616
#ifndef ITSTRACKINGGPU_UTILS_H_
1717
#define ITSTRACKINGGPU_UTILS_H_
1818

19+
#include <vector>
20+
1921
#include "GPUCommonDef.h"
22+
#include "GPUCommonHelpers.h"
2023

21-
namespace o2
22-
{
23-
namespace its
24+
namespace o2::its
2425
{
26+
2527
template <typename T1, typename T2>
2628
struct gpuPair {
2729
T1 first;
@@ -31,11 +33,6 @@ struct gpuPair {
3133
namespace gpu
3234
{
3335

34-
template <typename T>
35-
void discardResult(const T&)
36-
{
37-
}
38-
3936
// Poor man implementation of a span-like struct. It is very limited.
4037
template <typename T>
4138
struct gpuSpan {
@@ -85,19 +82,74 @@ enum class Task {
8582
Vertexer = 1
8683
};
8784

88-
template <class T>
89-
GPUhd() T* getPtrFromRuler(int index, T* src, const int* ruler, const int stride = 1)
85+
// Abstract stream class
86+
class Stream
9087
{
91-
return src + ruler[index] * stride;
92-
}
88+
public:
89+
#if defined(__HIPCC__)
90+
using Handle = hipStream_t;
91+
static constexpr Handle Default = 0;
92+
#elif defined(__CUDACC__)
93+
using Handle = cudaStream_t;
94+
static constexpr Handle Default = 0;
95+
#else
96+
using Handle = void*;
97+
static constexpr Handle Default = nullptr;
98+
#endif
99+
100+
Stream(unsigned int flags = 0)
101+
{
102+
#if defined(__HIPCC__)
103+
GPUChkErrS(hipStreamCreateWithFlags(&mHandle, flags));
104+
#elif defined(__CUDACC__)
105+
GPUChkErrS(cudaStreamCreateWithFlags(&mHandle, flags));
106+
#endif
107+
}
93108

94-
template <class T>
95-
GPUhd() const T* getPtrFromRuler(int index, const T* src, const int* ruler, const int stride = 1)
109+
Stream(Handle h) : mHandle(h) {}
110+
~Stream()
111+
{
112+
if (mHandle != Default) {
113+
#if defined(__HIPCC__)
114+
GPUChkErrS(hipStreamDestroy(mHandle));
115+
#elif defined(__CUDACC__)
116+
GPUChkErrS(cudaStreamDestroy(mHandle));
117+
#endif
118+
}
119+
}
120+
121+
operator bool() const { return mHandle != Default; }
122+
const Handle& get() { return mHandle; }
123+
void sync() const
124+
{
125+
#if defined(__HIPCC__)
126+
GPUChkErrS(hipStreamSynchronize(mHandle));
127+
#elif defined(__CUDACC__)
128+
GPUChkErrS(cudaStreamSynchronize(mHandle));
129+
#endif
130+
}
131+
132+
private:
133+
Handle mHandle{Default};
134+
};
135+
static_assert(sizeof(Stream) == sizeof(void*), "Stream type must match pointer type!");
136+
137+
// Abstract vector for streams.
138+
// Handles specifically wrap around.
139+
class Streams
96140
{
97-
return src + ruler[index] * stride;
98-
}
141+
public:
142+
size_t size() const noexcept { return mStreams.size(); }
143+
void resize(size_t n) { mStreams.resize(n); }
144+
void clear() { mStreams.clear(); }
145+
auto& operator[](size_t i) { return mStreams[i % mStreams.size()]; }
146+
void push_back(const Stream& stream) { mStreams.push_back(stream); }
147+
148+
private:
149+
std::vector<Stream> mStreams;
150+
};
151+
99152
} // namespace gpu
100-
} // namespace its
101-
} // namespace o2
153+
} // namespace o2::its
102154

103-
#endif
155+
#endif

0 commit comments

Comments
 (0)