Skip to content

Commit b967887

Browse files
committed
ITS: GPU: add stream abstraction + use for trackleting
1 parent 8670fd5 commit b967887

File tree

6 files changed

+364
-296
lines changed

6 files changed

+364
-296
lines changed

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TimeFrameGPU.h

Lines changed: 6 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,8 +22,6 @@
2222
namespace o2::its::gpu
2323
{
2424

25-
class Stream;
26-
2725
class DefaultGPUAllocator : public ExternalAllocator
2826
{
2927
void* allocate(size_t size) override;
@@ -80,10 +78,11 @@ class TimeFrameGPU : public TimeFrame
8078
void downloadCellsLUTDevice();
8179
void unregisterRest();
8280
template <Task task>
83-
Stream& getStream(const size_t stream)
81+
auto getStream(const size_t stream)
8482
{
85-
return *mGpuStreams[stream];
83+
return mGpuStreams[stream];
8684
}
85+
auto& getStreams() { return mGpuStreams; }
8786
void wipe(const int);
8887

8988
/// interface
@@ -141,7 +140,8 @@ class TimeFrameGPU : public TimeFrame
141140
int getNumberOfCells() const;
142141

143142
private:
144-
void allocMemAsync(void**, size_t, Stream*, bool); // Abstract owned and unowned memory allocations
143+
void allocMemAsync(void**, size_t, Stream&, bool); // Abstract owned and unowned memory allocations
144+
void allocMemAsync(void**, size_t, Stream&, bool, int); // Abstract owned and unowned memory allocations and set
145145
bool mHostRegistered = false;
146146
TimeFrameGPUParameters mGpuParams;
147147

@@ -194,7 +194,7 @@ class TimeFrameGPU : public TimeFrame
194194
const TrackingFrameInfo** mTrackingFrameInfoDeviceArray;
195195

196196
// State
197-
std::vector<Stream*> mGpuStreams;
197+
Streams mGpuStreams;
198198
size_t mAvailMemGB;
199199
bool mFirstInit = true;
200200

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/TrackingKernels.h

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,8 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils,
7979
std::vector<float>& radii,
8080
std::vector<float>& mulScatAng,
8181
const int nBlocks,
82-
const int nThreads);
82+
const int nThreads,
83+
gpu::Streams& streams);
8384

8485
template <int nLayers = 7>
8586
void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
@@ -112,7 +113,8 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
112113
std::vector<float>& radii,
113114
std::vector<float>& mulScatAng,
114115
const int nBlocks,
115-
const int nThreads);
116+
const int nThreads,
117+
gpu::Streams& streams);
116118

117119
void countCellsHandler(const Cluster** sortedClusters,
118120
const Cluster** unsortedClusters,

Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h

Lines changed: 72 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,10 @@
1616
#ifndef ITSTRACKINGGPU_UTILS_H_
1717
#define ITSTRACKINGGPU_UTILS_H_
1818

19+
#include <vector>
20+
1921
#include "GPUCommonDef.h"
22+
#include "GPUCommonHelpers.h"
2023

2124
namespace o2
2225
{
@@ -31,11 +34,6 @@ struct gpuPair {
3134
namespace gpu
3235
{
3336

34-
template <typename T>
35-
void discardResult(const T&)
36-
{
37-
}
38-
3937
// Poor man implementation of a span-like struct. It is very limited.
4038
template <typename T>
4139
struct gpuSpan {
@@ -96,8 +94,76 @@ GPUhd() const T* getPtrFromRuler(int index, const T* src, const int* ruler, cons
9694
{
9795
return src + ruler[index] * stride;
9896
}
97+
98+
// Abstract stream class
99+
class Stream
100+
{
101+
public:
102+
#if defined(__HIPCC__)
103+
using Handle = hipStream_t;
104+
static constexpr Handle Default = 0;
105+
#elif defined(__CUDACC__)
106+
using Handle = cudaStream_t;
107+
static constexpr Handle Default = 0;
108+
#else
109+
using Handle = void*;
110+
static constexpr Handle Default = nullptr;
111+
#endif
112+
113+
Stream(unsigned int flags = 0)
114+
{
115+
#if defined(__HIPCC__)
116+
GPUChkErrS(hipStreamCreateWithFlags(&mHandle, flags));
117+
#elif defined(__CUDACC__)
118+
GPUChkErrS(cudaStreamCreateWithFlags(&mHandle, flags));
119+
#endif
120+
}
121+
122+
Stream(Handle h) : mHandle(h) {}
123+
~Stream()
124+
{
125+
if (mHandle != Default) {
126+
#if defined(__HIPCC__)
127+
GPUChkErrS(hipStreamDestroy(mHandle));
128+
#elif defined(__CUDACC__)
129+
GPUChkErrS(cudaStreamDestroy(mHandle));
130+
#endif
131+
}
132+
}
133+
134+
operator bool() const { return mHandle != Default; }
135+
const Handle& get() { return mHandle; }
136+
void sync() const
137+
{
138+
#if defined(__HIPCC__)
139+
GPUChkErrS(hipStreamSynchronize(mHandle));
140+
#elif defined(__CUDACC__)
141+
GPUChkErrS(cudaStreamSynchronize(mHandle));
142+
#endif
143+
}
144+
145+
private:
146+
Handle mHandle{Default};
147+
};
148+
static_assert(sizeof(Stream) == sizeof(void*), "Stream type must match pointer type!");
149+
150+
// Abstract vector for streams.
151+
// Handles specifically wrap around.
152+
class Streams
153+
{
154+
public:
155+
size_t size() const noexcept { return mStreams.size(); }
156+
void resize(size_t n) { mStreams.resize(n); }
157+
void clear() { mStreams.clear(); }
158+
auto& operator[](size_t i) { return mStreams[i % mStreams.size()]; }
159+
void push_back(const Stream& stream) { mStreams.push_back(stream); }
160+
161+
private:
162+
std::vector<Stream> mStreams;
163+
};
164+
99165
} // namespace gpu
100166
} // namespace its
101167
} // namespace o2
102168

103-
#endif
169+
#endif

0 commit comments

Comments
 (0)