Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,6 @@
namespace o2::its::gpu
{

class Stream;

class DefaultGPUAllocator : public ExternalAllocator
{
void* allocate(size_t size) override;
Expand Down Expand Up @@ -81,10 +79,11 @@ class TimeFrameGPU : public TimeFrame<nLayers>
void downloadCellsLUTDevice();
void unregisterRest();
template <Task task>
Stream& getStream(const size_t stream)
auto& getStream(const size_t stream)
{
return *mGpuStreams[stream];
return mGpuStreams[stream];
}
auto& getStreams() { return mGpuStreams; }
void wipe(const int);

/// interface
Expand Down Expand Up @@ -146,7 +145,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
int getNumberOfNeighbours() const final;

private:
void allocMemAsync(void**, size_t, Stream*, bool); // Abstract owned and unowned memory allocations
void allocMemAsync(void**, size_t, Stream&, bool); // Abstract owned and unowned memory allocations
bool mHostRegistered = false;
TimeFrameGPUParameters mGpuParams;

Expand Down Expand Up @@ -200,7 +199,7 @@ class TimeFrameGPU : public TimeFrame<nLayers>
const TrackingFrameInfo** mTrackingFrameInfoDeviceArray;

// State
std::vector<Stream*> mGpuStreams;
Streams mGpuStreams;
size_t mAvailMemGB;
bool mFirstInit = true;

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@ void countTrackletsInROFsHandler(const IndexTableUtils* utils,
std::vector<float>& radii,
bounded_vector<float>& mulScatAng,
const int nBlocks,
const int nThreads);
const int nThreads,
gpu::Streams& streams);

template <int nLayers = 7>
void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
Expand Down Expand Up @@ -117,7 +118,8 @@ void computeTrackletsInROFsHandler(const IndexTableUtils* utils,
std::vector<float>& radii,
bounded_vector<float>& mulScatAng,
const int nBlocks,
const int nThreads);
const int nThreads,
gpu::Streams& streams);

void countCellsHandler(const Cluster** sortedClusters,
const Cluster** unsortedClusters,
Expand Down
90 changes: 71 additions & 19 deletions Detectors/ITSMFT/ITS/tracking/GPU/ITStrackingGPU/Utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,14 @@
#ifndef ITSTRACKINGGPU_UTILS_H_
#define ITSTRACKINGGPU_UTILS_H_

#include <vector>

#include "GPUCommonDef.h"
#include "GPUCommonHelpers.h"

namespace o2
{
namespace its
namespace o2::its
{

template <typename T1, typename T2>
struct gpuPair {
T1 first;
Expand All @@ -31,11 +33,6 @@ struct gpuPair {
namespace gpu
{

template <typename T>
void discardResult(const T&)
{
}

// Poor man implementation of a span-like struct. It is very limited.
template <typename T>
struct gpuSpan {
Expand Down Expand Up @@ -85,19 +82,74 @@ enum class Task {
Vertexer = 1
};

template <class T>
GPUhd() T* getPtrFromRuler(int index, T* src, const int* ruler, const int stride = 1)
// Abstract stream class
class Stream
{
return src + ruler[index] * stride;
}
public:
#if defined(__HIPCC__)
using Handle = hipStream_t;
static constexpr Handle Default = 0;
#elif defined(__CUDACC__)
using Handle = cudaStream_t;
static constexpr Handle Default = 0;
#else
using Handle = void*;
static constexpr Handle Default = nullptr;
#endif

Stream(unsigned int flags = 0)
{
#if defined(__HIPCC__)
GPUChkErrS(hipStreamCreateWithFlags(&mHandle, flags));
#elif defined(__CUDACC__)
GPUChkErrS(cudaStreamCreateWithFlags(&mHandle, flags));
#endif
}

template <class T>
GPUhd() const T* getPtrFromRuler(int index, const T* src, const int* ruler, const int stride = 1)
Stream(Handle h) : mHandle(h) {}
~Stream()
{
if (mHandle != Default) {
#if defined(__HIPCC__)
GPUChkErrS(hipStreamDestroy(mHandle));
#elif defined(__CUDACC__)
GPUChkErrS(cudaStreamDestroy(mHandle));
#endif
}
}

operator bool() const { return mHandle != Default; }
const Handle& get() { return mHandle; }
void sync() const
{
#if defined(__HIPCC__)
GPUChkErrS(hipStreamSynchronize(mHandle));
#elif defined(__CUDACC__)
GPUChkErrS(cudaStreamSynchronize(mHandle));
#endif
}

private:
Handle mHandle{Default};
};
static_assert(sizeof(Stream) == sizeof(void*), "Stream type must match pointer type!");

// Abstract vector for streams.
// Handles specifically wrap around.
class Streams
{
return src + ruler[index] * stride;
}
public:
size_t size() const noexcept { return mStreams.size(); }
void resize(size_t n) { mStreams.resize(n); }
void clear() { mStreams.clear(); }
auto& operator[](size_t i) { return mStreams[i % mStreams.size()]; }
void push_back(const Stream& stream) { mStreams.push_back(stream); }

private:
std::vector<Stream> mStreams;
};

} // namespace gpu
} // namespace its
} // namespace o2
} // namespace o2::its

#endif
#endif
Loading