Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion GPU/Common/GPUCommonDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@

#if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCL__) && defined(GPUCA_OPENCL_NO_CONSTANT_MEMORY))
#define GPUCA_NO_CONSTANT_MEMORY
#elif defined(__CUDACC__) || defined(__HIPCC__)
#elif (defined(__CUDACC__) || defined(__HIPCC__)) && !defined(GPUCA_GPUCODE_HOSTONLY)
#define GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM
#endif

Expand Down
4 changes: 2 additions & 2 deletions GPU/GPUTracking/Base/GPUConstantMem.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,7 +95,7 @@ union GPUConstantMemCopyable {
static constexpr size_t gGPUConstantMemBufferSize = (sizeof(GPUConstantMem) + sizeof(uint4) - 1);
#endif
} // namespace o2::gpu
#if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY)
#if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM)
GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; // TODO: This should go into o2::gpu namespace, but then CUDA or HIP would not find the symbol
#endif // GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM
namespace o2::gpu
Expand All @@ -104,7 +104,7 @@ namespace o2::gpu
// Must be placed here, to avoid circular header dependency
GPUdi() GPUconstantref() const GPUConstantMem* GPUProcessor::GetConstantMem() const
{
#if defined(GPUCA_GPUCODE_DEVICE) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY)
#if defined(GPUCA_GPUCODE_DEVICE) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM)
return &GPUCA_CONSMEM;
#else
return mConstantMem;
Expand Down
24 changes: 18 additions & 6 deletions GPU/GPUTracking/Base/GPUReconstructionCPU.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include "GPUReconstructionIncludes.h"
#include "GPUReconstructionThreading.h"
#include "GPUChain.h"

#include "GPUDefParameters.h"
#include "GPUTPCClusterData.h"
#include "GPUTPCSectorOutCluster.h"
#include "GPUTPCGMMergedTrack.h"
Expand Down Expand Up @@ -120,15 +120,27 @@ void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs<T, I, Arg
#pragma GCC diagnostic push
}

template <class T, int32_t I>
krnlProperties GPUReconstructionCPUBackend::getKernelPropertiesBackend()
template <class S, int32_t I>
gpu_reconstruction_kernels::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu)
{
return krnlProperties{1, 1};
if (gpu == -1) {
gpu = IsGPU();
}
const auto num = GetKernelNum<S, I>();
const auto* p = gpu ? mParDevice : mParCPU;
gpu_reconstruction_kernels::krnlProperties ret = {p->par_LB_maxThreads[num], p->par_LB_minBlocks[num], p->par_LB_forceBlocks[num]};
if (ret.nThreads == 0) {
ret.nThreads = gpu ? mThreadCount : 1u;
}
if (ret.minBlocks == 0) {
ret.minBlocks = 1;
}
return ret;
}

#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \
#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \
template void GPUReconstructionCPUBackend::runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args); \
template krnlProperties GPUReconstructionCPUBackend::getKernelPropertiesBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>();
template krnlProperties GPUReconstructionCPU::getKernelProperties<GPUCA_M_KRNL_TEMPLATE(x_class)>(int gpu);
#include "GPUReconstructionKernelList.h"
#undef GPUCA_KRNL

Expand Down
15 changes: 6 additions & 9 deletions GPU/GPUTracking/Base/GPUReconstructionCPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,6 @@ class GPUReconstructionCPUBackend : public GPUReconstructionProcessing
void runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs<T, I, Args...>& args);
template <class T, int32_t I = 0, typename... Args>
void runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime& _xyz, const Args&... args);
template <class T, int32_t I>
gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend();
};

class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCPUBackend>
Expand All @@ -55,10 +53,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
template <class S, int32_t I = 0, typename... Args>
void runKernel(krnlSetup&& setup, Args&&... args);
template <class S, int32_t I = 0>
const gpu_reconstruction_kernels::krnlProperties getKernelProperties()
{
return getKernelPropertiesImpl(gpu_reconstruction_kernels::classArgument<S, I>());
}
gpu_reconstruction_kernels::krnlProperties getKernelProperties(int gpu = -1);

virtual int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false);
int32_t GPUStuck() { return mGPUStuck; }
Expand All @@ -77,13 +72,15 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP

GPUReconstructionCPU(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg) {}

#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \
#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, ...) \
inline void runKernelImplWrapper(gpu_reconstruction_kernels::classArgument<GPUCA_M_KRNL_TEMPLATE(x_class)>, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \
{ \
krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)> args(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward)); \
const uint32_t num = GetKernelNum<GPUCA_M_KRNL_TEMPLATE(x_class)>(); \
if (cpuFallback) { \
GPUReconstructionCPU::runKernelImpl(krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \
GPUReconstructionCPU::runKernelImpl(num, &args); \
} else { \
runKernelImpl(krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \
runKernelImpl(num, &args); \
} \
}
#include "GPUReconstructionKernelList.h"
Expand Down
27 changes: 9 additions & 18 deletions GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,21 +38,18 @@
#ifndef GPUCA_KRNL_CUSTOM
#define GPUCA_KRNL_CUSTOM(...)
#endif
#define GPUCA_KRNL_REG_EXTRREG(...) GPUCA_M_STRIP(__VA_ARGS__)
#define GPUCA_KRNL_CUSTOM_EXTRREG(MODE, ...) GPUCA_ATTRRES_XCUSTOM(MODE, __VA_ARGS__)
#define GPUCA_KRNL_NONE_EXTRREG(MODE, ...) GPUCA_ATTRRES_XNONE(MODE, __VA_ARGS__)
#define GPUCA_ATTRRES_REG(MODE, reg, num, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_REG, MODE))(num) GPUCA_ATTRRES_XREG (MODE, __VA_ARGS__)
#define GPUCA_ATTRRES_CUSTOM(MODE, custom, args, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_CUSTOM, MODE))(args) GPUCA_ATTRRES_XCUSTOM(MODE, __VA_ARGS__)
#define GPUCA_ATTRRES_NONE(MODE, none, ...) GPUCA_ATTRRES_XNONE(MODE, __VA_ARGS__)
#define GPUCA_ATTRRES_(MODE, ...)
#define GPUCA_ATTRRES_XNONE(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__)
#define GPUCA_ATTRRES_XCUSTOM(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__)
#define GPUCA_ATTRRES_XREG(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__)
#define GPUCA_ATTRRES(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__)
#define GPUCA_ATTRRES_REG(reg, num, ...) GPUCA_M_EXPAND(GPUCA_KRNL_REG)(num) GPUCA_ATTRRES_XREG (__VA_ARGS__)
#define GPUCA_ATTRRES_CUSTOM(custom, args, ...) GPUCA_M_EXPAND(GPUCA_KRNL_CUSTOM)(args) GPUCA_ATTRRES_XCUSTOM(__VA_ARGS__)
#define GPUCA_ATTRRES_NONE(none, ...) GPUCA_ATTRRES_XNONE(__VA_ARGS__)
#define GPUCA_ATTRRES_(...)
#define GPUCA_ATTRRES_XNONE(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__)
#define GPUCA_ATTRRES_XCUSTOM(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__)
#define GPUCA_ATTRRES_XREG(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__)
#define GPUCA_ATTRRES(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__)

// GPU Kernel entry point
#define GPUCA_KRNLGPU_DEF(x_class, x_attributes, x_arguments, ...) \
GPUg() void GPUCA_ATTRRES(, GPUCA_M_STRIP(x_attributes)) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t _iSector_internal GPUCA_M_STRIP(x_arguments))
GPUg() void GPUCA_ATTRRES(GPUCA_M_STRIP(x_attributes)) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t _iSector_internal GPUCA_M_STRIP(x_arguments))

#ifdef GPUCA_KRNL_DEFONLY
#define GPUCA_KRNLGPU(...) GPUCA_KRNLGPU_DEF(__VA_ARGS__);
Expand All @@ -79,12 +76,6 @@
} \
};

#define GPUCA_KRNL_PROP(x_class, x_attributes) \
template <> gpu_reconstruction_kernels::krnlProperties GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::getKernelPropertiesBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>() { \
gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_EXTRREG, GPUCA_M_STRIP(x_attributes))}; \
return ret.nThreads > 0 ? ret : gpu_reconstruction_kernels::krnlProperties{(int32_t)mThreadCount}; \
}

#endif // GPUCA_GPUCODE

#define GPUCA_KRNL_LB(x_class, x_attributes, ...) GPUCA_KRNL(x_class, (REG, (GPUCA_M_CAT(GPUCA_LB_, GPUCA_M_KRNL_NAME(x_class))), GPUCA_M_STRIP(x_attributes)), __VA_ARGS__)
Expand Down
20 changes: 11 additions & 9 deletions GPU/GPUTracking/Base/GPUReconstructionKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,17 +95,19 @@ class GPUReconstructionKernels : public T
template <class S, int32_t I = 0, typename... Args>
using krnlSetupArgs = gpu_reconstruction_kernels::krnlSetupArgs<S, I, Args...>;

#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \
virtual void runKernelImpl(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args) \
{ \
T::template runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(args); \
} \
virtual gpu_reconstruction_kernels::krnlProperties getKernelPropertiesImpl(gpu_reconstruction_kernels::classArgument<GPUCA_M_KRNL_TEMPLATE(x_class)>) \
{ \
return T::template getKernelPropertiesBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(); \
}
// Type-erased kernel launch entry point: selects the kernel by its number and forwards
// to the backend's typed runKernelBackend<...>().
//
// num  - kernel number; each case label below is the x_num argument of one GPUCA_KRNL entry.
// args - must point to the krnlSetupArgs<...> specialisation belonging to kernel `num`;
//        it is cast back to that type without any runtime check.
//
// The switch has no default label, so a `num` matching no entry returns without launching anything.
virtual void runKernelImpl(const int num, const void* args)
{
switch (num) { // clang-format off
// One case per kernel is generated by expanding GPUCA_KRNL over the entries of the included
// kernel list (presumably every entry there invokes GPUCA_KRNL with a unique x_num - confirm).
#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, x_num) \
case x_num: { \
const auto& args2 = *(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>*)args; \
T::template runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(args2); \
break; \
}
#include "GPUReconstructionKernelList.h"
#undef GPUCA_KRNL
} // clang-format on
}
};

} // namespace o2::gpu
Expand Down
68 changes: 38 additions & 30 deletions GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -12,11 +12,35 @@
/// \file GPUReconstructionProcessing.cxx
/// \author David Rohr

#define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS
#include "GPUDefParametersDefault.h"
#include "GPUDefParametersLoad.inc"

#include "GPUReconstructionProcessing.h"
#include "GPUReconstructionThreading.h"

using namespace o2::gpu;

/// Constructs the processing layer on top of GPUReconstruction and sets up the two
/// GPUDefParameters tables, mParCPU and mParDevice.
///
/// A master instance (mMaster == nullptr) allocates both tables: mParCPU is
/// copy-constructed from the result of GPUDefParametersLoad(), mParDevice is
/// value-initialised. Any other instance allocates nothing and stores the
/// master's pointers instead.
///
/// \param cfg Backend settings, forwarded unchanged to the GPUReconstruction base.
/// \throws std::bad_cast if mMaster is set but is not a GPUReconstructionProcessing.
GPUReconstructionProcessing::GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg)
{
  if (mMaster == nullptr) {
    mParCPU = new GPUDefParameters(o2::gpu::internal::GPUDefParametersLoad());
    mParDevice = new GPUDefParameters();
  } else {
    // Reference-form dynamic_cast: a master of the wrong dynamic type raises std::bad_cast
    // here, instead of producing a null pointer that would be dereferenced just below.
    const auto& master = dynamic_cast<const GPUReconstructionProcessing&>(*mMaster);
    mParCPU = master.mParCPU;
    mParDevice = master.mParDevice;
  }
}

/// Frees the mParCPU and mParDevice tables, but only when this instance is the master
/// (mMaster == nullptr). Instances with a master leave both pointers untouched.
GPUReconstructionProcessing::~GPUReconstructionProcessing()
{
  if (mMaster != nullptr) {
    return;
  }
  delete mParCPU;
  delete mParDevice;
}

int32_t GPUReconstructionProcessing::getNKernelHostThreads(bool splitCores)
{
int32_t nThreads = 0;
Expand Down Expand Up @@ -119,38 +143,22 @@ std::unique_ptr<gpu_reconstruction_kernels::threadContext> GPUReconstructionProc
gpu_reconstruction_kernels::threadContext::threadContext() = default;
gpu_reconstruction_kernels::threadContext::~threadContext() = default;

template <class T, int32_t I>
uint32_t GPUReconstructionProcessing::GetKernelNum(int32_t k)
{
static int32_t num = k;
if (num < 0) {
throw std::runtime_error("Internal Error - Kernel Number not Set");
}
return num;
}

namespace o2::gpu::internal
{
static std::vector<std::string> initKernelNames()
{
std::vector<std::string> retVal;
#define GPUCA_KRNL(x_class, ...) \
GPUReconstructionProcessing::GetKernelNum<GPUCA_M_KRNL_TEMPLATE(x_class)>(retVal.size()); \
retVal.emplace_back(GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)));
const std::vector<std::string> GPUReconstructionProcessing::mKernelNames = {
#define GPUCA_KRNL(x_class, ...) GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)),
#include "GPUReconstructionKernelList.h"
#undef GPUCA_KRNL
return retVal;
}
} // namespace o2::gpu::internal

const std::vector<std::string> GPUReconstructionProcessing::mKernelNames = o2::gpu::internal::initKernelNames();

#define GPUCA_KRNL(x_class, ...) \
template uint32_t GPUReconstructionProcessing::GetKernelNum<GPUCA_M_KRNL_TEMPLATE(x_class)>(int32_t); \
template <> \
const char* GPUReconstructionProcessing::GetKernelName<GPUCA_M_KRNL_TEMPLATE(x_class)>() \
{ \
return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \
};

// For each GPUCA_KRNL entry expanded from the included kernel list, emit two explicit
// specialisations on GPUReconstructionProcessing:
//   - GetKernelNum<...>()  returns the entry's x_num argument,
//   - GetKernelName<...>() returns the stringified kernel name.
// NOTE(review): presumably x_num equals the kernel's position in mKernelNames, since both
// are generated from the same list - confirm against GPUReconstructionKernelList.h.
#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types, x_num) \
template <> \
uint32_t GPUReconstructionProcessing::GetKernelNum<GPUCA_M_KRNL_TEMPLATE(x_class)>() \
{ \
return x_num; \
} \
template <> \
const char* GPUReconstructionProcessing::GetKernelName<GPUCA_M_KRNL_TEMPLATE(x_class)>() \
{ \
return GPUCA_M_STR(GPUCA_M_KRNL_NAME(x_class)); \
}
#include "GPUReconstructionKernelList.h"
#undef GPUCA_KRNL
11 changes: 8 additions & 3 deletions GPU/GPUTracking/Base/GPUReconstructionProcessing.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,8 @@
namespace o2::gpu
{

struct GPUDefParameters;

namespace gpu_reconstruction_kernels
{
struct deviceEvent {
Expand Down Expand Up @@ -63,7 +65,7 @@ class threadContext
class GPUReconstructionProcessing : public GPUReconstruction
{
public:
~GPUReconstructionProcessing() override = default;
~GPUReconstructionProcessing() override;

// Threading
int32_t getNKernelHostThreads(bool splitCores);
Expand All @@ -78,7 +80,7 @@ class GPUReconstructionProcessing : public GPUReconstruction
static const char* GetKernelName();
const std::string& GetKernelName(int32_t i) const { return mKernelNames[i]; }
template <class T, int32_t I = 0>
static uint32_t GetKernelNum(int32_t k = -1);
static uint32_t GetKernelNum();

// Public queries for timers
auto& getRecoStepTimer(RecoStep step) { return mTimersRecoSteps[getRecoStepNum(step)]; }
Expand All @@ -101,7 +103,7 @@ class GPUReconstructionProcessing : public GPUReconstruction
};

protected:
GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg) {}
GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg);
using deviceEvent = gpu_reconstruction_kernels::deviceEvent;

static const std::vector<std::string> mKernelNames;
Expand Down Expand Up @@ -132,6 +134,9 @@ class GPUReconstructionProcessing : public GPUReconstruction
template <class T, int32_t J = -1>
HighResTimer& getTimer(const char* name, int32_t num = -1);

GPUDefParameters* mParCPU = nullptr;
GPUDefParameters* mParDevice = nullptr;

private:
uint32_t getNextTimerId();
timerMeta* getTimerById(uint32_t id, bool increment = true);
Expand Down
17 changes: 10 additions & 7 deletions GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,13 @@
/// \author David Rohr

#define GPUCA_GPUCODE_HOSTONLY
#include "GPUReconstructionCUDAIncludesHost.h"

#define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS
#include "GPUReconstructionCUDADef.h"
#include "GPUDefParametersDefault.h"
#include "GPUDefParametersLoad.inc"

#include "GPUReconstructionCUDAIncludesHost.h"
#include <cuda_profiler_api.h>

#include "GPUReconstructionCUDA.h"
Expand Down Expand Up @@ -51,11 +56,14 @@ GPUReconstructionCUDABackend::GPUReconstructionCUDABackend(const GPUSettingsDevi
{
if (mMaster == nullptr) {
mInternals = new GPUReconstructionCUDAInternals;
*mParDevice = o2::gpu::internal::GPUDefParametersLoad();
}
mDeviceBackendSettings.deviceType = DeviceType::CUDA;
}

GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend()
{
Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit CUDA
if (mMaster == nullptr) {
delete mInternals;
}
Expand All @@ -69,7 +77,6 @@ int32_t GPUReconstructionCUDABackend::GPUChkErrInternal(const int64_t error, con

GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg)
{
mDeviceBackendSettings.deviceType = DeviceType::CUDA;
#ifndef __HIPCC__ // CUDA
mRtcSrcExtension = ".cu";
mRtcBinExtension = ".fatbin";
Expand All @@ -78,11 +85,7 @@ GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg
mRtcBinExtension = ".o";
#endif
}

GPUReconstructionCUDA::~GPUReconstructionCUDA()
{
Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit CUDA
}
// Empty body: Exit() is called from ~GPUReconstructionCUDABackend() rather than here
// (presumably that class is a base of GPUReconstructionCUDA - confirm).
GPUReconstructionCUDA::~GPUReconstructionCUDA() {}

// Factory function: heap-allocates a GPUReconstructionCUDA configured with `cfg` and returns
// it as a GPUReconstruction*. The result is a raw pointer; who deletes it is not visible here.
GPUReconstruction* GPUReconstruction_Create_CUDA(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionCUDA(cfg); }

Expand Down
Loading