Skip to content

Commit 6d54cfc

Browse files
committed
GPU: Make launch bounds of GPUDefParameters available as C++ struct at runtime
1 parent 243279a commit 6d54cfc

24 files changed

+488
-103
lines changed

GPU/Common/GPUCommonDef.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@
5858

5959
#if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCL__) && defined(GPUCA_OPENCL_NO_CONSTANT_MEMORY))
6060
#define GPUCA_NO_CONSTANT_MEMORY
61-
#elif defined(__CUDACC__) || defined(__HIPCC__)
61+
#elif (defined(__CUDACC__) || defined(__HIPCC__)) && !defined(GPUCA_GPUCODE_HOSTONLY)
6262
#define GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM
6363
#endif
6464

GPU/GPUTracking/Base/GPUConstantMem.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -95,7 +95,7 @@ union GPUConstantMemCopyable {
9595
static constexpr size_t gGPUConstantMemBufferSize = (sizeof(GPUConstantMem) + sizeof(uint4) - 1);
9696
#endif
9797
} // namespace o2::gpu
98-
#if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY)
98+
#if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM)
9999
GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; // TODO: This should go into o2::gpu namespace, but then CUDA or HIP would not find the symbol
100100
#endif // GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM
101101
namespace o2::gpu
@@ -104,7 +104,7 @@ namespace o2::gpu
104104
// Must be placed here, to avoid circular header dependency
105105
GPUdi() GPUconstantref() const GPUConstantMem* GPUProcessor::GetConstantMem() const
106106
{
107-
#if defined(GPUCA_GPUCODE_DEVICE) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY)
107+
#if defined(GPUCA_GPUCODE_DEVICE) && defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM)
108108
return &GPUCA_CONSMEM;
109109
#else
110110
return mConstantMem;

GPU/GPUTracking/Base/GPUReconstructionCPU.cxx

Lines changed: 17 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@
1616
#include "GPUReconstructionIncludes.h"
1717
#include "GPUReconstructionThreading.h"
1818
#include "GPUChain.h"
19-
19+
#include "GPUDefParameters.h"
2020
#include "GPUTPCClusterData.h"
2121
#include "GPUTPCSectorOutCluster.h"
2222
#include "GPUTPCGMMergedTrack.h"
@@ -120,15 +120,27 @@ void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs<T, I, Arg
120120
#pragma GCC diagnostic push
121121
}
122122

123-
template <class T, int32_t I>
124-
krnlProperties GPUReconstructionCPUBackend::getKernelPropertiesBackend()
123+
template <class S, int32_t I>
124+
gpu_reconstruction_kernels::krnlProperties GPUReconstructionCPU::getKernelProperties(int gpu)
125125
{
126-
return krnlProperties{1, 1};
126+
if (gpu == -1) {
127+
gpu = IsGPU();
128+
}
129+
const auto num = GetKernelNum<S, I>();
130+
const auto* p = gpu ? mParDevice : mParCPU;
131+
gpu_reconstruction_kernels::krnlProperties ret = {p->par_LB_maxThreads[num], p->par_LB_minBlocks[num], p->par_LB_forceBlocks[num]};
132+
if (ret.nThreads == 0) {
133+
ret.nThreads = gpu ? mThreadCount : 1u;
134+
}
135+
if (ret.minBlocks == 0) {
136+
ret.minBlocks = 1;
137+
}
138+
return ret;
127139
}
128140

129141
#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \
130142
template void GPUReconstructionCPUBackend::runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args); \
131-
template krnlProperties GPUReconstructionCPUBackend::getKernelPropertiesBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>();
143+
template krnlProperties GPUReconstructionCPU::getKernelProperties<GPUCA_M_KRNL_TEMPLATE(x_class)>(int gpu);
132144
#include "GPUReconstructionKernelList.h"
133145
#undef GPUCA_KRNL
134146

GPU/GPUTracking/Base/GPUReconstructionCPU.h

Lines changed: 1 addition & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -38,8 +38,6 @@ class GPUReconstructionCPUBackend : public GPUReconstructionProcessing
3838
void runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs<T, I, Args...>& args);
3939
template <class T, int32_t I = 0, typename... Args>
4040
void runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime& _xyz, const Args&... args);
41-
template <class T, int32_t I>
42-
gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend();
4341
};
4442

4543
class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCPUBackend>
@@ -55,10 +53,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
5553
template <class S, int32_t I = 0, typename... Args>
5654
void runKernel(krnlSetup&& setup, Args&&... args);
5755
template <class S, int32_t I = 0>
58-
const gpu_reconstruction_kernels::krnlProperties getKernelProperties()
59-
{
60-
return getKernelPropertiesImpl(gpu_reconstruction_kernels::classArgument<S, I>());
61-
}
56+
gpu_reconstruction_kernels::krnlProperties getKernelProperties(int gpu = -1);
6257

6358
virtual int32_t GPUDebug(const char* state = "UNKNOWN", int32_t stream = -1, bool force = false);
6459
int32_t GPUStuck() { return mGPUStuck; }

GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h

Lines changed: 9 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -38,21 +38,18 @@
3838
#ifndef GPUCA_KRNL_CUSTOM
3939
#define GPUCA_KRNL_CUSTOM(...)
4040
#endif
41-
#define GPUCA_KRNL_REG_EXTRREG(...) GPUCA_M_STRIP(__VA_ARGS__)
42-
#define GPUCA_KRNL_CUSTOM_EXTRREG(MODE, ...) GPUCA_ATTRRES_XCUSTOM(MODE, __VA_ARGS__)
43-
#define GPUCA_KRNL_NONE_EXTRREG(MODE, ...) GPUCA_ATTRRES_XNONE(MODE, __VA_ARGS__)
44-
#define GPUCA_ATTRRES_REG(MODE, reg, num, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_REG, MODE))(num) GPUCA_ATTRRES_XREG (MODE, __VA_ARGS__)
45-
#define GPUCA_ATTRRES_CUSTOM(MODE, custom, args, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_KRNL_CUSTOM, MODE))(args) GPUCA_ATTRRES_XCUSTOM(MODE, __VA_ARGS__)
46-
#define GPUCA_ATTRRES_NONE(MODE, none, ...) GPUCA_ATTRRES_XNONE(MODE, __VA_ARGS__)
47-
#define GPUCA_ATTRRES_(MODE, ...)
48-
#define GPUCA_ATTRRES_XNONE(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__)
49-
#define GPUCA_ATTRRES_XCUSTOM(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__)
50-
#define GPUCA_ATTRRES_XREG(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__)
51-
#define GPUCA_ATTRRES(MODE, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(MODE, __VA_ARGS__)
41+
#define GPUCA_ATTRRES_REG(reg, num, ...) GPUCA_M_EXPAND(GPUCA_KRNL_REG)(num) GPUCA_ATTRRES_XREG (__VA_ARGS__)
42+
#define GPUCA_ATTRRES_CUSTOM(custom, args, ...) GPUCA_M_EXPAND(GPUCA_KRNL_CUSTOM)(args) GPUCA_ATTRRES_XCUSTOM(__VA_ARGS__)
43+
#define GPUCA_ATTRRES_NONE(none, ...) GPUCA_ATTRRES_XNONE(__VA_ARGS__)
44+
#define GPUCA_ATTRRES_(...)
45+
#define GPUCA_ATTRRES_XNONE(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__)
46+
#define GPUCA_ATTRRES_XCUSTOM(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__)
47+
#define GPUCA_ATTRRES_XREG(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__)
48+
#define GPUCA_ATTRRES(...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(__VA_ARGS__)
5249

5350
// GPU Kernel entry point
5451
#define GPUCA_KRNLGPU_DEF(x_class, x_attributes, x_arguments, ...) \
55-
GPUg() void GPUCA_ATTRRES(, GPUCA_M_STRIP(x_attributes)) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t _iSector_internal GPUCA_M_STRIP(x_arguments))
52+
GPUg() void GPUCA_ATTRRES(GPUCA_M_STRIP(x_attributes)) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t _iSector_internal GPUCA_M_STRIP(x_arguments))
5653

5754
#ifdef GPUCA_KRNL_DEFONLY
5855
#define GPUCA_KRNLGPU(...) GPUCA_KRNLGPU_DEF(__VA_ARGS__);
@@ -79,12 +76,6 @@
7976
} \
8077
};
8178

82-
#define GPUCA_KRNL_PROP(x_class, x_attributes) \
83-
template <> gpu_reconstruction_kernels::krnlProperties GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::getKernelPropertiesBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>() { \
84-
gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_EXTRREG, GPUCA_M_STRIP(x_attributes))}; \
85-
return ret.nThreads > 0 ? ret : gpu_reconstruction_kernels::krnlProperties{(int32_t)mThreadCount}; \
86-
}
87-
8879
#endif // GPUCA_GPUCODE
8980

9081
#define GPUCA_KRNL_LB(x_class, x_attributes, ...) GPUCA_KRNL(x_class, (REG, (GPUCA_M_CAT(GPUCA_LB_, GPUCA_M_KRNL_NAME(x_class))), GPUCA_M_STRIP(x_attributes)), __VA_ARGS__)

GPU/GPUTracking/Base/GPUReconstructionKernels.h

Lines changed: 4 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -95,14 +95,10 @@ class GPUReconstructionKernels : public T
9595
template <class S, int32_t I = 0, typename... Args>
9696
using krnlSetupArgs = gpu_reconstruction_kernels::krnlSetupArgs<S, I, Args...>;
9797

98-
#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \
99-
virtual void runKernelImpl(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args) \
100-
{ \
101-
T::template runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(args); \
102-
} \
103-
virtual gpu_reconstruction_kernels::krnlProperties getKernelPropertiesImpl(gpu_reconstruction_kernels::classArgument<GPUCA_M_KRNL_TEMPLATE(x_class)>) \
104-
{ \
105-
return T::template getKernelPropertiesBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(); \
98+
#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \
99+
virtual void runKernelImpl(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args) \
100+
{ \
101+
T::template runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(args); \
106102
}
107103
#include "GPUReconstructionKernelList.h"
108104
#undef GPUCA_KRNL

GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,11 +12,35 @@
1212
/// \file GPUReconstructionProcessing.cxx
1313
/// \author David Rohr
1414

15+
#define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS
16+
#include "GPUDefParametersDefault.h"
17+
#include "GPUDefParametersLoad.inc"
18+
1519
#include "GPUReconstructionProcessing.h"
1620
#include "GPUReconstructionThreading.h"
1721

1822
using namespace o2::gpu;
1923

24+
GPUReconstructionProcessing::GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg)
25+
{
26+
if (mMaster == nullptr) {
27+
mParCPU = new GPUDefParameters(o2::gpu::internal::GPUDefParametersLoad());
28+
mParDevice = new GPUDefParameters();
29+
} else {
30+
GPUReconstructionProcessing* master = dynamic_cast<GPUReconstructionProcessing*>(mMaster);
31+
mParCPU = master->mParCPU;
32+
mParDevice = master->mParDevice;
33+
}
34+
}
35+
36+
GPUReconstructionProcessing::~GPUReconstructionProcessing()
37+
{
38+
if (mMaster == nullptr) {
39+
delete mParCPU;
40+
delete mParDevice;
41+
}
42+
}
43+
2044
int32_t GPUReconstructionProcessing::getNKernelHostThreads(bool splitCores)
2145
{
2246
int32_t nThreads = 0;

GPU/GPUTracking/Base/GPUReconstructionProcessing.h

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,8 @@
2525
namespace o2::gpu
2626
{
2727

28+
struct GPUDefParameters;
29+
2830
namespace gpu_reconstruction_kernels
2931
{
3032
struct deviceEvent {
@@ -63,7 +65,7 @@ class threadContext
6365
class GPUReconstructionProcessing : public GPUReconstruction
6466
{
6567
public:
66-
~GPUReconstructionProcessing() override = default;
68+
~GPUReconstructionProcessing() override;
6769

6870
// Threading
6971
int32_t getNKernelHostThreads(bool splitCores);
@@ -101,7 +103,7 @@ class GPUReconstructionProcessing : public GPUReconstruction
101103
};
102104

103105
protected:
104-
GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg) : GPUReconstruction(cfg) {}
106+
GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg);
105107
using deviceEvent = gpu_reconstruction_kernels::deviceEvent;
106108

107109
static const std::vector<std::string> mKernelNames;
@@ -132,6 +134,9 @@ class GPUReconstructionProcessing : public GPUReconstruction
132134
template <class T, int32_t J = -1>
133135
HighResTimer& getTimer(const char* name, int32_t num = -1);
134136

137+
GPUDefParameters* mParCPU = nullptr;
138+
GPUDefParameters* mParDevice = nullptr;
139+
135140
private:
136141
uint32_t getNextTimerId();
137142
timerMeta* getTimerById(uint32_t id, bool increment = true);

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu

Lines changed: 10 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,13 @@
1313
/// \author David Rohr
1414

1515
#define GPUCA_GPUCODE_HOSTONLY
16-
#include "GPUReconstructionCUDAIncludesHost.h"
1716

17+
#define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS
18+
#include "GPUReconstructionCUDADef.h"
19+
#include "GPUDefParametersDefault.h"
20+
#include "GPUDefParametersLoad.inc"
21+
22+
#include "GPUReconstructionCUDAIncludesHost.h"
1823
#include <cuda_profiler_api.h>
1924

2025
#include "GPUReconstructionCUDA.h"
@@ -51,11 +56,14 @@ GPUReconstructionCUDABackend::GPUReconstructionCUDABackend(const GPUSettingsDevi
5156
{
5257
if (mMaster == nullptr) {
5358
mInternals = new GPUReconstructionCUDAInternals;
59+
*mParDevice = o2::gpu::internal::GPUDefParametersLoad();
5460
}
61+
mDeviceBackendSettings.deviceType = DeviceType::CUDA;
5562
}
5663

5764
GPUReconstructionCUDABackend::~GPUReconstructionCUDABackend()
5865
{
66+
Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit CUDA
5967
if (mMaster == nullptr) {
6068
delete mInternals;
6169
}
@@ -69,7 +77,6 @@ int32_t GPUReconstructionCUDABackend::GPUChkErrInternal(const int64_t error, con
6977

7078
GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg)
7179
{
72-
mDeviceBackendSettings.deviceType = DeviceType::CUDA;
7380
#ifndef __HIPCC__ // CUDA
7481
mRtcSrcExtension = ".cu";
7582
mRtcBinExtension = ".fatbin";
@@ -78,11 +85,7 @@ GPUReconstructionCUDA::GPUReconstructionCUDA(const GPUSettingsDeviceBackend& cfg
7885
mRtcBinExtension = ".o";
7986
#endif
8087
}
81-
82-
GPUReconstructionCUDA::~GPUReconstructionCUDA()
83-
{
84-
Exit(); // Make sure we destroy everything (in particular the ITS tracker) before we exit CUDA
85-
}
88+
GPUReconstructionCUDA::~GPUReconstructionCUDA() {}
8689

8790
GPUReconstruction* GPUReconstruction_Create_CUDA(const GPUSettingsDeviceBackend& cfg) { return new GPUReconstructionCUDA(cfg); }
8891

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -44,8 +44,6 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase
4444
void runKernelBackend(const krnlSetupArgs<T, I, Args...>& args);
4545
template <class T, int32_t I = 0, typename... Args>
4646
void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args);
47-
template <class T, int32_t I = 0>
48-
gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend();
4947

5048
void getRTCKernelCalls(std::vector<std::string>& kernels);
5149

0 commit comments

Comments
 (0)