|
51 | 51 | #define GPUCA_ATTRRES3(XX) // 3 attributes not supported |
52 | 52 | #define GPUCA_ATTRRES2(XX, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES2_, GPUCA_M_FIRST(__VA_ARGS__)))(XX, __VA_ARGS__) |
53 | 53 | #define GPUCA_ATTRRES(XX, ...) GPUCA_M_EXPAND(GPUCA_M_CAT(GPUCA_ATTRRES_, GPUCA_M_FIRST(__VA_ARGS__)))(XX, __VA_ARGS__) |
54 | | -// GPU Kernel entry point for single sector |
55 | | -#define GPUCA_KRNLGPU_SINGLE_DEF(x_class, x_attributes, x_arguments, ...) \ |
56 | | - GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t iSector_internal GPUCA_M_STRIP(x_arguments)) |
57 | | -#ifdef GPUCA_KRNL_DEFONLY |
58 | | -#define GPUCA_KRNLGPU_SINGLE(...) GPUCA_KRNLGPU_SINGLE_DEF(__VA_ARGS__); |
59 | | -#else |
60 | | -#define GPUCA_KRNLGPU_SINGLE(x_class, x_attributes, x_arguments, x_forward, ...) GPUCA_KRNLGPU_SINGLE_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \ |
61 | | - { \ |
62 | | - GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \ |
63 | | - GPUCA_M_STRIP_FIRST(x_class)::template Thread<GPUCA_M_KRNL_NUM(x_class)>(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[iSector_internal] GPUCA_M_STRIP(x_forward)); \ |
64 | | - } |
65 | | -#endif |
66 | 54 |
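The GPUCA_ATTRRES chain above dispatches on the first token of the attribute list by pasting it onto a macro prefix and expanding the result. A minimal standalone sketch of that token-pasting dispatch pattern, with all DEMO_* names as illustrative stand-ins rather than the real O2 macros:

```cpp
#include <cstdio>

#define DEMO_CAT_I(a, b) a##b
#define DEMO_CAT(a, b) DEMO_CAT_I(a, b)
#define DEMO_FIRST_I(a, ...) a
#define DEMO_FIRST(...) DEMO_FIRST_I(__VA_ARGS__, )

// One handler macro per recognized first token:
#define DEMO_ATTR_REG(tok, bounds) "launch bounds " #bounds
#define DEMO_ATTR_NONE(tok) "no attributes"

// Dispatch: paste the first attribute token onto DEMO_ATTR_ and invoke the result.
#define DEMO_ATTR(...) DEMO_CAT(DEMO_ATTR_, DEMO_FIRST(__VA_ARGS__))(__VA_ARGS__)

int main() {
  std::puts(DEMO_ATTR(REG, (256, 2))); // prints: launch bounds (256, 2)
  std::puts(DEMO_ATTR(NONE));          // prints: no attributes
  return 0;
}
```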
|
67 | | -// GPU Kernel entry point for multiple sector |
68 | | -#define GPUCA_KRNLGPU_MULTI_DEF(x_class, x_attributes, x_arguments, ...) \ |
69 | | - GPUg() void GPUCA_ATTRRES(,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes))) GPUCA_M_CAT3(krnl_, GPUCA_M_KRNL_NAME(x_class), _multi)(GPUCA_CONSMEM_PTR int32_t firstSector, int32_t nSectorCount GPUCA_M_STRIP(x_arguments)) |
| 55 | +// GPU Kernel entry point |
| 56 | +#define GPUCA_KRNLGPU_DEF(x_class, x_attributes, x_arguments, ...) \ |
| 57 | + GPUg() void GPUCA_ATTRRES(,GPUCA_M_STRIP(x_attributes)) GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))(GPUCA_CONSMEM_PTR int32_t _iSector_internal GPUCA_M_STRIP(x_arguments)) |
| 58 | + |
70 | 59 | #ifdef GPUCA_KRNL_DEFONLY |
71 | | -#define GPUCA_KRNLGPU_MULTI(...) GPUCA_KRNLGPU_MULTI_DEF(__VA_ARGS__); |
| 60 | +#define GPUCA_KRNLGPU(...) GPUCA_KRNLGPU_DEF(__VA_ARGS__); |
72 | 61 | #else |
73 | | -#define GPUCA_KRNLGPU_MULTI(x_class, x_attributes, x_arguments, x_forward, ...) GPUCA_KRNLGPU_MULTI_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \ |
| 62 | +#define GPUCA_KRNLGPU(x_class, x_attributes, x_arguments, x_forward, ...) \ |
| 63 | + GPUCA_KRNLGPU_DEF(x_class, x_attributes, x_arguments, x_forward, __VA_ARGS__) \ |
74 | 64 | { \ |
75 | | - const int32_t iSector_internal = nSectorCount * (get_group_id(0) + (get_num_groups(0) % nSectorCount != 0 && nSectorCount * (get_group_id(0) + 1) % get_num_groups(0) != 0)) / get_num_groups(0); \ |
76 | | - const int32_t nSectorBlockOffset = get_num_groups(0) * iSector_internal / nSectorCount; \ |
77 | | - const int32_t sectorBlockId = get_group_id(0) - nSectorBlockOffset; \ |
78 | | - const int32_t sectorGridDim = get_num_groups(0) * (iSector_internal + 1) / nSectorCount - get_num_groups(0) * (iSector_internal) / nSectorCount; \ |
79 | 65 | GPUshared() typename GPUCA_M_STRIP_FIRST(x_class)::GPUSharedMemory smem; \ |
80 | | - GPUCA_M_STRIP_FIRST(x_class)::template Thread<GPUCA_M_KRNL_NUM(x_class)>(sectorGridDim, get_local_size(0), sectorBlockId, get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[firstSector + iSector_internal] GPUCA_M_STRIP(x_forward)); \ |
| 66 | + GPUCA_M_STRIP_FIRST(x_class)::template Thread<GPUCA_M_KRNL_NUM(x_class)>(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), smem, GPUCA_M_STRIP_FIRST(x_class)::Processor(GPUCA_CONSMEM)[_iSector_internal] GPUCA_M_STRIP(x_forward)); \ |
81 | 67 | } |
82 | 68 | #endif |
83 | 69 |
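The deleted multi-sector body partitioned the GPU grid over sectors with the integer arithmetic above. A host-side sketch that replays those exact formulas for a hypothetical grid of 10 blocks over 4 sectors (the grid size is an example, not from the source), showing the near-even partition they produce:

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  const int32_t nBlocks = 10, nSectors = 4; // example geometry
  for (int32_t b = 0; b < nBlocks; b++) {
    // Formulas copied from the removed GPUCA_KRNLGPU_MULTI body,
    // with get_group_id(0) -> b and get_num_groups(0) -> nBlocks:
    const int32_t iSector = nSectors * (b + (nBlocks % nSectors != 0 && nSectors * (b + 1) % nBlocks != 0)) / nBlocks;
    const int32_t offset = nBlocks * iSector / nSectors;  // first block assigned to this sector
    const int32_t sectorBlockId = b - offset;             // block index within the sector
    const int32_t sectorGridDim = nBlocks * (iSector + 1) / nSectors - nBlocks * iSector / nSectors;
    std::printf("block %2d -> sector %d (local block %d of %d)\n", b, iSector, sectorBlockId, sectorGridDim);
  }
  return 0;
}
```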
|
84 | | -// GPU Host wrapper pre- and post-parts |
85 | | -#define GPUCA_KRNL_PRE(x_class, ...) \ |
| 70 | +// GPU Host wrappers for kernel |
| 71 | +#define GPUCA_KRNL_HOST(x_class, ...) \ |
| 72 | + GPUCA_KRNLGPU(x_class, __VA_ARGS__) \ |
86 | 73 | template <> class GPUCA_KRNL_BACKEND_CLASS::backendInternal<GPUCA_M_KRNL_TEMPLATE(x_class)> { \ |
87 | 74 | public: \ |
88 | 75 | template <typename T, typename... Args> \ |
89 | 76 | static inline void runKernelBackendMacro(const krnlSetupTime& _xyz, T* me, const Args&... args) \ |
90 | 77 | { \ |
91 | 78 | auto& x = _xyz.x; \ |
92 | | - auto& y = _xyz.y; |
93 | | - |
94 | | -#define GPUCA_KRNL_POST() \ |
| 79 | + auto& y = _xyz.y; \ |
| 80 | + GPUCA_KRNL_CALL(x_class, __VA_ARGS__) \ |
95 | 81 | } \ |
96 | 82 | }; |
97 | 83 |
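The merged GPUCA_KRNL_HOST now emits both the kernel definition and a backendInternal specialization whose static method performs the launch. A standalone sketch of that wrapper pattern, where Backend, KernelTag, and the printf are stand-ins for the real O2 backend class, GPUCA_M_KRNL_TEMPLATE(x_class), and the GPUCA_KRNL_CALL launch:

```cpp
#include <cstdio>

struct krnlSetupTime { struct { int nBlocks, nThreads; } x; struct { int start, num; } y; };

struct Backend {
  template <typename K> struct backendInternal; // specialized per kernel
};

struct KernelTag {}; // stands in for one kernel's template key

template <> struct Backend::backendInternal<KernelTag> {
  template <typename T, typename... Args>
  static inline void runKernelBackendMacro(const krnlSetupTime& _xyz, T* /*me*/, const Args&... /*args*/) {
    auto& x = _xyz.x;
    auto& y = _xyz.y;
    // Here the real macro pastes in GPUCA_KRNL_CALL(...), i.e. the actual
    // kernel launch; this sketch only reports the launch geometry.
    std::printf("launch: %d blocks x %d threads, sector %d\n", x.nBlocks, x.nThreads, y.start);
  }
};

int main() {
  krnlSetupTime setup{{120, 256}, {3, 1}};
  int me = 0;
  Backend::backendInternal<KernelTag>::runKernelBackendMacro(setup, &me);
  return 0;
}
```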
|
98 | | -// GPU Host wrappers for single kernel, multi-sector, or auto-detection |
99 | | -#define GPUCA_KRNL_single(...) \ |
100 | | - GPUCA_KRNLGPU_SINGLE(__VA_ARGS__) \ |
101 | | - GPUCA_KRNL_PRE(__VA_ARGS__) \ |
102 | | - if (y.num > 1) { \ |
103 | | - throw std::runtime_error("Kernel called with invalid number of sectors"); \ |
104 | | - } else { \ |
105 | | - GPUCA_KRNL_CALL_single(__VA_ARGS__) \ |
106 | | - } \ |
107 | | - GPUCA_KRNL_POST() |
108 | | - |
109 | | -#define GPUCA_KRNL_multi(...) \ |
110 | | - GPUCA_KRNLGPU_MULTI(__VA_ARGS__) \ |
111 | | - GPUCA_KRNL_PRE(__VA_ARGS__) \ |
112 | | - GPUCA_KRNL_CALL_multi(__VA_ARGS__) \ |
113 | | - GPUCA_KRNL_POST() |
114 | | - |
115 | | -#define GPUCA_KRNL_(...) GPUCA_KRNL_single(__VA_ARGS__) |
116 | | -#define GPUCA_KRNL_simple(...) GPUCA_KRNL_single(__VA_ARGS__) |
117 | | -#define GPUCA_KRNL_both(...) \ |
118 | | - GPUCA_KRNLGPU_SINGLE(__VA_ARGS__) \ |
119 | | - GPUCA_KRNLGPU_MULTI(__VA_ARGS__) \ |
120 | | - GPUCA_KRNL_PRE(__VA_ARGS__) \ |
121 | | - if (y.num <= 1) { \ |
122 | | - GPUCA_KRNL_CALL_single(__VA_ARGS__) \ |
123 | | - } else { \ |
124 | | - GPUCA_KRNL_CALL_multi(__VA_ARGS__) \ |
125 | | - } \ |
126 | | - GPUCA_KRNL_POST() |
127 | | - |
128 | | -#define GPUCA_KRNL_LOAD_(...) GPUCA_KRNL_LOAD_single(__VA_ARGS__) |
129 | | -#define GPUCA_KRNL_LOAD_simple(...) GPUCA_KRNL_LOAD_single(__VA_ARGS__) |
130 | | -#define GPUCA_KRNL_LOAD_both(...) \ |
131 | | - GPUCA_KRNL_LOAD_single(__VA_ARGS__) \ |
132 | | - GPUCA_KRNL_LOAD_multi(__VA_ARGS__) |
133 | | - |
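The removed _single/_multi/_both wrappers dispatched at the host side on y.num, the number of sectors requested. A runnable sketch of that dispatch, with launchSingle/launchMulti as hypothetical stand-ins for GPUCA_KRNL_CALL_single/_multi:

```cpp
#include <cstdint>
#include <cstdio>

static void launchSingle(int32_t sector) { std::printf("krnl(iSector=%d)\n", sector); }
static void launchMulti(int32_t first, int32_t n) { std::printf("krnl_multi(first=%d, n=%d)\n", first, n); }

static void runKernel(int32_t yStart, int32_t yNum) {
  if (yNum <= 1) {
    launchSingle(yStart);      // GPUCA_KRNL_CALL_single path
  } else {
    launchMulti(yStart, yNum); // GPUCA_KRNL_CALL_multi path
  }
}

int main() {
  runKernel(3, 1);  // one sector: single-sector kernel
  runKernel(0, 36); // many sectors: _multi kernel
  return 0;
}
```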
134 | 84 | #define GPUCA_KRNL_PROP(x_class, x_attributes) \ |
135 | 85 | template <> gpu_reconstruction_kernels::krnlProperties GPUCA_KRNL_BACKEND_CLASS::getKernelPropertiesBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>() { \ |
136 | | - gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_INTERNAL_PROP,GPUCA_M_SHIFT(GPUCA_M_STRIP(x_attributes)))}; \ |
| 86 | + gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_INTERNAL_PROP,GPUCA_M_STRIP(x_attributes))}; \ |
137 | 87 | return ret.nThreads > 0 ? ret : gpu_reconstruction_kernels::krnlProperties{(int32_t)mThreadCount}; \ |
138 | 88 | } |
139 | 89 |
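GPUCA_KRNL_PROP keeps its fallback rule: a thread count supplied via the attribute list wins, otherwise the backend-wide default mThreadCount applies. A sketch of that rule, using a minimal stand-in for gpu_reconstruction_kernels::krnlProperties:

```cpp
#include <cstdint>
#include <cstdio>

struct krnlProperties { int32_t nThreads = 0; int32_t minBlocks = 1; };

static krnlProperties getKernelProperties(krnlProperties fromAttributes, int32_t mThreadCount) {
  // Same decision as the macro body: the attribute value wins if positive.
  return fromAttributes.nThreads > 0 ? fromAttributes : krnlProperties{mThreadCount};
}

int main() {
  auto a = getKernelProperties({256, 2}, 128); // attributes carried REG (256, 2)
  auto b = getKernelProperties({}, 128);       // no attributes: default thread count
  std::printf("%d %d\n", a.nThreads, b.nThreads); // prints: 256 128
  return 0;
}
```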
|
140 | | -// Generate GPU kernel and host wrapper |
141 | | -#define GPUCA_KRNL_WRAP(x_func, x_class, x_attributes, ...) GPUCA_M_CAT(x_func, GPUCA_M_STRIP_FIRST(x_attributes))(x_class, x_attributes, __VA_ARGS__) |
142 | 90 | #endif // GPUCA_GPUCODE |
143 | 91 |
|
144 | | -#define GPUCA_KRNL_LB(x_class, x_attributes, ...) GPUCA_KRNL(x_class, (GPUCA_M_STRIP(x_attributes), REG, (GPUCA_M_CAT(GPUCA_LB_, GPUCA_M_KRNL_NAME(x_class)))), __VA_ARGS__) |
| 92 | +#define GPUCA_KRNL_LB(x_class, x_attributes, ...) GPUCA_KRNL(x_class, (REG, (GPUCA_M_CAT(GPUCA_LB_, GPUCA_M_KRNL_NAME(x_class)))), __VA_ARGS__) |
145 | 93 |
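GPUCA_KRNL_LB now injects only the REG attribute, finding the bound by pasting the kernel name onto the GPUCA_LB_ prefix. A standalone sketch of that lookup-by-name-pasting; the DEMO_* macros, kernel names, and LB_* values are illustrative, not the build-system-provided constants:

```cpp
#include <cstdio>

#define DEMO_CAT_I(a, b) a##b
#define DEMO_CAT(a, b) DEMO_CAT_I(a, b)

// Per-kernel launch-bound definitions (in O2 these come from the build system):
#define LB_TrackletConstructor "256, 2"
#define LB_NeighboursFinder "384, 1"

// Inject the kernel-specific REG attribute, as GPUCA_KRNL_LB does with
// GPUCA_M_CAT(GPUCA_LB_, GPUCA_M_KRNL_NAME(x_class)):
#define DEMO_KRNL_LB(name) std::printf("%s: REG(%s)\n", #name, DEMO_CAT(LB_, name))

int main() {
  DEMO_KRNL_LB(TrackletConstructor); // prints: TrackletConstructor: REG(256, 2)
  DEMO_KRNL_LB(NeighboursFinder);    // prints: NeighboursFinder: REG(384, 1)
  return 0;
}
```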
|
146 | 94 | #endif // O2_GPU_GPURECONSTRUCTIONKERNELMACROS_H |
147 | 95 | // clang-format on |