Skip to content

Commit 504bb9c

Browse files
committed
GPU: Get rid of GPUCA_RTC_LB_..., use only GPUCA_LB_...
1 parent a87ed76 commit 504bb9c

10 files changed

+56
-36
lines changed

GPU/GPUTracking/Base/cuda/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -17,7 +17,7 @@ if(DEFINED CUDA_COMPUTETARGET)
1717
endif()
1818
message(STATUS "Building GPUTracking with CUDA support ${TMP_TARGET}")
1919

20-
set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu)
20+
set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDARTCCalls.cu)
2121
set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesSystem.h)
2222
# -------------------------------- Prepare RTC -------------------------------------------------------
2323
enable_language(ASM)

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -45,8 +45,6 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase
4545
template <class T, int32_t I = 0, typename... Args>
4646
void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args);
4747

48-
void getRTCKernelCalls(std::vector<std::string>& kernels);
49-
5048
template <class T, class S>
5149
friend GPUh() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp);
5250
GPUReconstructionCUDAInternals* mInternals;
@@ -91,6 +89,7 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels<GPUReconstructionC
9189

9290
private:
9391
int32_t genRTC(std::string& filename, uint32_t& nCompile);
92+
void getRTCKernelCalls(std::vector<std::string>& kernels);
9493
void genAndLoadRTC();
9594
void loadKernelModules(bool perKernel);
9695
const char *mRtcSrcExtension = ".src", *mRtcBinExtension = ".o";

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAIncludesSystem.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
/// \file GPUReconstructionCUDAIncludesSystem.h
1313
/// \author David Rohr
1414

15-
#ifndef O2_GPU_GPURECONSTRUCTIONCUDAINCLUDES_H
16-
#define O2_GPU_GPURECONSTRUCTIONCUDAINCLUDES_H
15+
#ifndef O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H
16+
#define O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H
1717

1818
#include <cstdint>
1919
#include <type_traits>
@@ -32,4 +32,4 @@
3232
#include <sm_20_atomic_functions.h>
3333
#include <cuda_fp16.h>
3434

35-
#endif
35+
#endif // O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu

Lines changed: 0 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -118,14 +118,3 @@ static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstS
118118
return retVal;
119119
});
120120
#endif
121-
122-
void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector<std::string>& kernels)
123-
{
124-
#undef GPUCA_KRNL_LB
125-
#undef __launch_bounds__
126-
#define GPUCA_KRNL(...) kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU(__VA_ARGS__)));
127-
#define GPUCA_KRNL_LB(x_class, x_attributes, ...) GPUCA_KRNL(x_class, (REG, (GPUCA_M_CAT(GPUCA_RTC_LB_, GPUCA_M_KRNL_NAME(x_class))), GPUCA_M_STRIP(x_attributes)), __VA_ARGS__)
128-
#include "GPUReconstructionKernelList.h"
129-
#undef GPUCA_KRNL
130-
#undef GPUCA_KRNL_LB
131-
}
Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,32 @@
1+
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
2+
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3+
// All rights not expressly granted are reserved.
4+
//
5+
// This software is distributed under the terms of the GNU General Public
6+
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7+
//
8+
// In applying this license CERN does not waive the privileges and immunities
9+
// granted to it by virtue of its status as an Intergovernmental Organization
10+
// or submit itself to any jurisdiction.
11+
12+
/// \file GPUReconstructionCUDARTCCalls.cu
13+
/// \author David Rohr
14+
15+
#define GPUCA_GPUCODE_HOSTONLY
16+
#define GPUCA_GPUCODE_NO_LAUNCH_BOUNDS
17+
18+
#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_STRIP(args))
19+
20+
#include "GPUReconstructionCUDAIncludesSystem.h"
21+
#include "GPUReconstructionCUDADef.h"
22+
#include "GPUReconstructionCUDA.h"
23+
24+
using namespace o2::gpu;
25+
26+
void GPUReconstructionCUDA::getRTCKernelCalls(std::vector<std::string>& kernels)
27+
{
28+
#undef GPUCA_KRNL
29+
#define GPUCA_KRNL(...) kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU(__VA_ARGS__)));
30+
#undef __launch_bounds__
31+
#include "GPUReconstructionKernelList.h"
32+
}

GPU/GPUTracking/Base/hip/CMakeLists.txt

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,7 @@ message(STATUS "Building GPUTracking with HIP support ${TMP_TARGET}")
2424
if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}")
2525
set(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/hipify)
2626
file(MAKE_DIRECTORY ${GPUCA_HIP_SOURCE_DIR})
27-
set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu)
27+
set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu GPUReconstructionCUDARTCCalls.cu)
2828
set(GPUCA_HIP_LOCAL_FILE_LIST GPUReconstructionHIPIncludesSystem.h)
2929
set(HIP_SOURCES "")
3030
foreach(file ${GPUCA_HIP_FILE_LIST})
@@ -61,7 +61,7 @@ else()
6161
get_filename_component(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ABSOLUTE)
6262
endif()
6363

64-
set(SRCS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPKernels.hip)
64+
set(SRCS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPKernels.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPRTCCalls.hip)
6565
set(SRCS_CXX ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx)
6666
set(HDRS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPHelpers.inc ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesSystem.h)
6767

GPU/GPUTracking/Base/hip/GPUReconstructionHIPIncludesSystem.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -12,8 +12,8 @@
1212
/// \file GPUReconstructionHIPIncludesSystem.h
1313
/// \author David Rohr
1414

15-
#ifndef O2_GPU_RECONSTRUCTIONHIPINCLUDES_H
16-
#define O2_GPU_RECONSTRUCTIONHIPINCLUDES_H
15+
#ifndef O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H
16+
#define O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H
1717

1818
#include <hip/hip_runtime.h>
1919
#include <hip/hip_ext.h>
@@ -25,4 +25,4 @@
2525
#include <thrust/device_ptr.h>
2626
#pragma GCC diagnostic pop
2727

28-
#endif
28+
#endif // O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H

GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414

1515
// This file contains compile-time constants affecting the GPU performance.
1616

17-
#ifndef GPUDEFPARAMETERSDEFAULTS_H
17+
#if !defined(GPUDEFPARAMETERSDEFAULTS_H) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) // Avoid including for RTC generation besides normal include protection.
1818
#define GPUDEFPARAMETERSDEFAULTS_H
1919
// clang-format off
2020

GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc

Lines changed: 11 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -37,17 +37,17 @@ static GPUDefParameters GPUDefParametersLoad()
3737
};
3838
}
3939

40-
#define GPUCA_EXPORT_KERNEL(name) \
41-
if (par.par_LB_maxThreads[i] > 0) { \
42-
o << "#define GPUCA_" << (forRTC ? "RTC_" : "") << "LB_" << GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \
43-
if (par.par_LB_minBlocks[i] > 0) { \
44-
o << ", " << par.par_LB_minBlocks[i]; \
45-
} \
46-
if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \
47-
o << ", " << par.par_LB_forceBlocks[i]; \
48-
} \
49-
o << "\n"; \
50-
} \
40+
#define GPUCA_EXPORT_KERNEL(name) \
41+
if (par.par_LB_maxThreads[i] > 0) { \
42+
o << "#define GPUCA_LB_" << GPUCA_M_STR(name) " " << par.par_LB_maxThreads[i]; \
43+
if (par.par_LB_minBlocks[i] > 0) { \
44+
o << ", " << par.par_LB_minBlocks[i]; \
45+
} \
46+
if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \
47+
o << ", " << par.par_LB_forceBlocks[i]; \
48+
} \
49+
o << "\n"; \
50+
} \
5151
i++;
5252

5353
static std::string GPUDefParametersExport(const GPUDefParameters& par, bool forRTC)

GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,14 +22,14 @@
2222
#include "GPUCommonDef.h"
2323
#include "GPUDefMacros.h"
2424

25-
#ifndef GPUCA_GPUCODE_GENRTC
25+
#if defined(GPUCA_GPUCODE)
2626
#include "GPUDefParametersDefaults.h"
2727
#endif
2828
#include "GPUDefParametersConstants.h"
2929

3030
namespace o2::gpu
3131
{
32-
#if defined(GPUCA_GPUCODE)
32+
#if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS)
3333
GPUhdi() static constexpr uint32_t GPUCA_GET_THREAD_COUNT(uint32_t val, ...) { return val; }
3434
GPUhdi() static constexpr uint32_t GPUCA_GET_WARP_COUNT(uint32_t val, ...) { return val / GPUCA_WARP_SIZE; }
3535
#else

0 commit comments

Comments
 (0)