Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 6 additions & 8 deletions GPU/GPUTracking/Base/GPUReconstructionCPU.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,7 @@ GPUReconstructionCPU::~GPUReconstructionCPU()
}

template <class T, int32_t I, typename... Args>
inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args)
inline void GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args)
{
auto& x = _xyz.x;
auto& y = _xyz.y;
Expand Down Expand Up @@ -90,11 +90,10 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal(const krnlS
}
}
}
return 0;
}

template <>
inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal<GPUMemClean16, 0>(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size)
inline void GPUReconstructionCPUBackend::runKernelBackendInternal<GPUMemClean16, 0>(const krnlSetupTime& _xyz, void* const& ptr, uint64_t const& size)
{
int32_t nnThreads = std::max<int32_t>(1, std::min<int32_t>(size / (16 * 1024 * 1024), getNKernelHostThreads(true)));
if (nnThreads > 1) {
Expand All @@ -112,13 +111,12 @@ inline int32_t GPUReconstructionCPUBackend::runKernelBackendInternal<GPUMemClean
} else {
memset(ptr, 0, size);
}
return 0;
}

template <class T, int32_t I, typename... Args>
int32_t GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs<T, I, Args...>& args)
void GPUReconstructionCPUBackend::runKernelBackend(const krnlSetupArgs<T, I, Args...>& args)
{
return std::apply([this, &args](auto&... vals) { return runKernelBackendInternal<T, I, Args...>(args.s, vals...); }, args.v);
std::apply([this, &args](auto&... vals) { runKernelBackendInternal<T, I, Args...>(args.s, vals...); }, args.v);
}

template <class T, int32_t I>
Expand All @@ -127,8 +125,8 @@ krnlProperties GPUReconstructionCPUBackend::getKernelPropertiesBackend()
return krnlProperties{1, 1};
}

#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \
template int32_t GPUReconstructionCPUBackend::runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args); \
#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \
template void GPUReconstructionCPUBackend::runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args); \
template krnlProperties GPUReconstructionCPUBackend::getKernelPropertiesBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>();
#include "GPUReconstructionKernelList.h"
#undef GPUCA_KRNL
Expand Down
29 changes: 14 additions & 15 deletions GPU/GPUTracking/Base/GPUReconstructionCPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ class GPUReconstructionCPUBackend : public GPUReconstructionProcessing
protected:
GPUReconstructionCPUBackend(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionProcessing(cfg) {}
template <class T, int32_t I = 0, typename... Args>
int32_t runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs<T, I, Args...>& args);
void runKernelBackend(const gpu_reconstruction_kernels::krnlSetupArgs<T, I, Args...>& args);
template <class T, int32_t I = 0, typename... Args>
int32_t runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime& _xyz, const Args&... args);
void runKernelBackendInternal(const gpu_reconstruction_kernels::krnlSetupTime& _xyz, const Args&... args);
template <class T, int32_t I>
gpu_reconstruction_kernels::krnlProperties getKernelPropertiesBackend();
};
Expand All @@ -53,7 +53,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
static constexpr krnlEvent krnlEventNone = krnlEvent{nullptr, nullptr, 0};

template <class S, int32_t I = 0, typename... Args>
int32_t runKernel(krnlSetup&& setup, Args&&... args);
void runKernel(krnlSetup&& setup, Args&&... args);
template <class S, int32_t I = 0>
const gpu_reconstruction_kernels::krnlProperties getKernelProperties()
{
Expand All @@ -77,14 +77,14 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP

GPUReconstructionCPU(const GPUSettingsDeviceBackend& cfg) : GPUReconstructionKernels(cfg) {}

#define GPUCA_KRNL(x_class, attributes, x_arguments, x_forward, x_types) \
inline int32_t runKernelImplWrapper(gpu_reconstruction_kernels::classArgument<GPUCA_M_KRNL_TEMPLATE(x_class)>, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \
{ \
if (cpuFallback) { \
return GPUReconstructionCPU::runKernelImpl(krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \
} else { \
return runKernelImpl(krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \
} \
#define GPUCA_KRNL(x_class, attributes, x_arguments, x_forward, x_types) \
inline void runKernelImplWrapper(gpu_reconstruction_kernels::classArgument<GPUCA_M_KRNL_TEMPLATE(x_class)>, bool cpuFallback, double& timer, krnlSetup&& setup GPUCA_M_STRIP(x_arguments)) \
{ \
if (cpuFallback) { \
GPUReconstructionCPU::runKernelImpl(krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \
} else { \
runKernelImpl(krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>(setup.x, setup.y, setup.z, timer GPUCA_M_STRIP(x_forward))); \
} \
}
#include "GPUReconstructionKernelList.h"
#undef GPUCA_KRNL
Expand Down Expand Up @@ -131,7 +131,7 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
};

template <class S, int32_t I, typename... Args>
inline int32_t GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args)
inline void GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args)
{
HighResTimer* t = nullptr;
GPUCA_RECO_STEP myStep = S::GetRecoStep() == GPUCA_RECO_STEP::NoRecoStep ? setup.x.step : S::GetRecoStep();
Expand Down Expand Up @@ -164,7 +164,7 @@ inline int32_t GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args
GPUInfo("Running kernel %s (Stream %d, Range %d/%d, Grid %d/%d) on %s", GetKernelName<S, I>(), stream, setup.y.start, setup.y.num, nBlocks, nThreads, cpuFallback == 2 ? "CPU (forced)" : cpuFallback ? "CPU (fallback)" : mDeviceName.c_str());
}
if (nThreads == 0 || nBlocks == 0) {
return 0;
return;
}
if (mProcessingSettings.debugLevel >= 1) {
t = &getKernelTimer<S, I>(myStep, !IsGPU() || cpuFallback ? getHostThreadIndex() : stream);
Expand All @@ -173,7 +173,7 @@ inline int32_t GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args
}
}
double deviceTimerTime = 0.;
int32_t retVal = runKernelImplWrapper(gpu_reconstruction_kernels::classArgument<S, I>(), cpuFallback, deviceTimerTime, std::forward<krnlSetup&&>(setup), std::forward<Args>(args)...);
runKernelImplWrapper(gpu_reconstruction_kernels::classArgument<S, I>(), cpuFallback, deviceTimerTime, std::forward<krnlSetup&&>(setup), std::forward<Args>(args)...);
if (GPUDebug(GetKernelName<S, I>(), stream, mProcessingSettings.serializeGPU & 1)) {
throw std::runtime_error("kernel failure");
}
Expand All @@ -192,7 +192,6 @@ inline int32_t GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args
throw std::runtime_error("kernel error code");
}
}
return retVal;
}

} // namespace o2::gpu
Expand Down
4 changes: 2 additions & 2 deletions GPU/GPUTracking/Base/GPUReconstructionKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -99,9 +99,9 @@ class GPUReconstructionKernels : public T
using krnlSetupArgs = gpu_reconstruction_kernels::krnlSetupArgs<S, I, Args...>;

#define GPUCA_KRNL(x_class, attributes, x_arguments, x_forward, x_types) \
virtual int32_t runKernelImpl(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args) \
virtual void runKernelImpl(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args) \
{ \
return T::template runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(args); \
T::template runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(args); \
} \
virtual gpu_reconstruction_kernels::krnlProperties getKernelPropertiesImpl(gpu_reconstruction_kernels::classArgument<GPUCA_M_KRNL_TEMPLATE(x_class)>) \
{ \
Expand Down
11 changes: 8 additions & 3 deletions GPU/GPUTracking/Base/GPUReconstructionProcessing.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -45,9 +45,14 @@ void GPUReconstructionProcessing::SetNActiveThreads(int32_t n)

void GPUReconstructionProcessing::runParallelOuterLoop(bool doGPU, uint32_t nThreads, std::function<void(uint32_t)> lambda)
{
tbb::task_arena(SetAndGetNActiveThreadsOuterLoop(!doGPU, nThreads)).execute([&] {
tbb::parallel_for<uint32_t>(0, nThreads, lambda, tbb::simple_partitioner());
});
uint32_t nThreadsAdjusted = SetAndGetNActiveThreadsOuterLoop(!doGPU, nThreads);
if (nThreadsAdjusted > 1) {
tbb::task_arena(nThreadsAdjusted).execute([&] {
tbb::parallel_for<uint32_t>(0, nThreads, lambda, tbb::simple_partitioner());
});
} else {
lambda(0);
}
}

namespace o2::gpu
Expand Down
4 changes: 2 additions & 2 deletions GPU/GPUTracking/Base/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ endif()
message(STATUS "Building GPUTracking with CUDA support ${TMP_TARGET}")

set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu)
set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludes.h CUDAThrustHelpers.h)
set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h CUDAThrustHelpers.h)
# -------------------------------- Prepare RTC -------------------------------------------------------
enable_language(ASM)
if(ALIGPU_BUILD_TYPE STREQUAL "O2")
Expand Down Expand Up @@ -67,7 +67,7 @@ set(GPU_RTC_BIN ${CMAKE_CURRENT_BINARY_DIR}/GPUReconstructionCUDArtc)
# cmake-format: off
add_custom_command(
OUTPUT ${GPU_RTC_BIN}.src
COMMAND cat ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludes.h > ${GPU_RTC_BIN}.src
COMMAND cp ${GPUDIR}/Base/cuda/GPUReconstructionCUDAIncludesHost.h ${GPU_RTC_BIN}.src
COMMAND ${CMAKE_CXX_COMPILER} ${GPU_RTC_DEFINES} ${GPU_RTC_INCLUDES} -std=c++${CMAKE_CUDA_STANDARD} -D__CUDA_ARCH__=${RTC_CUDA_ARCH} -D__CUDACC__ -x c++ -nostdinc -E ${GPU_RTC_SRC} >> ${GPU_RTC_BIN}.src
MAIN_DEPENDENCY ${GPU_RTC_SRC}
IMPLICIT_DEPENDS CXX ${GPU_RTC_SRC}
Expand Down
3 changes: 1 addition & 2 deletions GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
/// \author David Rohr

#define GPUCA_GPUCODE_HOSTONLY
#include "GPUReconstructionCUDADef.h"
#include "GPUReconstructionCUDAIncludes.h"
#include "GPUReconstructionCUDAIncludesHost.h"

#include <cuda_profiler_api.h>

Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase
void PrintKernelOccupancies() override;

template <class T, int32_t I = 0, typename... Args>
int32_t runKernelBackend(const krnlSetupArgs<T, I, Args...>& args);
void runKernelBackend(const krnlSetupArgs<T, I, Args...>& args);
template <class T, int32_t I = 0, typename... Args>
void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args);
template <class T, int32_t I = 0>
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/cuda/GPUReconstructionCUDADef.h
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUReconstructionCUDDef.h
/// \file GPUReconstructionCUDADef.h
/// \author David Rohr

#ifndef O2_GPU_GPURECONSTRUCTIONCUDADEF_H
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
/// \file GPUReconstructionCUDAExternalProvider.cu
/// \author David Rohr

#include "GPUReconstructionCUDADef.h"
#include "GPUReconstructionCUDAIncludes.h"
#include "GPUReconstructionCUDAIncludesHost.h"

#include "GPUReconstructionCUDA.h"
#include "GPUReconstructionCUDAInternals.h"
Expand Down
4 changes: 3 additions & 1 deletion GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,9 @@ QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command_arch);

int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
{
std::string rtcparam = std::string(mProcessingSettings.rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") + GPUParamRTC::generateRTCCode(param(), mProcessingSettings.rtc.optConstexpr);
std::string rtcparam = std::string("#define GPUCA_RTC_CODE\n") +
std::string(mProcessingSettings.rtc.optSpecialCode ? "#define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n" : "#define GPUCA_RTC_SPECIAL_CODE(...)\n") +
GPUParamRTC::generateRTCCode(param(), mProcessingSettings.rtc.optConstexpr);
if (filename == "") {
filename = "/tmp/o2cagpu_rtc_";
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -32,4 +32,8 @@
#include <sm_20_atomic_functions.h>
#include <cuda_fp16.h>

#ifndef GPUCA_RTC_CODE
#include "GPUReconstructionCUDADef.h"
#endif

#endif
10 changes: 4 additions & 6 deletions GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,7 @@
/// \file GPUReconstructionCUDAKernels.cu
/// \author David Rohr

#include "GPUReconstructionCUDADef.h"
#include "GPUReconstructionCUDAIncludes.h"
#include "GPUReconstructionCUDAIncludesHost.h"

#include "GPUReconstructionCUDA.h"
#include "GPUReconstructionCUDAInternals.h"
Expand Down Expand Up @@ -67,7 +66,7 @@ inline void GPUReconstructionCUDABackend::runKernelBackendInternal(const krnlSet
}

template <class T, int32_t I, typename... Args>
int32_t GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs<T, I, Args...>& args)
void GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs<T, I, Args...>& args)
{
auto& x = args.s.x;
auto& z = args.s.z;
Expand All @@ -84,7 +83,6 @@ int32_t GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs<T, I,
if (z.ev) {
GPUFailedMsg(cudaEventRecord(*(cudaEvent_t*)z.ev, mInternals->Streams[x.stream]));
}
return 0;
}

#undef GPUCA_KRNL_REG
Expand All @@ -93,7 +91,7 @@ int32_t GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs<T, I,
#if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 1
#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \
GPUCA_KRNL_PROP(x_class, x_attributes) \
template int32_t GPUReconstructionCUDABackend::runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args);
template void GPUReconstructionCUDABackend::runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args);
#else
#if defined(GPUCA_KERNEL_COMPILE_MODE) && GPUCA_KERNEL_COMPILE_MODE == 2
#define GPUCA_KRNL_DEFONLY
Expand All @@ -102,7 +100,7 @@ int32_t GPUReconstructionCUDABackend::runKernelBackend(const krnlSetupArgs<T, I,
#define GPUCA_KRNL(x_class, x_attributes, x_arguments, x_forward, x_types) \
GPUCA_KRNL_PROP(x_class, x_attributes) \
GPUCA_KRNL_WRAP(GPUCA_KRNL_, x_class, x_attributes, x_arguments, x_forward, x_types) \
template int32_t GPUReconstructionCUDABackend::runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args);
template void GPUReconstructionCUDABackend::runKernelBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>(const krnlSetupArgs<GPUCA_M_KRNL_TEMPLATE(x_class) GPUCA_M_STRIP(x_types)>& args);
#ifndef __HIPCC__ // CUDA version
#define GPUCA_KRNL_CALL_single(x_class, ...) \
GPUCA_M_CAT(krnl_, GPUCA_M_KRNL_NAME(x_class))<<<x.nBlocks, x.nThreads, 0, me->mInternals->Streams[x.stream]>>>(GPUCA_CONSMEM_CALL y.start, args...);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -13,8 +13,7 @@
/// \author David Rohr

#define GPUCA_GPUCODE_COMPILEKERNELS
#include "GPUReconstructionCUDAIncludes.h"
#include "GPUReconstructionCUDADef.h"
#include "GPUReconstructionCUDAIncludesHost.h"
#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_MAX2_3(GPUCA_M_STRIP(args)))
#define GPUCA_KRNL(...) GPUCA_KRNL_WRAP(GPUCA_KRNL_LOAD_, __VA_ARGS__)
#define GPUCA_KRNL_LOAD_single(...) GPUCA_KRNLGPU_SINGLE(__VA_ARGS__);
Expand Down
Loading