Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions GPU/Common/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ set(HDRS_INSTALL
GPUCommonAlgorithm.h
GPUCommonDef.h
GPUCommonDefAPI.h
GPUCommonHelpers.h
GPUCommonDefSettings.h
GPUCommonConstants.h
GPUCommonLogger.h
Expand Down
24 changes: 18 additions & 6 deletions GPU/Common/GPUCommonDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -35,13 +35,25 @@
#define GPUCA_GPUCODE // Compiled by GPU compiler
#endif

#if defined(__CUDA_ARCH__) || defined(__OPENCL__) || defined(__HIP_DEVICE_COMPILE__)
#define GPUCA_GPUCODE_DEVICE // Executed on device
#if defined(GPUCA_GPUCODE)
#if defined(__CUDA_ARCH__) || defined(__OPENCL__) || defined(__HIP_DEVICE_COMPILE__)
#define GPUCA_GPUCODE_DEVICE // Executed on device
#endif
#if defined(__CUDACC__)
#define GPUCA_GPUTYPE CUDA
#elif defined(__HIPCC__)
#define GPUCA_GPUTYPE HIP
#elif defined(__OPENCL__) || defined(__OPENCL_HOST__)
#define GPUCA_GPUTYPE OCL
#endif
#endif
#endif
#ifndef GPUCA_GPUTYPE
#define GPUCA_GPUTYPE CPU
#endif

#if defined(GPUCA_STANDALONE) || (defined(GPUCA_O2_LIB) && !defined(GPUCA_O2_INTERFACE)) || defined (GPUCA_GPUCODE)
#define GPUCA_ALIGPUCODE
#define GPUCA_ALIGPUCODE // Part of GPUTracking library but not of interface
#endif

#if (defined(__CUDACC__) && defined(GPUCA_CUDA_NO_CONSTANT_MEMORY)) || (defined(__HIPCC__) && defined(GPUCA_HIP_NO_CONSTANT_MEMORY)) || (defined(__OPENCL__) && defined(GPUCA_OPENCL_NO_CONSTANT_MEMORY))
Expand All @@ -51,13 +63,13 @@
#endif

#if !defined(GPUCA_GPUCODE) && !defined(GPUCA_STANDALONE) && defined(DEBUG_STREAMER)
#define GPUCA_DEBUG_STREAMER_CHECK(...) __VA_ARGS__
#define GPUCA_DEBUG_STREAMER_CHECK(...) __VA_ARGS__
#else
#define GPUCA_DEBUG_STREAMER_CHECK(...)
#define GPUCA_DEBUG_STREAMER_CHECK(...)
#endif

#ifndef GPUCA_RTC_SPECIAL_CODE
#define GPUCA_RTC_SPECIAL_CODE(...)
#define GPUCA_RTC_SPECIAL_CODE(...)
#endif

// API Definitions for GPU Compilation
Expand Down
2 changes: 1 addition & 1 deletion GPU/Common/GPUCommonDefAPI.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@
#define GPUdni() // Device function, not-to-be-inlined
#define GPUdnii() inline // Device function, not-to-be-inlined on device, inlined on host
#define GPUh() // Host-only function
// NOTE: All GPUd*() functions are also compiled on the host during GCC compilation.
// NOTE: All GPUd*() functions are also compiled on the host during host compilation.
// The GPUh*() macros are for the rare cases of functions that you want to compile for the host during GPU compilation.
// Usually, you do not need the GPUh*() versions. If in doubt, use GPUd*()!
#define GPUhi() inline // to-be-inlined host-only function
Expand Down
62 changes: 62 additions & 0 deletions GPU/Common/GPUCommonHelpers.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,62 @@
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUCommonHelpers.h
/// \author David Rohr

// GPUChkErr and GPUChkErrI will both check x for an error, using the loaded backend of GPUReconstruction (requiring GPUReconstruction.h to be included by the user).
// In case of an error, it will print out the corresponding CUDA / HIP / OpenCL error code
// GPUChkErr will download GPUReconstruction error values from GPU, print them, and terminate the application with an exception if an error occured.
// GPUChkErrI will return 0 or 1, depending on whether an error has occurred.
// These Macros must be called ona GPUReconstruction instance.
// The GPUChkErrS and GPUChkErrSI are similar but static, without required GPUReconstruction instance.
// Examples:
// if (mRec->GPUChkErrI(cudaMalloc(...))) { exit(1); }
// gpuRecObj.GPUChkErr(cudaMalloc(...));
// if (GPUChkErrSI(cudaMalloc(..))) { exit(1); }

#ifndef GPUCOMMONHELPERS_H
#define GPUCOMMONHELPERS_H

// Please #include "GPUReconstruction.h" in your code, if you use these 2!
#define GPUChkErr(x) GPUChkErrA(x, __FILE__, __LINE__, true)
#define GPUChkErrI(x) GPUChkErrA(x, __FILE__, __LINE__, false)
#define GPUChkErrS(x) o2::gpu::internal::GPUReconstructionChkErr(x, __FILE__, __LINE__, true)
#define GPUChkErrSI(x) o2::gpu::internal::GPUReconstructionChkErr(x, __FILE__, __LINE__, false)

#include "GPUCommonDef.h"
#include <cstdint>

namespace o2::gpu::internal
{
#define GPUCOMMON_INTERNAL_CAT_A(a, b, c) a##b##c
#define GPUCOMMON_INTERNAL_CAT(...) GPUCOMMON_INTERNAL_CAT_A(__VA_ARGS__)
extern int32_t GPUCOMMON_INTERNAL_CAT(GPUReconstruction, GPUCA_GPUTYPE, ChkErr)(const int64_t error, const char* file, int32_t line);
inline int32_t GPUReconstructionCPUChkErr(const int64_t error, const char* file, int32_t line)
{
if (error) {
GPUError("GPUCommon Error Code %d (%s:%d)", error, file, line);
}
return error != 0;
}
static inline int32_t GPUReconstructionChkErr(const int64_t error, const char* file, int32_t line, bool failOnError)
{
int32_t retVal = error && GPUCOMMON_INTERNAL_CAT(GPUReconstruction, GPUCA_GPUTYPE, ChkErr)(error, file, line);
if (retVal && failOnError) {
throw std::runtime_error("GPU API Call Failure");
}
return error;
}
#undef GPUCOMMON_INTERNAL_CAT_A
#undef GPUCOMMON_INTERNAL_CAT
} // namespace o2::gpu::internal

#endif
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/GPUConstantMem.h
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@ static constexpr size_t gGPUConstantMemBufferSize = (sizeof(GPUConstantMem) + si
#endif
} // namespace o2::gpu
#if defined(GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM) && !defined(GPUCA_GPUCODE_HOSTONLY)
GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer;
GPUconstant() o2::gpu::GPUConstantMemCopyable gGPUConstantMemBuffer; // TODO: This should go into o2::gpu namespace, but then CUDA or HIP would not find the symbol
#endif // GPUCA_HAS_GLOBAL_SYMBOL_CONSTANT_MEM
namespace o2::gpu
{
Expand Down
6 changes: 3 additions & 3 deletions GPU/GPUTracking/Base/GPUGeneralKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,7 @@ class GPUKernelTemplate
};

typedef GPUconstantref() GPUConstantMem processorType;
GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; }
GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::NoRecoStep; }
GPUhdi() static processorType* Processor(GPUConstantMem& processors)
{
return &processors;
Expand All @@ -94,7 +94,7 @@ class GPUKernelTemplate
class GPUMemClean16 : public GPUKernelTemplate
{
public:
GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; }
GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::NoRecoStep; }
template <int32_t iKernel = defaultKernel>
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() void* ptr, uint64_t size);
};
Expand All @@ -103,7 +103,7 @@ class GPUMemClean16 : public GPUKernelTemplate
class GPUitoa : public GPUKernelTemplate
{
public:
GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUCA_RECO_STEP::NoRecoStep; }
GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep() { return GPUDataTypes::RecoStep::NoRecoStep; }
template <int32_t iKernel = defaultKernel>
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUsharedref() GPUSharedMemory& smem, processorType& processors, GPUglobalref() int32_t* ptr, uint64_t size);
};
Expand Down
8 changes: 4 additions & 4 deletions GPU/GPUTracking/Base/GPUParam.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,10 @@ struct GPUParam_t {
int32_t continuousMaxTimeBin;
int32_t tpcCutTimeBin;

GPUTPCGeometry tpcGeometry; // TPC Geometry
GPUTPCGMPolynomialField polynomialField; // Polynomial approx. of magnetic field for TPC GM
const uint32_t* occupancyMap; // Ptr to TPC occupancy map
uint32_t occupancyTotal; // Total occupancy in the TPC (nCl / nHbf)
GPUTPCGeometry tpcGeometry; // TPC Geometry
GPUTPCGMPolynomialField polynomialField; // Polynomial approx. of magnetic field for TPC GM
const uint32_t* occupancyMap; // Ptr to TPC occupancy map
uint32_t occupancyTotal; // Total occupancy in the TPC (nCl / nHbf)

GPUParamSector SectorParam[GPUCA_NSECTORS];

Expand Down
15 changes: 15 additions & 0 deletions GPU/GPUTracking/Base/GPUReconstruction.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -1078,6 +1078,21 @@ int32_t GPUReconstruction::CheckErrorCodes(bool cpuOnly, bool forceShowErrors, s
return retVal;
}

int32_t GPUReconstruction::GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError)
{
if (error == 0 || !GPUChkErrInternal(error, file, line)) {
return 0;
}
if (failOnError) {
if (mInitialized && mInErrorHandling == false) {
mInErrorHandling = true;
CheckErrorCodes(false, true);
}
throw std::runtime_error("GPU Backend Failure");
}
return 1;
}

void GPUReconstruction::DumpSettings(const char* dir)
{
std::string f;
Expand Down
7 changes: 5 additions & 2 deletions GPU/GPUTracking/Base/GPUReconstruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,7 +52,7 @@ namespace gpu_reconstruction_kernels
{
struct deviceEvent;
class threadContext;
}
} // namespace gpu_reconstruction_kernels

class GPUReconstruction
{
Expand Down Expand Up @@ -143,6 +143,7 @@ class GPUReconstruction
virtual void* getGPUPointer(void* ptr) { return ptr; }
virtual void startGPUProfiling() {}
virtual void endGPUProfiling() {}
int32_t GPUChkErrA(const int64_t error, const char* file, int32_t line, bool failOnError);
int32_t CheckErrorCodes(bool cpuOnly = false, bool forceShowErrors = false, std::vector<std::array<uint32_t, 4>>* fillErrors = nullptr);
void RunPipelineWorker();
void TerminatePipelineWorker();
Expand Down Expand Up @@ -192,7 +193,7 @@ class GPUReconstruction
bool IsInitialized() const { return mInitialized; }
void SetSettings(float solenoidBzNominalGPU, const GPURecoStepConfiguration* workflow = nullptr);
void SetSettings(const GPUSettingsGRP* grp, const GPUSettingsRec* rec = nullptr, const GPUSettingsProcessing* proc = nullptr, const GPURecoStepConfiguration* workflow = nullptr);
void SetResetTimers(bool reset) { mProcessingSettings.resetTimers = reset; } // May update also after Init()
void SetResetTimers(bool reset) { mProcessingSettings.resetTimers = reset; } // May update also after Init()
void SetDebugLevelTmp(int32_t level) { mProcessingSettings.debugLevel = level; } // Temporarily, before calling SetSettings()
void UpdateSettings(const GPUSettingsGRP* g, const GPUSettingsProcessing* p = nullptr, const GPUSettingsRecDynamic* d = nullptr);
void UpdateDynamicSettings(const GPUSettingsRecDynamic* d);
Expand Down Expand Up @@ -246,6 +247,7 @@ class GPUReconstruction
void UpdateMaxMemoryUsed();
int32_t EnqueuePipeline(bool terminate = false);
GPUChain* GetNextChainInQueue();
virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const { return 0; }

virtual int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) = 0;
virtual int32_t unregisterMemoryForGPU_internal(const void* ptr) = 0;
Expand Down Expand Up @@ -327,6 +329,7 @@ class GPUReconstruction

// Others
bool mInitialized = false;
bool mInErrorHandling = false;
uint32_t mStatNEvents = 0;
uint32_t mNEventsProcessed = 0;
double mStatKernelTime = 0.;
Expand Down
4 changes: 2 additions & 2 deletions GPU/GPUTracking/Base/GPUReconstructionCPU.h
Original file line number Diff line number Diff line change
Expand Up @@ -134,8 +134,8 @@ template <class S, int32_t I, typename... Args>
inline void GPUReconstructionCPU::runKernel(krnlSetup&& setup, Args&&... args)
{
HighResTimer* t = nullptr;
GPUCA_RECO_STEP myStep = S::GetRecoStep() == GPUCA_RECO_STEP::NoRecoStep ? setup.x.step : S::GetRecoStep();
if (myStep == GPUCA_RECO_STEP::NoRecoStep) {
GPUDataTypes::RecoStep myStep = S::GetRecoStep() == GPUDataTypes::RecoStep::NoRecoStep ? setup.x.step : S::GetRecoStep();
if (myStep == GPUDataTypes::RecoStep::NoRecoStep) {
throw std::runtime_error("Failure running general kernel without defining RecoStep");
}
int32_t cpuFallback = IsGPU() ? (setup.x.device == krnlDeviceType::CPU ? 2 : (mRecoSteps.stepsGPUMask & myStep) != myStep) : 0;
Expand Down
6 changes: 5 additions & 1 deletion GPU/GPUTracking/Base/GPUReconstructionDeviceBase.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -175,7 +175,11 @@ void GPUReconstructionDeviceBase::runConstantRegistrators()
{
auto& list = getDeviceConstantMemRegistratorsVector();
for (uint32_t i = 0; i < list.size(); i++) {
mDeviceConstantMemList.emplace_back(list[i]());
auto* ptr = list[i]();
if (ptr == nullptr) {
GPUFatal("Error registering constant memory");
}
mDeviceConstantMemList.emplace_back(ptr);
}
}

Expand Down
1 change: 1 addition & 0 deletions GPU/GPUTracking/Base/GPUReconstructionDeviceBase.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,7 @@ class GPUReconstructionDeviceBase : public GPUReconstructionCPU
virtual int32_t InitDevice_Runtime() = 0;
int32_t ExitDevice() override;
virtual int32_t ExitDevice_Runtime() = 0;
virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override = 0;
int32_t registerMemoryForGPU_internal(const void* ptr, size_t size) override;
int32_t unregisterMemoryForGPU_internal(const void* ptr) override;
void unregisterRemainingRegisteredMemory();
Expand Down
4 changes: 2 additions & 2 deletions GPU/GPUTracking/Base/GPUReconstructionKernelMacros.h
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@
// GPU Host wrappers for kernel
#define GPUCA_KRNL_HOST(x_class, ...) \
GPUCA_KRNLGPU(x_class, __VA_ARGS__) \
template <> class GPUCA_KRNL_BACKEND_CLASS::backendInternal<GPUCA_M_KRNL_TEMPLATE(x_class)> { \
template <> class GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::backendInternal<GPUCA_M_KRNL_TEMPLATE(x_class)> { \
public: \
template <typename T, typename... Args> \
static inline void runKernelBackendMacro(const krnlSetupTime& _xyz, T* me, const Args&... args) \
Expand All @@ -80,7 +80,7 @@
};

#define GPUCA_KRNL_PROP(x_class, x_attributes) \
template <> gpu_reconstruction_kernels::krnlProperties GPUCA_KRNL_BACKEND_CLASS::getKernelPropertiesBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>() { \
template <> gpu_reconstruction_kernels::krnlProperties GPUCA_M_CAT3(GPUReconstruction, GPUCA_GPUTYPE, Backend)::getKernelPropertiesBackend<GPUCA_M_KRNL_TEMPLATE(x_class)>() { \
gpu_reconstruction_kernels::krnlProperties ret = gpu_reconstruction_kernels::krnlProperties{GPUCA_ATTRRES(_EXTRREG, GPUCA_M_STRIP(x_attributes))}; \
return ret.nThreads > 0 ? ret : gpu_reconstruction_kernels::krnlProperties{(int32_t)mThreadCount}; \
}
Expand Down
8 changes: 4 additions & 4 deletions GPU/GPUTracking/Base/GPUReconstructionKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,14 @@ struct classArgument {
};

struct krnlExec {
constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto) : nBlocks(b), nThreads(t), stream(s), device(d), step(GPUCA_RECO_STEP::NoRecoStep) {}
constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUCA_RECO_STEP st) : nBlocks(b), nThreads(t), stream(s), device(GPUReconstruction::krnlDeviceType::Auto), step(st) {}
constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d, GPUCA_RECO_STEP st) : nBlocks(b), nThreads(t), stream(s), device(d), step(st) {}
constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d = GPUReconstruction::krnlDeviceType::Auto) : nBlocks(b), nThreads(t), stream(s), device(d), step(GPUDataTypes::RecoStep::NoRecoStep) {}
constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(GPUReconstruction::krnlDeviceType::Auto), step(st) {}
constexpr krnlExec(uint32_t b, uint32_t t, int32_t s, GPUReconstruction::krnlDeviceType d, GPUDataTypes::RecoStep st) : nBlocks(b), nThreads(t), stream(s), device(d), step(st) {}
uint32_t nBlocks;
uint32_t nThreads;
int32_t stream;
GPUReconstruction::krnlDeviceType device;
GPUCA_RECO_STEP step;
GPUDataTypes::RecoStep step;
};
struct krnlRunRange {
constexpr krnlRunRange() = default;
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ endif()
message(STATUS "Building GPUTracking with CUDA support ${TMP_TARGET}")

set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu)
set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h CUDAThrustHelpers.h)
set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesHost.h CUDAThrustHelpers.h)
# -------------------------------- Prepare RTC -------------------------------------------------------
enable_language(ASM)
if(ALIGPU_BUILD_TYPE STREQUAL "O2")
Expand Down
Loading