Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion GPU/Common/GPUCommonDefAPI.h
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@
#define GPUhd() // Host and device function, inlined during GPU compilation to avoid symbol clashes in host code
#define GPUhdi() inline // Host and device function, to-be-inlined on host and device
#define GPUhdni() // Host and device function, not to-be-inlined automatically
#define GPUg() INVALID_TRIGGER_ERROR_NO_HOST_CODE // GPU kernel
#define GPUg() INVALID_TRIGGER_ERROR_NO_GPU_CODE // GPU kernel
#define GPUshared() // shared memory variable declaration
#define GPUglobal() // global memory variable declaration (only used for kernel input pointers)
#define GPUconstant() // constant memory variable declaraion
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/GPUReconstructionCPU.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
#include "GPUReconstructionIncludes.h"
#include "GPUReconstructionThreading.h"
#include "GPUChain.h"
#include "GPUDefParameters.h"
#include "GPUDefParametersRuntime.h"
#include "GPUTPCClusterData.h"
#include "GPUTPCSectorOutCluster.h"
#include "GPUTPCGMMergedTrack.h"
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Base/cuda/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ if(DEFINED CUDA_COMPUTETARGET)
endif()
message(STATUS "Building GPUTracking with CUDA support ${TMP_TARGET}")

set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu)
set(SRCS GPUReconstructionCUDA.cu GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDARTCCalls.cu)
set(HDRS GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDADef.h GPUReconstructionCUDAIncludesSystem.h)
# -------------------------------- Prepare RTC -------------------------------------------------------
enable_language(ASM)
Expand Down
9 changes: 3 additions & 6 deletions GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime()
constexpr int32_t reqVerMin = 0;
#endif
if (mProcessingSettings.rtc.enable && mProcessingSettings.rtctech.runTest == 2) {
mWarpSize = GPUCA_WARP_SIZE;
genAndLoadRTC();
exit(0);
}
Expand Down Expand Up @@ -244,16 +245,12 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime()
GPUInfo("\ttextureAlignment = %ld", (uint64_t)deviceProp.textureAlignment);
GPUInfo(" ");
}
if (deviceProp.warpSize != GPUCA_WARP_SIZE) {
if (deviceProp.warpSize != GPUCA_WARP_SIZE && !mProcessingSettings.rtc.enable) {
throw std::runtime_error("Invalid warp size on GPU");
}
mWarpSize = deviceProp.warpSize;
mBlockCount = deviceProp.multiProcessorCount;
mMaxBackendThreads = std::max<int32_t>(mMaxBackendThreads, deviceProp.maxThreadsPerBlock * mBlockCount);
#ifndef __HIPCC__ // CUDA
mWarpSize = 32;
#else // HIP
mWarpSize = 64;
#endif
mDeviceName = deviceProp.name;
mDeviceName += " (CUDA GPU)";

Expand Down
3 changes: 1 addition & 2 deletions GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -45,8 +45,6 @@ class GPUReconstructionCUDABackend : public GPUReconstructionDeviceBase
template <class T, int32_t I = 0, typename... Args>
void runKernelBackendInternal(const krnlSetupTime& _xyz, const Args&... args);

void getRTCKernelCalls(std::vector<std::string>& kernels);

template <class T, class S>
friend GPUh() void GPUCommonAlgorithm::sortOnDevice(auto* rec, int32_t stream, T* begin, size_t N, const S& comp);
GPUReconstructionCUDAInternals* mInternals;
Expand Down Expand Up @@ -91,6 +89,7 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels<GPUReconstructionC

private:
int32_t genRTC(std::string& filename, uint32_t& nCompile);
void getRTCKernelCalls(std::vector<std::string>& kernels);
void genAndLoadRTC();
void loadKernelModules(bool perKernel);
const char *mRtcSrcExtension = ".src", *mRtcBinExtension = ".o";
Expand Down
3 changes: 2 additions & 1 deletion GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,8 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
}
fclose(fp);
}
const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport(*mParDevice, true);
const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport(*mParDevice, true) +
"#define GPUCA_WARP_SIZE " + std::to_string(mWarpSize) + "\n";
if (mProcessingSettings.rtctech.printLaunchBounds || mProcessingSettings.debugLevel >= 3) {
GPUInfo("RTC Launch Bounds:\n%s", launchBounds.c_str());
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
/// \file GPUReconstructionCUDAIncludesSystem.h
/// \author David Rohr

#ifndef O2_GPU_GPURECONSTRUCTIONCUDAINCLUDES_H
#define O2_GPU_GPURECONSTRUCTIONCUDAINCLUDES_H
#ifndef O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H
#define O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H

#include <cstdint>
#include <type_traits>
Expand All @@ -32,4 +32,4 @@
#include <sm_20_atomic_functions.h>
#include <cuda_fp16.h>

#endif
#endif // O2_GPU_GPURECONSTRUCTIONCUDAINCLUDESSYSTEM_H
11 changes: 0 additions & 11 deletions GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAKernels.cu
Original file line number Diff line number Diff line change
Expand Up @@ -118,14 +118,3 @@ static GPUReconstructionDeviceBase::deviceConstantMemRegistration registerConstS
return retVal;
});
#endif

void GPUReconstructionCUDABackend::getRTCKernelCalls(std::vector<std::string>& kernels)
{
#undef GPUCA_KRNL_LB
#undef __launch_bounds__
#define GPUCA_KRNL(...) kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU(__VA_ARGS__)));
#define GPUCA_KRNL_LB(x_class, x_attributes, ...) GPUCA_KRNL(x_class, (REG, (GPUCA_M_CAT(GPUCA_RTC_LB_, GPUCA_M_KRNL_NAME(x_class))), GPUCA_M_STRIP(x_attributes)), __VA_ARGS__)
#include "GPUReconstructionKernelList.h"
#undef GPUCA_KRNL
#undef GPUCA_KRNL_LB
}
32 changes: 32 additions & 0 deletions GPU/GPUTracking/Base/cuda/GPUReconstructionCUDARTCCalls.cu
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUReconstructionCUDARTCCalls.cu
/// \author David Rohr

#define GPUCA_GPUCODE_HOSTONLY
#define GPUCA_GPUCODE_NO_LAUNCH_BOUNDS

#define GPUCA_KRNL_REG(args) __launch_bounds__(GPUCA_M_STRIP(args))

#include "GPUReconstructionCUDAIncludesSystem.h"
#include "GPUReconstructionCUDADef.h"
#include "GPUReconstructionCUDA.h"

using namespace o2::gpu;

void GPUReconstructionCUDA::getRTCKernelCalls(std::vector<std::string>& kernels)
{
#undef GPUCA_KRNL
#define GPUCA_KRNL(...) kernels.emplace_back(GPUCA_M_STR(GPUCA_KRNLGPU(__VA_ARGS__)));
#undef __launch_bounds__
#include "GPUReconstructionKernelList.h"
}
26 changes: 18 additions & 8 deletions GPU/GPUTracking/Base/hip/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -24,20 +24,30 @@ message(STATUS "Building GPUTracking with HIP support ${TMP_TARGET}")
if(NOT DEFINED GPUCA_HIP_HIPIFY_FROM_CUDA OR "${GPUCA_HIP_HIPIFY_FROM_CUDA}")
set(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_BINARY_DIR}/hipify)
file(MAKE_DIRECTORY ${GPUCA_HIP_SOURCE_DIR})
set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu)
set(GPUCA_HIP_FILE_LIST GPUReconstructionCUDA.cu GPUReconstructionCUDAExternalProvider.cu GPUReconstructionCUDA.h GPUReconstructionCUDAInternals.h GPUReconstructionCUDAHelpers.inc GPUReconstructionCUDAkernel.template.cu GPUReconstructionCUDADef.h GPUReconstructionCUDAGenRTC.cxx GPUReconstructionCUDAKernels.cu GPUReconstructionCUDArtc.cu GPUReconstructionCUDARTCCalls.cu)
set(GPUCA_HIP_LOCAL_FILE_LIST GPUReconstructionHIPIncludesSystem.h)
set(HIP_SOURCES "")
foreach(file ${GPUCA_HIP_FILE_LIST})
get_filename_component(ABS_CUDA_SORUCE ../cuda/${file} ABSOLUTE)
get_filename_component(CUDA_SOURCE ${file} NAME)
get_filename_component(CUDA_SOURCE_EXT ${file} EXT)
string(REPLACE ".cu" ".hip" HIP_SOURCE1 ${CUDA_SOURCE})
string(REPLACE "CUDA" "HIP" HIP_SOURCE ${HIP_SOURCE1})
add_custom_command(
OUTPUT ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE}
COMMAND ${hip_HIPIFY_PERL_EXECUTABLE} --quiet-warnings ${ABS_CUDA_SORUCE} | sed -e 's/CUDA/HIP/g' -e 's/cuda/hip/g' > ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE}
DEPENDS ${ABS_CUDA_SORUCE}
COMMENT "Hippifying ${HIP_SOURCE}"
)
if(CUDA_SOURCE_EXT STREQUAL ".cu" OR CUDA_SOURCE_EXT STREQUAL ".h")
add_custom_command(
OUTPUT ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE}
COMMAND ${hip_HIPIFY_PERL_EXECUTABLE} --quiet-warnings ${ABS_CUDA_SORUCE} | sed -e 's/CUDA/HIP/g' -e 's/cuda/hip/g' > ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE}
DEPENDS ${ABS_CUDA_SORUCE}
COMMENT "Hippifying ${HIP_SOURCE}"
)
else()
add_custom_command(
OUTPUT ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE}
COMMAND sed -e 's/CUDA/HIP/g' -e 's/cuda/hip/g' ${ABS_CUDA_SORUCE} > ${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE}
DEPENDS ${ABS_CUDA_SORUCE}
COMMENT "Generating HIP source ${HIP_SOURCE}"
)
endif()
list(APPEND HIP_SOURCES "${GPUCA_HIP_SOURCE_DIR}/${HIP_SOURCE}")
endforeach()
foreach(file ${GPUCA_HIP_LOCAL_FILE_LIST})
Expand All @@ -61,7 +71,7 @@ else()
get_filename_component(GPUCA_HIP_SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR} ABSOLUTE)
endif()

set(SRCS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPKernels.hip)
set(SRCS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPKernels.hip ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPRTCCalls.hip)
set(SRCS_CXX ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPGenRTC.cxx)
set(HDRS ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIP.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPInternals.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPHelpers.inc ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPDef.h ${GPUCA_HIP_SOURCE_DIR}/GPUReconstructionHIPIncludesSystem.h)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,8 @@
/// \file GPUReconstructionHIPIncludesSystem.h
/// \author David Rohr

#ifndef O2_GPU_RECONSTRUCTIONHIPINCLUDES_H
#define O2_GPU_RECONSTRUCTIONHIPINCLUDES_H
#ifndef O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H
#define O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H

#include <hip/hip_runtime.h>
#include <hip/hip_ext.h>
Expand All @@ -25,4 +25,4 @@
#include <thrust/device_ptr.h>
#pragma GCC diagnostic pop

#endif
#endif // O2_GPU_RECONSTRUCTIONHIPINCLUDESSYSTEM_H
8 changes: 5 additions & 3 deletions GPU/GPUTracking/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,9 @@ set(HDRS_INSTALL
DataTypes/GPUTriggerOutputs.h
Debug/GPUROOTDump.h
Definitions/GPUDefConstantsAndSettings.h
Definitions/GPUDefParametersDefault.h
Definitions/GPUDefParametersWrapper.h
Definitions/GPUDefParametersConstants.h
Definitions/GPUDefParametersDefaults.h
Definitions/GPUDef.h
Definitions/GPUDefMacros.h
Definitions/GPULogging.h
Expand Down Expand Up @@ -234,7 +236,7 @@ set(TEMPLATE_HEADER_LIST Base/GPUReconstructionKernelList.template.h
Base/GPUReconstructionKernelIncludes.template.h
Base/GPUReconstructionIncludesDeviceAll.template.h
cmake/GPUNoFastMathKernels.template.h
Definitions/GPUDefParameters.template.h
Definitions/GPUDefParametersRuntime.template.h
Definitions/GPUDefParametersLoad.template.inc)
set(GENERATED_HEADERS_LIST "")

Expand All @@ -258,7 +260,7 @@ add_custom_command(
)
list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h)

set(HDRS_INSTALL ${HDRS_INSTALL} ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUReconstructionKernelList.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParameters.h ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoad.inc ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h)
set(HDRS_INSTALL ${HDRS_INSTALL} ${GENERATED_HEADERS_LIST})
include(kernels.cmake)

# Optional sources depending on optional dependencies
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -258,6 +258,9 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1un
break;
}
if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionDifferences) {
#ifdef GPUCA_GPUCODE
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE);
#endif
if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) {
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortZPadTime>(clusters->clusters[iSector][iRow]));
} else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) {
Expand Down
18 changes: 11 additions & 7 deletions GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,15 +72,19 @@ class GPUTPCCompressionGatherKernels : public GPUKernelTemplate
using Vec64 = uint64_t;
using Vec128 = uint4;

struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64<uint32_t, GPUCA_GET_THREAD_COUNT(GPUCA_LB_COMPRESSION_GATHER)> {
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32));
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64));
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128));
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock));
struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64<uint32_t, GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)> {
union {
uint32_t warpOffset[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)];
Vec32 buf32[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE];
Vec64 buf64[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE];
Vec128 buf128[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE];
uint32_t warpOffset[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)];
Vec32 buf32[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
Vec64 buf64[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
Vec128 buf128[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
struct {
uint32_t sizes[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE];
uint32_t srcOffsets[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE];
uint32_t sizes[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
uint32_t srcOffsets[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
} unbuffered;
};

Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Definitions/GPUDef.h
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include "GPUCommonDef.h"
#include "GPUDefConstantsAndSettings.h"
#include "GPUDefParametersDefault.h"
#include "GPUDefParametersWrapper.h"
#include "GPUCommonRtypes.h"

// Macros for masking ptrs in OpenCL kernel calls as uint64_t (The API only allows us to pass buffer objects)
Expand Down
87 changes: 87 additions & 0 deletions GPU/GPUTracking/Definitions/GPUDefParametersConstants.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
// Copyright 2019-2020 CERN and copyright holders of ALICE O2.
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
// All rights not expressly granted are reserved.
//
// This software is distributed under the terms of the GNU General Public
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
//
// In applying this license CERN does not waive the privileges and immunities
// granted to it by virtue of its status as an Intergovernmental Organization
// or submit itself to any jurisdiction.

/// \file GPUDefParametersConstants.h
/// \author David Rohr

// This file contains compile-time constants, independent from the backend

#ifndef GPUDEFPARAMETERSCONSTANTS_H
#define GPUDEFPARAMETERSCONSTANTS_H
// clang-format off

#define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix!

#if defined(__CUDACC__) || defined(__HIPCC__)
#define GPUCA_SPECIALIZE_THRUST_SORTS
#endif

#define GPUCA_MAX_THREADS 1024
#define GPUCA_MAX_STREAMS 36

#if defined(GPUCA_GPUCODE)
#define GPUCA_SORT_STARTHITS // Sort the start hits when running on GPU
#endif

#define GPUCA_ROWALIGNMENT 16 // Align of Row Hits and Grid
#define GPUCA_BUFFER_ALIGNMENT 64 // Alignment of buffers obtained from SetPointers
#define GPUCA_MEMALIGN (64 * 1024) // Alignment of allocated memory blocks

// Default maximum numbers
#define GPUCA_MAX_CLUSTERS ((size_t) 1024 * 1024 * 1024) // Maximum number of TPC clusters
#define GPUCA_MAX_TRD_TRACKLETS ((size_t) 128 * 1024) // Maximum number of TRD tracklets
#define GPUCA_MAX_ITS_FIT_TRACKS ((size_t) 96 * 1024) // Max number of tracks for ITS track fit
#define GPUCA_MEMORY_SIZE ((size_t) 6 * 1024 * 1024 * 1024) // Size of memory allocated on Device
#define GPUCA_HOST_MEMORY_SIZE ((size_t) 1 * 1024 * 1024 * 1024) // Size of memory allocated on Host
#define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread
#define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // Stack size per GPU thread

#ifdef GPUCA_GPUCODE
#ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6
#endif
#ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12
#endif
#ifndef GPUCA_ALTERNATE_BORDER_SORT
#define GPUCA_ALTERNATE_BORDER_SORT 0
#endif
#ifndef GPUCA_SORT_BEFORE_FIT
#define GPUCA_SORT_BEFORE_FIT 0
#endif
#ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION
#define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0
#endif
#ifndef GPUCA_COMP_GATHER_KERNEL
#define GPUCA_COMP_GATHER_KERNEL 0
#endif
#ifndef GPUCA_COMP_GATHER_MODE
#define GPUCA_COMP_GATHER_MODE 2
#endif
#else
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0
#define GPUCA_ALTERNATE_BORDER_SORT 0
#define GPUCA_SORT_BEFORE_FIT 0
#define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0
#define GPUCA_THREAD_COUNT_FINDER 1
#define GPUCA_COMP_GATHER_KERNEL 0
#define GPUCA_COMP_GATHER_MODE 0
#endif
#ifndef GPUCA_DEDX_STORAGE_TYPE
#define GPUCA_DEDX_STORAGE_TYPE float
#endif
#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float
#endif

// clang-format on
#endif // GPUDEFPARAMETERSCONSTANTS_H
Loading