Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 0 additions & 2 deletions GPU/GPUTracking/Base/GPUReconstruction.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -292,8 +292,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice()
mProcessingSettings.delayedOutput = false;
}

UpdateAutomaticProcessingSettings();
GPUCA_GPUReconstructionUpdateDefaults();
if (!mProcessingSettings.rtc.enable) {
mProcessingSettings.rtc.optConstexpr = false;
}
Expand Down
3 changes: 2 additions & 1 deletion GPU/GPUTracking/Base/GPUReconstruction.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ struct GPUReconstructionPipelineContext;
struct GPUReconstructionThreading;
class GPUROOTDumpCore;
class ThrustVolatileAllocator;
struct GPUDefParameters;

namespace gpu_reconstruction_kernels
{
Expand Down Expand Up @@ -205,6 +206,7 @@ class GPUReconstruction
GPUOutputControl& OutputControl() { return mOutputControl; }
uint32_t NStreams() const { return mNStreams; }
const void* DeviceMemoryBase() const { return mDeviceMemoryBase; }
virtual const GPUDefParameters& getGPUParameters(bool doGPU) const = 0;

RecoStepField GetRecoSteps() const { return mRecoSteps.steps; }
RecoStepField GetRecoStepsGPU() const { return mRecoSteps.stepsGPUMask; }
Expand Down Expand Up @@ -239,7 +241,6 @@ class GPUReconstruction
void FreeRegisteredMemory(GPUMemoryResource* res);
GPUReconstruction(const GPUSettingsDeviceBackend& cfg); // Constructor
int32_t InitPhaseBeforeDevice();
virtual void UpdateAutomaticProcessingSettings() {}
virtual int32_t InitDevice() = 0;
int32_t InitPhasePermanentMemory();
int32_t InitPhaseAfterDevice();
Expand Down
19 changes: 1 addition & 18 deletions GPU/GPUTracking/Base/GPUReconstructionIncludes.h
Original file line number Diff line number Diff line change
Expand Up @@ -29,21 +29,4 @@
#include <iostream>
#include <fstream>

#define GPUCA_GPUReconstructionUpdateDefaults() \
if (mProcessingSettings.alternateBorderSort < 0) { \
mProcessingSettings.alternateBorderSort = GPUCA_ALTERNATE_BORDER_SORT; \
} \
if (mProcessingSettings.mergerSortTracks < 0) { \
mProcessingSettings.mergerSortTracks = GPUCA_SORT_BEFORE_FIT; \
} \
if (param().rec.tpc.looperInterpolationInExtraPass < 0) { \
param().rec.tpc.looperInterpolationInExtraPass = GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION; \
} \
if (mProcessingSettings.tpcCompressionGatherModeKernel < 0) { \
mProcessingSettings.tpcCompressionGatherModeKernel = GPUCA_COMP_GATHER_KERNEL; \
} \
if (mProcessingSettings.tpcCompressionGatherMode < 0) { \
mProcessingSettings.tpcCompressionGatherMode = GPUCA_COMP_GATHER_MODE; \
}

#endif
#endif // GPURECONSTRUCTIONINCLUDES_H
1 change: 1 addition & 0 deletions GPU/GPUTracking/Base/GPUReconstructionProcessing.h
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ class GPUReconstructionProcessing : public GPUReconstruction
uint32_t countToGPU = 0;
uint32_t countToHost = 0;
};
// Selects the parameter set to use: dereferences mParDevice when doGPU is true, mParCPU otherwise.
// Neither pointer is checked for null here before being dereferenced.
const GPUDefParameters& getGPUParameters(bool doGPU) const override { return *(doGPU ? mParDevice : mParCPU); }

protected:
GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg);
Expand Down
5 changes: 0 additions & 5 deletions GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu
Original file line number Diff line number Diff line change
Expand Up @@ -99,11 +99,6 @@ void GPUReconstructionCUDA::GetITSTraits(std::unique_ptr<o2::its::TrackerTraits>
}
}

void GPUReconstructionCUDA::UpdateAutomaticProcessingSettings()
{
GPUCA_GPUReconstructionUpdateDefaults();
}

int32_t GPUReconstructionCUDA::InitDevice_Runtime()
{
#ifndef __HIPCC__ // CUDA
Expand Down
1 change: 0 additions & 1 deletion GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,6 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels<GPUReconstructionC
protected:
int32_t InitDevice_Runtime() override;
int32_t ExitDevice_Runtime() override;
void UpdateAutomaticProcessingSettings() override;

std::unique_ptr<gpu_reconstruction_kernels::threadContext> GetThreadContext() override;
void SynchronizeGPU() override;
Expand Down
5 changes: 0 additions & 5 deletions GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -60,11 +60,6 @@ int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, cons
return error != CL_SUCCESS;
}

void GPUReconstructionOCLBackend::UpdateAutomaticProcessingSettings()
{
GPUCA_GPUReconstructionUpdateDefaults();
}

int32_t GPUReconstructionOCLBackend::InitDevice_Runtime()
{
if (mMaster == nullptr) {
Expand Down
1 change: 0 additions & 1 deletion GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase

int32_t InitDevice_Runtime() override;
int32_t ExitDevice_Runtime() override;
void UpdateAutomaticProcessingSettings() override;

virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override;

Expand Down
9 changes: 6 additions & 3 deletions GPU/GPUTracking/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -247,14 +247,17 @@ foreach(TEMPLATE_FILE ${TEMPLATE_HEADER_LIST})
file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME} INPUT ${TEMPLATE_FILE})
list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME})
endforeach()
set(GPUDEFPARAMETERSLBLIST "$<JOIN:$<LIST:TRANSFORM,$<LIST:TRANSFORM,$<TARGET_PROPERTY:O2_GPU_KERNELS,O2_GPU_KERNEL_NAMES>,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,LB_>,\n>\n")
string(APPEND GPUDEFPARAMETERSLBLIST "$<JOIN:$<LIST:TRANSFORM,$<TARGET_PROPERTY:O2_GPU_KERNELS,O2_GPU_KERNEL_PARAMS>,PREPEND,PAR_>,\n>\n")
string(APPEND GPUDEFPARAMETERSLBLIST "$<JOIN:$<LIST:TRANSFORM,$<TARGET_PROPERTY:O2_GPU_KERNELS,O2_GPU_KERNEL_STRING_PARAMS>,PREPEND,PAR_>,\n>")
file(GENERATE
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase
CONTENT "$<JOIN:$<LIST:TRANSFORM,$<TARGET_PROPERTY:O2_GPU_KERNELS,O2_GPU_KERNEL_NAMES>,REPLACE,[^A-Za-z0-9]+,_>,\n>"
)
CONTENT ${GPUDEFPARAMETERSLBLIST})
add_custom_command(
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h
COMMAND awk "{print(\"#ifndef GPUCA_LB_\" $0 \"\\n#define GPUCA_LB_\" $0 \" 0\\n#endif\")}" ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase > ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h
COMMAND awk "{print(\"#ifndef GPUCA_\" $0 \"\\n#define GPUCA_\" $0 \" 0\\n#endif\")}" ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase > ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h
COMMENT "Generating GPUDefParametersLoadPrepare.h"
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase
VERBATIM
COMMAND_EXPAND_LISTS
)
Expand Down
16 changes: 10 additions & 6 deletions GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#include "GPUReconstruction.h"
#include "GPUO2DataTypes.h"
#include "GPUMemorySizeScalers.h"
#include "GPUDefParametersRuntime.h"

using namespace o2::gpu;

Expand All @@ -36,20 +37,22 @@ void* GPUTPCCompression::SetPointersOutputHost(void* mem)

void* GPUTPCCompression::SetPointersScratch(void* mem)
{
int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode;
computePointerWithAlignment(mem, mClusterStatus, mMaxClusters);
if (mRec->GetProcessingSettings().tpcCompressionGatherMode >= 2) {
if (gatherMode >= 2) {
computePointerWithAlignment(mem, mAttachedClusterFirstIndex, mMaxTracks);
}
if (mRec->GetProcessingSettings().tpcCompressionGatherMode != 1) {
if (gatherMode != 1) {
SetPointersCompressedClusters(mem, mPtrs, mMaxTrackClusters, mMaxTracks, mMaxClustersInCache, false);
}
return mem;
}

void* GPUTPCCompression::SetPointersOutput(void* mem)
{
int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode;
computePointerWithAlignment(mem, mAttachedClusterFirstIndex, mMaxTrackClusters);
if (mRec->GetProcessingSettings().tpcCompressionGatherMode == 1) {
if (gatherMode == 1) {
SetPointersCompressedClusters(mem, mPtrs, mMaxTrackClusters, mMaxTracks, mMaxClustersInCache, false);
}
return mem;
Expand Down Expand Up @@ -102,12 +105,13 @@ void* GPUTPCCompression::SetPointersMemory(void* mem)
void GPUTPCCompression::RegisterMemoryAllocation()
{
AllocateAndInitializeLate();
int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode;
mMemoryResOutputHost = mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutputHost, GPUMemoryResource::MEMORY_OUTPUT_FLAG | GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_CUSTOM, "TPCCompressionOutputHost");
if (mRec->GetProcessingSettings().tpcCompressionGatherMode == 3) {
if (gatherMode == 3) {
mMemoryResOutputGPU = mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutputGPU, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_STACK, "TPCCompressionOutputGPU");
}
uint32_t stackScratch = (mRec->GetProcessingSettings().tpcCompressionGatherMode != 3) ? GPUMemoryResource::MEMORY_STACK : 0;
if (mRec->GetProcessingSettings().tpcCompressionGatherMode < 2) {
uint32_t stackScratch = (gatherMode != 3) ? GPUMemoryResource::MEMORY_STACK : 0;
if (gatherMode < 2) {
mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutput, GPUMemoryResource::MEMORY_OUTPUT | stackScratch, "TPCCompressionOutput");
}
mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersScratch, GPUMemoryResource::MEMORY_SCRATCH | stackScratch, "TPCCompressionScratch");
Expand Down
2 changes: 1 addition & 1 deletion GPU/GPUTracking/Definitions/GPUDefConstantsAndSettings.h
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
/// \author David Rohr

// This file contains compile-time constants affecting the GPU algorithms / reconstruction results.
// Architecture-dependant compile-time constants affecting the performance without changing the results are stored in GPUDefParameters.h
// Architecture-dependent compile-time constants affecting the performance without changing the results are stored in GPUDefParameters*.h

#ifndef GPUDEFCONSTANTSANDSETTINGS_H
#define GPUDEFCONSTANTSANDSETTINGS_H
Expand Down
45 changes: 1 addition & 44 deletions GPU/GPUTracking/Definitions/GPUDefParametersConstants.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,12 @@
#define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix!

#if defined(__CUDACC__) || defined(__HIPCC__)
#define GPUCA_SPECIALIZE_THRUST_SORTS
#define GPUCA_SPECIALIZE_THRUST_SORTS // Not compiled with RTC, so must be compile-time constant
#endif

#define GPUCA_MAX_THREADS 1024
#define GPUCA_MAX_STREAMS 36

#if defined(GPUCA_GPUCODE)
#define GPUCA_SORT_STARTHITS // Sort the start hits when running on GPU
#endif

#define GPUCA_ROWALIGNMENT 16 // Align of Row Hits and Grid
#define GPUCA_BUFFER_ALIGNMENT 64 // Alignment of buffers obtained from SetPointers
#define GPUCA_MEMALIGN (64 * 1024) // Alignment of allocated memory blocks
Expand All @@ -44,44 +40,5 @@
#define GPUCA_GPU_STACK_SIZE ((size_t) 8 * 1024) // Stack size per GPU thread
#define GPUCA_GPU_HEAP_SIZE ((size_t) 16 * 1025 * 1024) // GPU heap size, going by the macro name. NOTE(review): the factor 1025 may be a typo for 1024 - confirm

#ifdef GPUCA_GPUCODE
#ifndef GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 6
#endif
#ifndef GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 12
#endif
#ifndef GPUCA_ALTERNATE_BORDER_SORT
#define GPUCA_ALTERNATE_BORDER_SORT 0
#endif
#ifndef GPUCA_SORT_BEFORE_FIT
#define GPUCA_SORT_BEFORE_FIT 0
#endif
#ifndef GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION
#define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0
#endif
#ifndef GPUCA_COMP_GATHER_KERNEL
#define GPUCA_COMP_GATHER_KERNEL 0
#endif
#ifndef GPUCA_COMP_GATHER_MODE
#define GPUCA_COMP_GATHER_MODE 2
#endif
#else
#define GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP 0
#define GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE 0
#define GPUCA_ALTERNATE_BORDER_SORT 0
#define GPUCA_SORT_BEFORE_FIT 0
#define GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION 0
#define GPUCA_THREAD_COUNT_FINDER 1
#define GPUCA_COMP_GATHER_KERNEL 0
#define GPUCA_COMP_GATHER_MODE 0
#endif
#ifndef GPUCA_DEDX_STORAGE_TYPE
#define GPUCA_DEDX_STORAGE_TYPE float
#endif
#ifndef GPUCA_MERGER_INTERPOLATION_ERROR_TYPE
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE float
#endif

// clang-format on
#endif // GPUDEFPARAMETERSCONSTANTS_H
Loading