Skip to content

Commit efbf147

Browse files
committed
GPU: Move compile-time constant parameters to a new scheme with a runtimeParameter struct and automatic RTC-generated defines.
With this, we can revert the workaround of keeping the parameters as defines in the wrapper file.
1 parent 64ef6b9 commit efbf147

34 files changed

466 additions and 419 deletions (lines changed)

GPU/GPUTracking/Base/GPUReconstruction.cxx

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -292,8 +292,6 @@ int32_t GPUReconstruction::InitPhaseBeforeDevice()
292292
mProcessingSettings.delayedOutput = false;
293293
}
294294

295-
UpdateAutomaticProcessingSettings();
296-
GPUCA_GPUReconstructionUpdateDefaults();
297295
if (!mProcessingSettings.rtc.enable) {
298296
mProcessingSettings.rtc.optConstexpr = false;
299297
}

GPU/GPUTracking/Base/GPUReconstruction.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ struct GPUReconstructionPipelineContext;
4848
struct GPUReconstructionThreading;
4949
class GPUROOTDumpCore;
5050
class ThrustVolatileAllocator;
51+
struct GPUDefParameters;
5152

5253
namespace gpu_reconstruction_kernels
5354
{
@@ -205,6 +206,7 @@ class GPUReconstruction
205206
GPUOutputControl& OutputControl() { return mOutputControl; }
206207
uint32_t NStreams() const { return mNStreams; }
207208
const void* DeviceMemoryBase() const { return mDeviceMemoryBase; }
209+
virtual const GPUDefParameters& getGPUParameters(bool doGPU) const = 0;
208210

209211
RecoStepField GetRecoSteps() const { return mRecoSteps.steps; }
210212
RecoStepField GetRecoStepsGPU() const { return mRecoSteps.stepsGPUMask; }
@@ -239,7 +241,6 @@ class GPUReconstruction
239241
void FreeRegisteredMemory(GPUMemoryResource* res);
240242
GPUReconstruction(const GPUSettingsDeviceBackend& cfg); // Constructor
241243
int32_t InitPhaseBeforeDevice();
242-
virtual void UpdateAutomaticProcessingSettings() {}
243244
virtual int32_t InitDevice() = 0;
244245
int32_t InitPhasePermanentMemory();
245246
int32_t InitPhaseAfterDevice();

GPU/GPUTracking/Base/GPUReconstructionIncludes.h

Lines changed: 1 addition & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -29,21 +29,4 @@
2929
#include <iostream>
3030
#include <fstream>
3131

32-
#define GPUCA_GPUReconstructionUpdateDefaults() \
33-
if (mProcessingSettings.alternateBorderSort < 0) { \
34-
mProcessingSettings.alternateBorderSort = GPUCA_ALTERNATE_BORDER_SORT; \
35-
} \
36-
if (mProcessingSettings.mergerSortTracks < 0) { \
37-
mProcessingSettings.mergerSortTracks = GPUCA_SORT_BEFORE_FIT; \
38-
} \
39-
if (param().rec.tpc.looperInterpolationInExtraPass < 0) { \
40-
param().rec.tpc.looperInterpolationInExtraPass = GPUCA_MERGER_SPLIT_LOOP_INTERPOLATION; \
41-
} \
42-
if (mProcessingSettings.tpcCompressionGatherModeKernel < 0) { \
43-
mProcessingSettings.tpcCompressionGatherModeKernel = GPUCA_COMP_GATHER_KERNEL; \
44-
} \
45-
if (mProcessingSettings.tpcCompressionGatherMode < 0) { \
46-
mProcessingSettings.tpcCompressionGatherMode = GPUCA_COMP_GATHER_MODE; \
47-
}
48-
49-
#endif
32+
#endif // GPURECONSTRUCTIONINCLUDES_H

GPU/GPUTracking/Base/GPUReconstructionProcessing.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -101,6 +101,7 @@ class GPUReconstructionProcessing : public GPUReconstruction
101101
uint32_t countToGPU = 0;
102102
uint32_t countToHost = 0;
103103
};
104+
const GPUDefParameters& getGPUParameters(bool doGPU) const override { return *(doGPU ? mParDevice : mParCPU); }
104105

105106
protected:
106107
GPUReconstructionProcessing(const GPUSettingsDeviceBackend& cfg);

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -99,11 +99,6 @@ void GPUReconstructionCUDA::GetITSTraits(std::unique_ptr<o2::its::TrackerTraits>
9999
}
100100
}
101101

102-
void GPUReconstructionCUDA::UpdateAutomaticProcessingSettings()
103-
{
104-
GPUCA_GPUReconstructionUpdateDefaults();
105-
}
106-
107102
int32_t GPUReconstructionCUDA::InitDevice_Runtime()
108103
{
109104
#ifndef __HIPCC__ // CUDA

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -59,7 +59,6 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels<GPUReconstructionC
5959
protected:
6060
int32_t InitDevice_Runtime() override;
6161
int32_t ExitDevice_Runtime() override;
62-
void UpdateAutomaticProcessingSettings() override;
6362

6463
std::unique_ptr<gpu_reconstruction_kernels::threadContext> GetThreadContext() override;
6564
void SynchronizeGPU() override;

GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.cxx

Lines changed: 0 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -60,11 +60,6 @@ int32_t GPUReconstructionOCLBackend::GPUChkErrInternal(const int64_t error, cons
6060
return error != CL_SUCCESS;
6161
}
6262

63-
void GPUReconstructionOCLBackend::UpdateAutomaticProcessingSettings()
64-
{
65-
GPUCA_GPUReconstructionUpdateDefaults();
66-
}
67-
6863
int32_t GPUReconstructionOCLBackend::InitDevice_Runtime()
6964
{
7065
if (mMaster == nullptr) {

GPU/GPUTracking/Base/opencl/GPUReconstructionOCL.h

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,6 @@ class GPUReconstructionOCLBackend : public GPUReconstructionDeviceBase
3737

3838
int32_t InitDevice_Runtime() override;
3939
int32_t ExitDevice_Runtime() override;
40-
void UpdateAutomaticProcessingSettings() override;
4140

4241
virtual int32_t GPUChkErrInternal(const int64_t error, const char* file, int32_t line) const override;
4342

GPU/GPUTracking/CMakeLists.txt

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -247,14 +247,17 @@ foreach(TEMPLATE_FILE ${TEMPLATE_HEADER_LIST})
247247
file(GENERATE OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME} INPUT ${TEMPLATE_FILE})
248248
list(APPEND GENERATED_HEADERS_LIST ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/${OUTPUT_FILE_NAME})
249249
endforeach()
250+
set(GPUDEFPARAMETERSLBLIST "$<JOIN:$<LIST:TRANSFORM,$<LIST:TRANSFORM,$<TARGET_PROPERTY:O2_GPU_KERNELS,O2_GPU_KERNEL_NAMES>,REPLACE,[^A-Za-z0-9]+,_>,PREPEND,LB_>,\n>\n")
251+
string(APPEND GPUDEFPARAMETERSLBLIST "$<JOIN:$<LIST:TRANSFORM,$<TARGET_PROPERTY:O2_GPU_KERNELS,O2_GPU_KERNEL_PARAMS>,PREPEND,PAR_>,\n>\n")
252+
string(APPEND GPUDEFPARAMETERSLBLIST "$<JOIN:$<LIST:TRANSFORM,$<TARGET_PROPERTY:O2_GPU_KERNELS,O2_GPU_KERNEL_STRING_PARAMS>,PREPEND,PAR_>,\n>")
250253
file(GENERATE
251254
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase
252-
CONTENT "$<JOIN:$<LIST:TRANSFORM,$<TARGET_PROPERTY:O2_GPU_KERNELS,O2_GPU_KERNEL_NAMES>,REPLACE,[^A-Za-z0-9]+,_>,\n>"
253-
)
255+
CONTENT ${GPUDEFPARAMETERSLBLIST})
254256
add_custom_command(
255257
OUTPUT ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h
256-
COMMAND awk "{print(\"#ifndef GPUCA_LB_\" $0 \"\\n#define GPUCA_LB_\" $0 \" 0\\n#endif\")}" ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase > ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h
258+
COMMAND awk "{print(\"#ifndef GPUCA_\" $0 \"\\n#define GPUCA_\" $0 \" 0\\n#endif\")}" ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase > ${CMAKE_CURRENT_BINARY_DIR}/include_gpu_onthefly/GPUDefParametersLoadPrepare.h
257259
COMMENT "Generating GPUDefParametersLoadPrepare.h"
260+
DEPENDS ${CMAKE_CURRENT_BINARY_DIR}/GPUDefParametersLoadPrepareBase
258261
VERBATIM
259262
COMMAND_EXPAND_LISTS
260263
)

GPU/GPUTracking/DataCompression/GPUTPCCompression.cxx

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,7 @@
1616
#include "GPUReconstruction.h"
1717
#include "GPUO2DataTypes.h"
1818
#include "GPUMemorySizeScalers.h"
19+
#include "GPUDefParametersRuntime.h"
1920

2021
using namespace o2::gpu;
2122

@@ -36,20 +37,22 @@ void* GPUTPCCompression::SetPointersOutputHost(void* mem)
3637

3738
void* GPUTPCCompression::SetPointersScratch(void* mem)
3839
{
40+
int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode;
3941
computePointerWithAlignment(mem, mClusterStatus, mMaxClusters);
40-
if (mRec->GetProcessingSettings().tpcCompressionGatherMode >= 2) {
42+
if (gatherMode >= 2) {
4143
computePointerWithAlignment(mem, mAttachedClusterFirstIndex, mMaxTracks);
4244
}
43-
if (mRec->GetProcessingSettings().tpcCompressionGatherMode != 1) {
45+
if (gatherMode != 1) {
4446
SetPointersCompressedClusters(mem, mPtrs, mMaxTrackClusters, mMaxTracks, mMaxClustersInCache, false);
4547
}
4648
return mem;
4749
}
4850

4951
void* GPUTPCCompression::SetPointersOutput(void* mem)
5052
{
53+
int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode;
5154
computePointerWithAlignment(mem, mAttachedClusterFirstIndex, mMaxTrackClusters);
52-
if (mRec->GetProcessingSettings().tpcCompressionGatherMode == 1) {
55+
if (gatherMode == 1) {
5356
SetPointersCompressedClusters(mem, mPtrs, mMaxTrackClusters, mMaxTracks, mMaxClustersInCache, false);
5457
}
5558
return mem;
@@ -102,12 +105,13 @@ void* GPUTPCCompression::SetPointersMemory(void* mem)
102105
void GPUTPCCompression::RegisterMemoryAllocation()
103106
{
104107
AllocateAndInitializeLate();
108+
int32_t gatherMode = mRec->GetProcessingSettings().tpcCompressionGatherMode == -1 ? mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCCompression).par_COMP_GATHER_MODE : mRec->GetProcessingSettings().tpcCompressionGatherMode;
105109
mMemoryResOutputHost = mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutputHost, GPUMemoryResource::MEMORY_OUTPUT_FLAG | GPUMemoryResource::MEMORY_HOST | GPUMemoryResource::MEMORY_CUSTOM, "TPCCompressionOutputHost");
106-
if (mRec->GetProcessingSettings().tpcCompressionGatherMode == 3) {
110+
if (gatherMode == 3) {
107111
mMemoryResOutputGPU = mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutputGPU, GPUMemoryResource::MEMORY_SCRATCH | GPUMemoryResource::MEMORY_GPU | GPUMemoryResource::MEMORY_CUSTOM | GPUMemoryResource::MEMORY_STACK, "TPCCompressionOutputGPU");
108112
}
109-
uint32_t stackScratch = (mRec->GetProcessingSettings().tpcCompressionGatherMode != 3) ? GPUMemoryResource::MEMORY_STACK : 0;
110-
if (mRec->GetProcessingSettings().tpcCompressionGatherMode < 2) {
113+
uint32_t stackScratch = (gatherMode != 3) ? GPUMemoryResource::MEMORY_STACK : 0;
114+
if (gatherMode < 2) {
111115
mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersOutput, GPUMemoryResource::MEMORY_OUTPUT | stackScratch, "TPCCompressionOutput");
112116
}
113117
mRec->RegisterMemoryAllocation(this, &GPUTPCCompression::SetPointersScratch, GPUMemoryResource::MEMORY_SCRATCH | stackScratch, "TPCCompressionScratch");

0 commit comments

Comments
 (0)