Skip to content

Commit 3812d54

Browse files
committed
GPU: Fix some global preprocessor defines that were used incorrectly
1 parent 731144f commit 3812d54

File tree

10 files changed

+34
-19
lines changed

10 files changed

+34
-19
lines changed

GPU/Common/GPUCommonDefAPI.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@
4343
#define GPUhd() // Host and device function, inlined during GPU compilation to avoid symbol clashes in host code
4444
#define GPUhdi() inline // Host and device function, to-be-inlined on host and device
4545
#define GPUhdni() // Host and device function, not to-be-inlined automatically
46-
#define GPUg() INVALID_TRIGGER_ERROR_NO_HOST_CODE // GPU kernel
46+
#define GPUg() INVALID_TRIGGER_ERROR_NO_GPU_CODE // GPU kernel
4747
#define GPUshared() // shared memory variable declaration
4848
#define GPUglobal() // global memory variable declaration (only used for kernel input pointers)
4949
#define GPUconstant() // constant memory variable declaraion

GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -258,6 +258,9 @@ GPUdii() void GPUTPCCompressionKernels::Thread<GPUTPCCompressionKernels::step1un
258258
break;
259259
}
260260
if (param.rec.tpc.compressionTypeMask & GPUSettings::CompressionDifferences) {
261+
#ifdef GPUCA_GPUCODE
262+
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE);
263+
#endif
261264
if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZPadTime) {
262265
CAAlgo::sortInBlock(sortBuffer, sortBuffer + count, GPUTPCCompressionKernels_Compare<GPUSettings::SortZPadTime>(clusters->clusters[iSector][iRow]));
263266
} else if (param.rec.tpc.compressionSortOrder == GPUSettings::SortZTimePad) {

GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.h

Lines changed: 11 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -72,15 +72,19 @@ class GPUTPCCompressionGatherKernels : public GPUKernelTemplate
7272
using Vec64 = uint64_t;
7373
using Vec128 = uint4;
7474

75-
struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64<uint32_t, GPUCA_GET_THREAD_COUNT(GPUCA_LB_COMPRESSION_GATHER)> {
75+
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32));
76+
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64));
77+
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_buffered128));
78+
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_multiBlock));
79+
struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64<uint32_t, GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)> {
7680
union {
77-
uint32_t warpOffset[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)];
78-
Vec32 buf32[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE];
79-
Vec64 buf64[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE];
80-
Vec128 buf128[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE];
81+
uint32_t warpOffset[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)];
82+
Vec32 buf32[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
83+
Vec64 buf64[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
84+
Vec128 buf128[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
8185
struct {
82-
uint32_t sizes[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE];
83-
uint32_t srcOffsets[GPUCA_GET_WARP_COUNT(GPUCA_LB_COMPRESSION_GATHER)][GPUCA_WARP_SIZE];
86+
uint32_t sizes[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
87+
uint32_t srcOffsets[GPUCA_GET_WARP_COUNT(GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered)][GPUCA_WARP_SIZE];
8488
} unbuffered;
8589
};
8690

GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -492,14 +492,12 @@
492492
#define GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression GPUCA_LB_GPUTPCCFNoiseSuppression
493493
#define GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks GPUCA_LB_GPUTPCCFNoiseSuppression
494494

495-
#ifdef GPUCA_HAS_ONNX
496495
#define GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer GPUCA_LB_GPUTPCNNClusterizerKernels
497496
#define GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN GPUCA_LB_GPUTPCNNClusterizerKernels
498497
#define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels GPUCA_LB_GPUTPCNNClusterizerKernels
499498
#define GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels GPUCA_LB_GPUTPCNNClusterizerKernels
500499
#define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels
501500
#define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels
502-
#endif
503501

504502
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_THREAD_COUNT_SCAN
505503
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_THREAD_COUNT_SCAN

GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h

Lines changed: 0 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -45,13 +45,6 @@ namespace o2::gpu
4545

4646
// #define GPUCA_KERNEL_DEBUGGER_OUTPUT
4747

48-
// Some assertions to make sure the parameters are not invalid
49-
#if defined(GPUCA_GPUCODE)
50-
static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP, "Invalid GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP");
51-
static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE, "Invalid GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE");
52-
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCompressionKernels_step1unattached) * 2 <= GPUCA_TPC_COMP_CHUNK_SIZE, "Invalid GPUCA_TPC_COMP_CHUNK_SIZE");
53-
#endif
54-
5548
// Derived parameters
5649
#ifdef GPUCA_USE_TEXTURES
5750
#define GPUCA_TEXTURE_FETCH_CONSTRUCTOR // Fetch data through texture cache

GPU/GPUTracking/SectorTracker/GPUTPCNeighboursFinder.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,7 @@ class GPUTPCNeighboursFinder : public GPUKernelTemplate
4141
int32_t mIRowUp; // next row number
4242
int32_t mIRowDn; // previous row number
4343
#if GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP > 0
44+
static_assert(GPUCA_MAXN >= GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP);
4445
float mA1[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)];
4546
float mA2[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)];
4647
calink mB[GPUCA_NEIGHBOURS_FINDER_MAX_NNEIGHUP][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNeighboursFinder)];

GPU/GPUTracking/SectorTracker/GPUTPCTrackletSelector.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,7 @@ class GPUTPCTrackletSelector : public GPUKernelTemplate
3737
int32_t mNTracklets; // n of tracklets
3838
int32_t mReserved; // for alignment reasons
3939
#if GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0
40+
static_assert(GPUCA_ROW_COUNT >= GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE);
4041
GPUTPCHitId mHits[GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE][GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCTrackletSelector)];
4142
#endif // GPUCA_TRACKLET_SELECTOR_HITS_REG_SIZE != 0
4243
};

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFNoiseSuppression.h

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@ class GPUTPCCFNoiseSuppression : public GPUKernelTemplate
3434
noiseSuppression = 0,
3535
updatePeaks = 1,
3636
};
37-
static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression);
37+
static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression);
38+
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression_noiseSuppression) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFNoiseSuppression_updatePeaks));
39+
3840
struct GPUSharedMemory {
3941
ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE];
4042
PackedCharge buf[SCRATCH_PAD_WORK_GROUP_SIZE * SCRATCH_PAD_NOISE_N];

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,13 @@ class GPUTPCCFStreamCompaction : public GPUKernelTemplate
3737

3838
struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64<int32_t, GPUCA_THREAD_COUNT_SCAN> {
3939
};
40+
#if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS)
41+
static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanStart));
42+
static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanUp));
43+
static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanTop));
44+
static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanDown));
45+
static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits));
46+
#endif
4047

4148
typedef GPUTPCClusterFinder processorType;
4249
GPUhdi() static processorType* Processor(GPUConstantMem& processors)

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizerKernels.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -37,7 +37,13 @@ class MCLabelAccumulator;
3737
class GPUTPCNNClusterizerKernels : public GPUKernelTemplate
3838
{
3939
public:
40-
static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels);
40+
// Must all have same number of threads, since they use a common SCRATCH_PAD_WORK_GROUP_SIZE below
41+
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_fillInputNN) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer));
42+
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass1Labels) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer));
43+
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_determineClass2Labels) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer));
44+
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer));
45+
static_assert(GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression) == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer));
46+
static constexpr size_t SCRATCH_PAD_WORK_GROUP_SIZE = GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCNNClusterizerKernels_runCfClusterizer);
4147
struct GPUSharedMemory {
4248
// Regular cluster finder
4349
ChargePos posBcast[SCRATCH_PAD_WORK_GROUP_SIZE];

0 commit comments

Comments
 (0)