Skip to content

Commit 8ebcfcf

Browse files
committed
GPU: Make TPC CF CF_SCAN_WORKGROUP_SIZE configureable
1 parent 3684fcc commit 8ebcfcf

File tree

9 files changed

+69
-38
lines changed

9 files changed

+69
-38
lines changed

GPU/GPUTracking/Base/GPUProcessor.h

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,7 @@ class GPUProcessor
6363
}
6464

6565
template <size_t alignment = GPUCA_BUFFER_ALIGNMENT>
66-
static inline size_t getAlignmentMod(size_t addr)
66+
static constexpr inline size_t getAlignmentMod(size_t addr)
6767
{
6868
static_assert((alignment & (alignment - 1)) == 0, "Invalid alignment, not power of 2");
6969
if (alignment <= 1) {
@@ -72,7 +72,7 @@ class GPUProcessor
7272
return addr & (alignment - 1);
7373
}
7474
template <size_t alignment = GPUCA_BUFFER_ALIGNMENT>
75-
static inline size_t getAlignment(size_t addr)
75+
static constexpr inline size_t getAlignment(size_t addr)
7676
{
7777
size_t mod = getAlignmentMod<alignment>(addr);
7878
if (mod == 0) {
@@ -81,10 +81,22 @@ class GPUProcessor
8181
return (alignment - mod);
8282
}
8383
template <size_t alignment = GPUCA_BUFFER_ALIGNMENT>
84-
static inline size_t nextMultipleOf(size_t size)
84+
static constexpr inline size_t nextMultipleOf(size_t size)
8585
{
8686
return size + getAlignment<alignment>(size);
8787
}
88+
static constexpr inline size_t nextMultipleOf(size_t size, size_t alignment)
89+
{
90+
if (alignment & (alignment - 1)) {
91+
size_t tmp = size % alignment;
92+
if (tmp) {
93+
size += alignment - tmp;
94+
}
95+
return size;
96+
} else {
97+
return (size + alignment - 1) & ~(alignment - 1);
98+
}
99+
}
88100
template <size_t alignment = GPUCA_BUFFER_ALIGNMENT>
89101
static inline void* alignPointer(void* ptr)
90102
{

GPU/GPUTracking/Definitions/GPUDefParametersConstants.h

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -18,8 +18,6 @@
1818
#define GPUDEFPARAMETERSCONSTANTS_H
1919
// clang-format off
2020

21-
#define GPUCA_THREAD_COUNT_SCAN 512 // TODO: WARNING!!! Must not be GPUTYPE-dependent right now! // TODO: Fix!
22-
2321
#if defined(__CUDACC__) || defined(__HIPCC__)
2422
#define GPUCA_SPECIALIZE_THRUST_SORTS // Not compiled with RTC, so must be compile-time constant
2523
#endif

GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h

Lines changed: 11 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,6 @@
2222

2323
// GPU Run Configuration
2424
#if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS) // Avoid including for RTC generation besides normal include protection.
25-
#define GPUCA_LB_SCAN 512
2625
// GPU-architecture-dependent default settings
2726
#if defined(GPUCA_GPUTYPE_MI2xx)
2827
#define GPUCA_WARP_SIZE 64
@@ -499,11 +498,11 @@
499498
#define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass1Regression GPUCA_LB_GPUTPCNNClusterizerKernels
500499
#define GPUCA_LB_GPUTPCNNClusterizerKernels_publishClass2Regression GPUCA_LB_GPUTPCNNClusterizerKernels
501500

502-
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_LB_SCAN
503-
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_LB_SCAN
504-
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_LB_SCAN
505-
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_LB_SCAN
506-
#define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_LB_SCAN
501+
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanStart GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE
502+
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanUp GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE
503+
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanTop GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE
504+
#define GPUCA_LB_GPUTPCCFStreamCompaction_scanDown GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE
505+
#define GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE
507506
#define GPUCA_LB_GPUTPCCompressionGatherKernels_unbuffered GPUCA_LB_COMPRESSION_GATHER
508507
#define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered32 GPUCA_LB_COMPRESSION_GATHER
509508
#define GPUCA_LB_GPUTPCCompressionGatherKernels_buffered64 GPUCA_LB_COMPRESSION_GATHER
@@ -541,6 +540,9 @@
541540
#ifndef GPUCA_PAR_COMP_GATHER_MODE
542541
#define GPUCA_PAR_COMP_GATHER_MODE 2
543542
#endif
543+
#ifndef GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE
544+
#define GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE 512
545+
#endif
544546
#endif // defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_GENRTC) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS)
545547

546548
#ifndef GPUCA_GPUCODE_GENRTC
@@ -578,6 +580,9 @@
578580
#ifndef GPUCA_PAR_NO_ATOMIC_PRECHECK
579581
#define GPUCA_PAR_NO_ATOMIC_PRECHECK 0
580582
#endif
583+
#ifndef GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE
584+
#define GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE 0
585+
#endif
581586
#ifndef GPUCA_PAR_DEDX_STORAGE_TYPE
582587
#define GPUCA_PAR_DEDX_STORAGE_TYPE float
583588
#endif

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,7 @@
2323
#include "CfChargePos.h"
2424
#include "CfArray2D.h"
2525
#include "GPUGeneralKernels.h"
26+
#include "GPUDefParametersRuntime.h"
2627
#include "GPUTPCCFStreamCompaction.h"
2728
#include "GPUTPCCFChargeMapFiller.h"
2829
#include "GPUTPCCFDecodeZS.h"
@@ -402,27 +403,28 @@ void GPUChainTracking::RunTPCClusterizer_compactPeaks(GPUTPCClusterFinder& clust
402403
exit(1);
403404
}
404405

406+
int32_t scanWorkgroupSize = mRec->getGPUParameters(doGPU).par_CF_SCAN_WORKGROUP_SIZE;
405407
size_t tmpCount = count;
406408
if (nSteps > 1) {
407409
for (uint32_t i = 1; i < nSteps; i++) {
408410
counts.push_back(tmpCount);
409411
if (i == 1) {
410-
runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanStart>({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, stage);
412+
runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanStart>({GetGrid(tmpCount, scanWorkgroupSize, lane), {iSector}}, i, stage);
411413
} else {
412-
runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanUp>({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, tmpCount);
414+
runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanUp>({GetGrid(tmpCount, scanWorkgroupSize, lane), {iSector}}, i, tmpCount);
413415
}
414-
tmpCount = (tmpCount + clusterer.mScanWorkGroupSize - 1) / clusterer.mScanWorkGroupSize;
416+
tmpCount = (tmpCount + scanWorkgroupSize - 1) / scanWorkgroupSize;
415417
}
416418

417-
runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanTop>({GetGrid(tmpCount, clusterer.mScanWorkGroupSize, lane), {iSector}}, nSteps, tmpCount);
419+
runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanTop>({GetGrid(tmpCount, scanWorkgroupSize, lane), {iSector}}, nSteps, tmpCount);
418420

419421
for (uint32_t i = nSteps - 1; i > 1; i--) {
420422
tmpCount = counts[i - 1];
421-
runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanDown>({GetGrid(tmpCount - clusterer.mScanWorkGroupSize, clusterer.mScanWorkGroupSize, lane), {iSector}}, i, clusterer.mScanWorkGroupSize, tmpCount);
423+
runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::scanDown>({GetGrid(tmpCount - scanWorkgroupSize, scanWorkgroupSize, lane), {iSector}}, i, scanWorkgroupSize, tmpCount);
422424
}
423425
}
424426

425-
runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::compactDigits>({GetGrid(count, clusterer.mScanWorkGroupSize, lane), {iSector}}, 1, stage, in, out);
427+
runKernel<GPUTPCCFStreamCompaction, GPUTPCCFStreamCompaction::compactDigits>({GetGrid(count, scanWorkgroupSize, lane), {iSector}}, 1, stage, in, out);
426428
} else {
427429
auto& nOut = stage ? clusterer.mPmemory->counters.nClusters : clusterer.mPmemory->counters.nPeaks;
428430
auto& nIn = stage ? clusterer.mPmemory->counters.nPeaks : clusterer.mPmemory->counters.nPositions;

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.cxx

Lines changed: 12 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ using namespace o2::gpu::tpccf;
2424
template <>
2525
GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanStart>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage)
2626
{
27+
#ifdef GPUCA_GPUCODE
2728
int32_t nElems = CompactionElems(clusterer, stage);
2829

2930
const auto* predicate = clusterer.mPisPeak;
@@ -35,17 +36,19 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanSta
3536
pred = predicate[iThreadGlobal];
3637
}
3738

38-
int32_t nElemsInBlock = CfUtils::blockPredicateSum<GPUCA_THREAD_COUNT_SCAN>(smem, pred);
39+
int32_t nElemsInBlock = CfUtils::blockPredicateSum<GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE>(smem, pred);
3940

4041
int32_t lastThread = nThreads - 1;
4142
if (iThread == lastThread) {
4243
scanOffset[iBlock] = nElemsInBlock;
4344
}
45+
#endif
4446
}
4547

4648
template <>
4749
GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanUp>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t nElems)
4850
{
51+
#ifdef GPUCA_GPUCODE
4952
auto* scanOffset = clusterer.GetScanBuffer(iBuf - 1);
5053
auto* scanOffsetNext = clusterer.GetScanBuffer(iBuf);
5154

@@ -59,11 +62,13 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanUp>
5962
if (iThread == lastThread) {
6063
scanOffsetNext[iBlock] = offsetInBlock;
6164
}
65+
#endif
6266
}
6367

6468
template <>
6569
GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanTop>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t nElems)
6670
{
71+
#ifdef GPUCA_GPUCODE
6772
int32_t iThreadGlobal = get_global_id(0);
6873
int32_t* scanOffset = clusterer.GetScanBuffer(iBuf - 1);
6974

@@ -74,11 +79,13 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanTop
7479
if (inBounds) {
7580
scanOffset[iThreadGlobal] = offsetInBlock;
7681
}
82+
#endif
7783
}
7884

7985
template <>
8086
GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanDown>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& /*smem*/, processorType& clusterer, int32_t iBuf, uint32_t offset, int32_t nElems)
8187
{
88+
#ifdef GPUCA_GPUCODE
8289
int32_t iThreadGlobal = get_global_id(0) + offset;
8390

8491
int32_t* scanOffsetPrev = clusterer.GetScanBuffer(iBuf - 1);
@@ -89,11 +96,13 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::scanDow
8996
if (iThreadGlobal < nElems) {
9097
scanOffsetPrev[iThreadGlobal] += shift;
9198
}
99+
#endif
92100
}
93101

94102
template <>
95103
GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::compactDigits>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int32_t iBuf, int32_t stage, CfChargePos* in, CfChargePos* out)
96104
{
105+
#ifdef GPUCA_GPUCODE
97106
uint32_t nElems = CompactionElems(clusterer, stage);
98107
SizeT bufferSize = (stage) ? clusterer.mNMaxClusters : clusterer.mNMaxPeaks;
99108

@@ -105,7 +114,7 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::compact
105114
bool iAmDummy = (iThreadGlobal >= nElems);
106115

107116
int32_t pred = (iAmDummy) ? 0 : predicate[iThreadGlobal];
108-
int32_t offsetInBlock = CfUtils::blockPredicateScan<GPUCA_THREAD_COUNT_SCAN>(smem, pred);
117+
int32_t offsetInBlock = CfUtils::blockPredicateScan<GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE>(smem, pred);
109118

110119
SizeT globalOffsetOut = offsetInBlock;
111120
if (iBlock > 0) {
@@ -129,6 +138,7 @@ GPUdii() void GPUTPCCFStreamCompaction::Thread<GPUTPCCFStreamCompaction::compact
129138
clusterer.mPmemory->counters.nPeaks = nFinal;
130139
}
131140
}
141+
#endif
132142
}
133143

134144
GPUdii() int32_t GPUTPCCFStreamCompaction::CompactionElems(processorType& clusterer, int32_t stage)

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFStreamCompaction.h

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -35,14 +35,14 @@ class GPUTPCCFStreamCompaction : public GPUKernelTemplate
3535
compactDigits = 4,
3636
};
3737

38-
struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64<int32_t, GPUCA_THREAD_COUNT_SCAN> {
39-
};
4038
#if defined(GPUCA_GPUCODE) && !defined(GPUCA_GPUCODE_NO_LAUNCH_BOUNDS)
41-
static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanStart));
42-
static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanUp));
43-
static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanTop));
44-
static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanDown));
45-
static_assert(GPUCA_THREAD_COUNT_SCAN == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits));
39+
struct GPUSharedMemory : public GPUKernelTemplate::GPUSharedMemoryScan64<int32_t, GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE> {
40+
};
41+
static_assert(GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanStart));
42+
static_assert(GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanUp));
43+
static_assert(GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanTop));
44+
static_assert(GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_scanDown));
45+
static_assert(GPUCA_PAR_CF_SCAN_WORKGROUP_SIZE == GPUCA_GET_THREAD_COUNT(GPUCA_LB_GPUTPCCFStreamCompaction_compactDigits));
4646
#endif
4747

4848
typedef GPUTPCClusterFinder processorType;

GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.cxx

Lines changed: 12 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,7 @@
1717
#include "GPUMemorySizeScalers.h"
1818
#include "GPUHostDataTypes.h"
1919
#include "GPUSettings.h"
20+
#include "GPUDefParametersRuntime.h"
2021

2122
#include "DataFormatsTPC/ClusterNative.h"
2223
#include "DataFormatsTPC/ZeroSuppression.h"
@@ -90,9 +91,10 @@ void* GPUTPCClusterFinder::SetPointersScratch(void* mem)
9091
computePointerWithAlignment(mem, mPisPeak, mNMaxDigitsFragment);
9192
computePointerWithAlignment(mem, mPchargeMap, TPCMapMemoryLayout<decltype(*mPchargeMap)>::items(mRec->GetProcessingSettings().overrideClusterizerFragmentLen));
9293
computePointerWithAlignment(mem, mPpeakMap, TPCMapMemoryLayout<decltype(*mPpeakMap)>::items(mRec->GetProcessingSettings().overrideClusterizerFragmentLen));
93-
computePointerWithAlignment(mem, mPbuf, mBufSize * mNBufs);
9494
computePointerWithAlignment(mem, mPclusterByRow, GPUCA_ROW_COUNT * mNMaxClusterPerRow);
95-
95+
if ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding)) {
96+
computePointerWithAlignment(mem, mPscanBuf, mBufSize * mNBufs);
97+
}
9698
return mem;
9799
}
98100

@@ -129,14 +131,15 @@ void GPUTPCClusterFinder::SetMaxData(const GPUTrackingInOutPointers& io)
129131
if (mRec->GetProcessingSettings().tpcIncreasedMinClustersPerRow) {
130132
mNMaxClusterPerRow = std::max<uint32_t>(mNMaxClusterPerRow, mRec->GetProcessingSettings().tpcIncreasedMinClustersPerRow);
131133
}
132-
133-
mBufSize = nextMultipleOf<std::max<int32_t>(GPUCA_MEMALIGN, mScanWorkGroupSize)>(mNMaxDigitsFragment);
134-
mNBufs = getNSteps(mBufSize);
134+
if ((mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding)) {
135+
mBufSize = nextMultipleOf(mNMaxDigitsFragment, std::max<int32_t>(GPUCA_MEMALIGN, mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding).par_CF_SCAN_WORKGROUP_SIZE));
136+
mNBufs = getNSteps(mBufSize);
137+
}
135138
}
136139

137140
void GPUTPCClusterFinder::SetNMaxDigits(size_t nDigits, size_t nPages, size_t nDigitsFragment, size_t nDigitsEndpointMax)
138141
{
139-
mNMaxDigits = nextMultipleOf<std::max<int32_t>(GPUCA_MEMALIGN, mScanWorkGroupSize)>(nDigits);
142+
mNMaxDigits = nextMultipleOf(nDigits, std::max<int32_t>(GPUCA_MEMALIGN, mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding).par_CF_SCAN_WORKGROUP_SIZE));
140143
mNMaxPages = nPages;
141144
mNMaxDigitsFragment = nDigitsFragment;
142145
mNMaxDigitsEndpoint = nDigitsEndpointMax;
@@ -148,9 +151,10 @@ uint32_t GPUTPCClusterFinder::getNSteps(size_t items) const
148151
return 0;
149152
}
150153
uint32_t c = 1;
151-
size_t capacity = mScanWorkGroupSize;
154+
const size_t scanWorkgroupSize = mRec->getGPUParameters(mRec->GetRecoStepsGPU() & GPUDataTypes::RecoStep::TPCClusterFinding).par_CF_SCAN_WORKGROUP_SIZE;
155+
size_t capacity = scanWorkgroupSize;
152156
while (items > capacity) {
153-
capacity *= mScanWorkGroupSize;
157+
capacity *= scanWorkgroupSize;
154158
c++;
155159
}
156160
return c;

GPU/GPUTracking/TPCClusterFinder/GPUTPCClusterFinder.h

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,18 +111,17 @@ class GPUTPCClusterFinder : public GPUProcessor
111111
uint32_t* mPclusterInRow = nullptr;
112112
tpc::ClusterNative* mPclusterByRow = nullptr;
113113
GPUTPCClusterMCInterimArray* mPlabelsByRow = nullptr;
114-
int32_t* mPbuf = nullptr;
114+
int32_t* mPscanBuf = nullptr;
115115
Memory* mPmemory = nullptr;
116116

117-
GPUdi() int32_t* GetScanBuffer(int32_t iBuf) const { return mPbuf + iBuf * mBufSize; }
117+
GPUdi() int32_t* GetScanBuffer(int32_t iBuf) const { return mPscanBuf + iBuf * mBufSize; }
118118

119119
o2::dataformats::ConstMCTruthContainerView<o2::MCCompLabel> const* mPinputLabels = nullptr;
120120
uint32_t* mPlabelsInRow = nullptr;
121121
uint32_t mPlabelsHeaderGlobalOffset = 0;
122122
uint32_t mPlabelsDataGlobalOffset = 0;
123123

124124
int32_t mISector = 0;
125-
constexpr static int32_t mScanWorkGroupSize = GPUCA_THREAD_COUNT_SCAN;
126125
uint32_t mNMaxClusterPerRow = 0;
127126
uint32_t mNMaxClusters = 0;
128127
uint32_t mNMaxPages = 0;

GPU/GPUTracking/kernels.cmake

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,8 @@ o2_gpu_kernel_add_parameter(NEIGHBOURS_FINDER_MAX_NNEIGHUP
146146
NO_ATOMIC_PRECHECK
147147
COMP_GATHER_KERNEL
148148
COMP_GATHER_MODE
149-
SORT_STARTHITS)
149+
SORT_STARTHITS
150+
CF_SCAN_WORKGROUP_SIZE)
150151

151152
o2_gpu_kernel_add_string_parameter(DEDX_STORAGE_TYPE
152153
MERGER_INTERPOLATION_ERROR_TYPE)

0 commit comments

Comments
 (0)