Skip to content

Commit ce427f7

Browse files
committed
GPU: Use standard <cstdint> types in TPC clusterizer instead of own uint/ushort/etc.
1 parent 281505d commit ce427f7

27 files changed

+182
-195
lines changed

GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -65,7 +65,7 @@
6565
#define int64_t long
6666
#define int32_t int
6767
#define int16_t short
68-
#define int8_t char
68+
#define int8_t signed char
6969

7070
// Disable assertions since they produce errors in GPU Code
7171
#ifdef assert

GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -348,13 +348,13 @@ GPUdi() void GPUTPCCompressionGatherKernels::compressorMemcpy<uint8_t>(uint8_t*
348348
CONSTEXPR const int32_t vec32Elems = CpyVector<uint8_t, Vec32>::Size;
349349
CONSTEXPR const int32_t vec16Elems = CpyVector<uint8_t, Vec16>::Size;
350350

351-
if (size >= uint(nThreads * vec128Elems)) {
351+
if (size >= uint32_t(nThreads * vec128Elems)) {
352352
compressorMemcpyVectorised<uint8_t, Vec128>(dst, src, size, nThreads, iThread);
353-
} else if (size >= uint(nThreads * vec64Elems)) {
353+
} else if (size >= uint32_t(nThreads * vec64Elems)) {
354354
compressorMemcpyVectorised<uint8_t, Vec64>(dst, src, size, nThreads, iThread);
355-
} else if (size >= uint(nThreads * vec32Elems)) {
355+
} else if (size >= uint32_t(nThreads * vec32Elems)) {
356356
compressorMemcpyVectorised<uint8_t, Vec32>(dst, src, size, nThreads, iThread);
357-
} else if (size >= uint(nThreads * vec16Elems)) {
357+
} else if (size >= uint32_t(nThreads * vec16Elems)) {
358358
compressorMemcpyVectorised<uint8_t, Vec16>(dst, src, size, nThreads, iThread);
359359
} else {
360360
compressorMemcpyBasic(dst, src, size, nThreads, iThread);
@@ -368,11 +368,11 @@ GPUdi() void GPUTPCCompressionGatherKernels::compressorMemcpy<uint16_t>(uint16_t
368368
CONSTEXPR const int32_t vec64Elems = CpyVector<uint16_t, Vec64>::Size;
369369
CONSTEXPR const int32_t vec32Elems = CpyVector<uint16_t, Vec32>::Size;
370370

371-
if (size >= uint(nThreads * vec128Elems)) {
371+
if (size >= uint32_t(nThreads * vec128Elems)) {
372372
compressorMemcpyVectorised<uint16_t, Vec128>(dst, src, size, nThreads, iThread);
373-
} else if (size >= uint(nThreads * vec64Elems)) {
373+
} else if (size >= uint32_t(nThreads * vec64Elems)) {
374374
compressorMemcpyVectorised<uint16_t, Vec64>(dst, src, size, nThreads, iThread);
375-
} else if (size >= uint(nThreads * vec32Elems)) {
375+
} else if (size >= uint32_t(nThreads * vec32Elems)) {
376376
compressorMemcpyVectorised<uint16_t, Vec32>(dst, src, size, nThreads, iThread);
377377
} else {
378378
compressorMemcpyBasic(dst, src, size, nThreads, iThread);
@@ -385,9 +385,9 @@ GPUdi() void GPUTPCCompressionGatherKernels::compressorMemcpy<uint32_t>(uint32_t
385385
CONSTEXPR const int32_t vec128Elems = CpyVector<uint32_t, Vec128>::Size;
386386
CONSTEXPR const int32_t vec64Elems = CpyVector<uint32_t, Vec64>::Size;
387387

388-
if (size >= uint(nThreads * vec128Elems)) {
388+
if (size >= uint32_t(nThreads * vec128Elems)) {
389389
compressorMemcpyVectorised<uint32_t, Vec128>(dst, src, size, nThreads, iThread);
390-
} else if (size >= uint(nThreads * vec64Elems)) {
390+
} else if (size >= uint32_t(nThreads * vec64Elems)) {
391391
compressorMemcpyVectorised<uint32_t, Vec64>(dst, src, size, nThreads, iThread);
392392
} else {
393393
compressorMemcpyBasic(dst, src, size, nThreads, iThread);

GPU/GPUTracking/Definitions/clusterFinderDefs.h

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -17,13 +17,6 @@
1717

1818
#include "GPUDef.h"
1919

20-
#ifndef __OPENCL__
21-
using uchar = uint8_t;
22-
#endif
23-
#ifdef __APPLE__
24-
using ulong = uint64_t;
25-
#endif
26-
2720
/* #define CHARGEMAP_TIME_MAJOR_LAYOUT */
2821
#define CHARGEMAP_TILING_LAYOUT
2922

@@ -53,14 +46,8 @@ using ulong = uint64_t;
5346
#define TPC_MAX_FRAGMENT_LEN_HOST 1000
5447
#define TPC_MAX_FRAGMENT_LEN_PADDED(size) ((size) + 2 * GPUCF_PADDING_TIME)
5548

56-
#if 0
57-
#define DBG_PRINT(msg, ...) printf(msg "\n", __VA_ARGS__)
58-
#else
59-
#define DBG_PRINT(msg, ...) static_cast<void>(0)
60-
#endif
61-
6249
#ifdef GPUCA_GPUCODE
63-
#define CPU_ONLY(x) static_cast<void>(0)
50+
#define CPU_ONLY(x)
6451
#define CPU_PTR(x) nullptr
6552
#else
6653
#define CPU_ONLY(x) x

GPU/GPUTracking/TPCClusterFinder/CfConsts.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -70,7 +70,7 @@ GPUconstexpr() tpccf::Delta2 OuterNeighbors[16] =
7070
{2, 2},
7171
{1, 2}};
7272

73-
GPUconstexpr() uchar OuterToInner[16] =
73+
GPUconstexpr() uint8_t OuterToInner[16] =
7474
{
7575
0, 0, 0,
7676

@@ -90,7 +90,7 @@ GPUconstexpr() uchar OuterToInner[16] =
9090

9191
// outer to inner mapping change for the peak counting step,
9292
// as the other position is the position of the peak
93-
GPUconstexpr() uchar OuterToInnerInv[16] =
93+
GPUconstexpr() uint8_t OuterToInnerInv[16] =
9494
{
9595
1,
9696
0,
@@ -153,7 +153,7 @@ GPUconstexpr() tpccf::Delta2 NoiseSuppressionNeighbors[NOISE_SUPPRESSION_NEIGHBO
153153
{2, 2},
154154
{2, 3}};
155155

156-
GPUconstexpr() uint NoiseSuppressionMinima[NOISE_SUPPRESSION_NEIGHBOR_NUM] =
156+
GPUconstexpr() uint32_t NoiseSuppressionMinima[NOISE_SUPPRESSION_NEIGHBOR_NUM] =
157157
{
158158
(1 << 8) | (1 << 9),
159159
(1 << 9),

GPU/GPUTracking/TPCClusterFinder/CfFragment.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -104,7 +104,7 @@ struct CfFragment {
104104
}
105105

106106
private:
107-
GPUd() CfFragment(uint index_, bool hasBacklog_, tpccf::TPCTime start_, tpccf::TPCTime totalSliceLen, tpccf::TPCFragmentTime maxSubSliceLen)
107+
GPUd() CfFragment(uint32_t index_, bool hasBacklog_, tpccf::TPCTime start_, tpccf::TPCTime totalSliceLen, tpccf::TPCFragmentTime maxSubSliceLen)
108108
{
109109
this->index = index_;
110110
this->hasBacklog = hasBacklog_;

GPU/GPUTracking/TPCClusterFinder/CfUtils.h

Lines changed: 27 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -32,19 +32,19 @@ class CfUtils
3232
return (pos.pad() < 2 || pos.pad() >= padsPerRow - 2);
3333
}
3434

35-
static GPUdi() bool innerAboveThreshold(uchar aboveThreshold, ushort outerIdx)
35+
static GPUdi() bool innerAboveThreshold(uint8_t aboveThreshold, uint16_t outerIdx)
3636
{
3737
return aboveThreshold & (1 << cfconsts::OuterToInner[outerIdx]);
3838
}
3939

40-
static GPUdi() bool innerAboveThresholdInv(uchar aboveThreshold, ushort outerIdx)
40+
static GPUdi() bool innerAboveThresholdInv(uint8_t aboveThreshold, uint16_t outerIdx)
4141
{
4242
return aboveThreshold & (1 << cfconsts::OuterToInnerInv[outerIdx]);
4343
}
4444

45-
static GPUdi() bool isPeak(uchar peak) { return peak & 0x01; }
45+
static GPUdi() bool isPeak(uint8_t peak) { return peak & 0x01; }
4646

47-
static GPUdi() bool isAboveThreshold(uchar peak) { return peak >> 1; }
47+
static GPUdi() bool isAboveThreshold(uint8_t peak) { return peak >> 1; }
4848

4949
static GPUdi() int32_t warpPredicateScan(int32_t pred, int32_t* sum)
5050
{
@@ -159,14 +159,14 @@ class CfUtils
159159
}
160160

161161
template <size_t SCRATCH_PAD_WORK_GROUP_SIZE, typename SharedMemory>
162-
static GPUdi() ushort partition(SharedMemory& smem, ushort ll, bool pred, ushort partSize, ushort* newPartSize)
162+
static GPUdi() uint16_t partition(SharedMemory& smem, uint16_t ll, bool pred, uint16_t partSize, uint16_t* newPartSize)
163163
{
164164
bool participates = ll < partSize;
165165

166166
int32_t part;
167167
int32_t lpos = blockPredicateScan<SCRATCH_PAD_WORK_GROUP_SIZE>(smem, int32_t(!pred && participates), &part);
168168

169-
ushort pos = (participates && !pred) ? lpos : part;
169+
uint16_t pos = (participates && !pred) ? lpos : part;
170170

171171
*newPartSize = part;
172172
return pos;
@@ -175,24 +175,24 @@ class CfUtils
175175
template <typename T>
176176
static GPUdi() void blockLoad(
177177
const Array2D<T>& map,
178-
uint wgSize,
179-
uint elems,
180-
ushort ll,
181-
uint offset,
182-
uint N,
178+
uint32_t wgSize,
179+
uint32_t elems,
180+
uint16_t ll,
181+
uint32_t offset,
182+
uint32_t N,
183183
GPUconstexprref() const tpccf::Delta2* neighbors,
184184
const ChargePos* posBcast,
185185
GPUgeneric() T* buf)
186186
{
187187
#if defined(GPUCA_GPUCODE)
188188
GPUbarrier();
189-
ushort x = ll % N;
190-
ushort y = ll / N;
189+
uint16_t x = ll % N;
190+
uint16_t y = ll / N;
191191
tpccf::Delta2 d = neighbors[x + offset];
192192

193193
for (uint32_t i = y; i < wgSize; i += (elems / N)) {
194194
ChargePos readFrom = posBcast[i];
195-
uint writeTo = N * i + x;
195+
uint32_t writeTo = N * i + x;
196196
buf[writeTo] = map[readFrom.delta(d)];
197197
}
198198
GPUbarrier();
@@ -208,7 +208,7 @@ class CfUtils
208208
for (uint32_t i = 0; i < N; i++) {
209209
tpccf::Delta2 d = neighbors[i + offset];
210210

211-
uint writeTo = N * ll + i;
211+
uint32_t writeTo = N * ll + i;
212212
buf[writeTo] = map[readFrom.delta(d)];
213213
}
214214

@@ -219,25 +219,25 @@ class CfUtils
219219
template <typename T, bool Inv = false>
220220
static GPUdi() void condBlockLoad(
221221
const Array2D<T>& map,
222-
ushort wgSize,
223-
ushort elems,
224-
ushort ll,
225-
ushort offset,
226-
ushort N,
222+
uint16_t wgSize,
223+
uint16_t elems,
224+
uint16_t ll,
225+
uint16_t offset,
226+
uint16_t N,
227227
GPUconstexprref() const tpccf::Delta2* neighbors,
228228
const ChargePos* posBcast,
229-
const uchar* aboveThreshold,
229+
const uint8_t* aboveThreshold,
230230
GPUgeneric() T* buf)
231231
{
232232
#if defined(GPUCA_GPUCODE)
233233
GPUbarrier();
234-
ushort y = ll / N;
235-
ushort x = ll % N;
234+
uint16_t y = ll / N;
235+
uint16_t x = ll % N;
236236
tpccf::Delta2 d = neighbors[x + offset];
237237
for (uint32_t i = y; i < wgSize; i += (elems / N)) {
238238
ChargePos readFrom = posBcast[i];
239-
uchar above = aboveThreshold[i];
240-
uint writeTo = N * i + x;
239+
uint8_t above = aboveThreshold[i];
240+
uint32_t writeTo = N * i + x;
241241
T v(0);
242242
bool cond = (Inv) ? innerAboveThresholdInv(above, x + offset)
243243
: innerAboveThreshold(above, x + offset);
@@ -253,13 +253,13 @@ class CfUtils
253253
}
254254

255255
ChargePos readFrom = posBcast[ll];
256-
uchar above = aboveThreshold[ll];
256+
uint8_t above = aboveThreshold[ll];
257257
GPUbarrier();
258258

259259
for (uint32_t i = 0; i < N; i++) {
260260
tpccf::Delta2 d = neighbors[i + offset];
261261

262-
uint writeTo = N * ll + i;
262+
uint32_t writeTo = N * ll + i;
263263
T v(0);
264264
bool cond = (Inv) ? innerAboveThresholdInv(above, i + offset)
265265
: innerAboveThreshold(above, i + offset);

GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,7 @@ GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, Charge q, tpc::Cl
4242
bool wasSplitInPad = mSplitInPad >= param.rec.tpc.cfMinSplitNum;
4343
bool isSingleCluster = (mPadSigma == 0) || (mTimeSigma == 0);
4444

45-
uchar flags = 0;
45+
uint8_t flags = 0;
4646
flags |= (isEdgeCluster) ? tpc::ClusterNative::flagEdge : 0;
4747
flags |= (wasSplitInTime) ? tpc::ClusterNative::flagSplitTime : 0;
4848
flags |= (wasSplitInPad) ? tpc::ClusterNative::flagSplitPad : 0;

GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -49,8 +49,8 @@ class ClusterAccumulator
4949
float mPadSigma = 0;
5050
float mTimeMean = 0;
5151
float mTimeSigma = 0;
52-
uchar mSplitInTime = 0;
53-
uchar mSplitInPad = 0;
52+
uint8_t mSplitInTime = 0;
53+
uint8_t mSplitInPad = 0;
5454

5555
GPUd() void update(tpccf::Charge, tpccf::Delta2);
5656
};

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,14 +23,14 @@ using namespace GPUCA_NAMESPACE::gpu::tpccf;
2323
template <>
2424
GPUdii() void GPUTPCCFChargeMapFiller::Thread<GPUTPCCFChargeMapFiller::fillIndexMap>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer)
2525
{
26-
Array2D<uint> indexMap(clusterer.mPindexMap);
26+
Array2D<uint32_t> indexMap(clusterer.mPindexMap);
2727
fillIndexMapImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer.mPmemory->fragment, clusterer.mPdigits, indexMap, clusterer.mPmemory->counters.nDigitsInFragment);
2828
}
2929

3030
GPUd() void GPUTPCCFChargeMapFiller::fillIndexMapImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread,
3131
const CfFragment& fragment,
3232
const tpc::Digit* digits,
33-
Array2D<uint>& indexMap,
33+
Array2D<uint32_t>& indexMap,
3434
size_t maxDigit)
3535
{
3636
size_t idx = get_global_id(0);

GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -57,7 +57,7 @@ class GPUTPCCFChargeMapFiller : public GPUKernelTemplate
5757
template <int32_t iKernel = defaultKernel, typename... Args>
5858
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, Args... args);
5959

60-
static GPUd() void fillIndexMapImpl(int32_t, int32_t, int32_t, int32_t, const CfFragment&, const tpc::Digit*, Array2D<uint>&, size_t);
60+
static GPUd() void fillIndexMapImpl(int32_t, int32_t, int32_t, int32_t, const CfFragment&, const tpc::Digit*, Array2D<uint32_t>&, size_t);
6161

6262
static GPUd() void fillFromDigitsImpl(int32_t, int32_t, int32_t, int32_t, processorType&, const CfFragment&, size_t, const tpc::Digit*, ChargePos*, Array2D<PackedCharge>&);
6363

0 commit comments

Comments
 (0)