Skip to content

Commit 3c4c587

Browse files
committed
Fixing build issues
1 parent 639b895 commit 3c4c587

File tree

2 files changed

+43
-50
lines changed

2 files changed

+43
-50
lines changed

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.cxx

Lines changed: 31 additions & 33 deletions
Original file line numberDiff line numberDiff line change
@@ -22,11 +22,11 @@
2222
#include "MCLabelAccumulator.h"
2323
#endif
2424

25-
using namespace GPUCA_NAMESPACE::gpu;
26-
using namespace GPUCA_NAMESPACE::gpu::tpccf;
25+
using namespace o2::gpu;
26+
using namespace o2::gpu::tpccf;
2727

2828
template <>
29-
GPUdii() void GPUTPCNNClusterizer::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem, processorType& clusterer, char onlyMC)
29+
GPUdii() void GPUTPCNNClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int8_t onlyMC)
3030
{
3131
Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
3232
CPU_ONLY(
@@ -91,19 +91,19 @@ bool GPUTPCNNClusterizer::isBoundary(int row, int pad, int global_shift, const G
9191
}
9292

9393
template <class T>
94-
GPUd() void GPUTPCNNClusterizer::nn_clusterizer(int nBlocks, int nThreads, int iBlock, int iThread,
94+
GPUd() void GPUTPCNNClusterizer::nn_clusterizer(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread,
9595
processorType& clusterer,
9696
const CfFragment& fragment,
9797
GPUSharedMemory& smem,
9898
const Array2D<PackedCharge>& chargeMap,
9999
const ChargePos* filteredPeakPositions,
100100
const GPUSettingsRec& calib,
101101
MCLabelAccumulator* labelAcc,
102-
uint clusternum,
103-
uint maxClusterPerRow,
104-
uint* clusterInRow,
102+
uint32_t clusternum,
103+
uint32_t maxClusterPerRow,
104+
uint32_t* clusterInRow,
105105
tpc::ClusterNative* clusterByRow,
106-
uint* clusterPosInRow)
106+
uint32_t* clusterPosInRow)
107107
{
108108

109109
uint glo_idx = get_global_id(0) * clusterer.nnClusterizerBatchedMode;
@@ -422,21 +422,21 @@ GPUd() void GPUTPCNNClusterizer::nn_clusterizer(int nBlocks, int nThreads, int i
422422
}
423423
}
424424

425-
GPUdii() void GPUTPCNNClusterizer::computeClustersImpl(int nBlocks, int nThreads, int iBlock, int iThread,
425+
GPUdii() void GPUTPCNNClusterizer::computeClustersImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread,
426426
processorType& clusterer,
427427
const CfFragment& fragment,
428428
GPUSharedMemory& smem,
429429
const Array2D<PackedCharge>& chargeMap,
430430
const ChargePos* filteredPeakPositions,
431431
const GPUSettingsRec& calib,
432432
MCLabelAccumulator* labelAcc,
433-
uint clusternum,
434-
uint maxClusterPerRow,
435-
uint* clusterInRow,
433+
uint32_t clusternum,
434+
uint32_t maxClusterPerRow,
435+
uint32_t* clusterInRow,
436436
tpc::ClusterNative* clusterByRow,
437-
uint* clusterPosInRow)
437+
uint32_t* clusterPosInRow)
438438
{
439-
uint idx = get_global_id(0);
439+
uint32_t idx = get_global_id(0);
440440

441441
// For certain configurations dummy work items are added, so the total
442442
// number of work items is dividable by 64.
@@ -478,7 +478,7 @@ GPUdii() void GPUTPCNNClusterizer::computeClustersImpl(int nBlocks, int nThreads
478478
return;
479479
}
480480

481-
uint rowIndex = 0;
481+
uint32_t rowIndex = 0;
482482
if (clusterByRow != nullptr) {
483483
rowIndex = sortIntoBuckets(
484484
clusterer,
@@ -499,8 +499,8 @@ GPUdii() void GPUTPCNNClusterizer::computeClustersImpl(int nBlocks, int nThreads
499499

500500
GPUdii() void GPUTPCNNClusterizer::updateClusterInner(
501501
const GPUSettingsRec& calib,
502-
ushort lid,
503-
ushort N,
502+
uint16_t lid,
503+
uint16_t N,
504504
const PackedCharge* buf,
505505
const ChargePos& pos,
506506
ClusterAccumulator* cluster,
@@ -510,15 +510,14 @@ GPUdii() void GPUTPCNNClusterizer::updateClusterInner(
510510
uint8_t aboveThreshold = 0;
511511

512512
GPUCA_UNROLL(U(), U())
513-
for (ushort i = 0; i < N; i++) {
513+
for (uint16_t i = 0; i < N; i++) {
514514
Delta2 d = cfconsts::InnerNeighbors[i];
515515

516516
PackedCharge p = buf[N * lid + i];
517517

518518
Charge q = cluster->updateInner(p, d);
519519

520-
CPU_ONLY(
521-
labelAcc->collect(pos.delta(d), q));
520+
CPU_ONLY(labelAcc->collect(pos.delta(d), q));
522521

523522
aboveThreshold |= (uint8_t(q > calib.tpc.cfInnerThreshold) << i);
524523
}
@@ -529,26 +528,25 @@ GPUdii() void GPUTPCNNClusterizer::updateClusterInner(
529528
}
530529

531530
GPUdii() void GPUTPCNNClusterizer::updateClusterOuter(
532-
ushort lid,
533-
ushort N,
534-
ushort M,
535-
ushort offset,
531+
uint16_t lid,
532+
uint16_t N,
533+
uint16_t M,
534+
uint16_t offset,
536535
const PackedCharge* buf,
537536
const ChargePos& pos,
538537
ClusterAccumulator* cluster,
539538
MCLabelAccumulator* labelAcc)
540539
{
541540
GPUCA_UNROLL(U(), U())
542-
for (ushort i = offset; i < M + offset; i++) {
541+
for (uint16_t i = offset; i < M + offset; i++) {
543542
PackedCharge p = buf[N * lid + i];
544543

545544
Delta2 d = cfconsts::OuterNeighbors[i];
546545

547546
Charge q = cluster->updateOuter(p, d);
548547
static_cast<void>(q); // Avoid unused varible warning on GPU.
549548

550-
CPU_ONLY(
551-
labelAcc->collect(pos.delta(d), q));
549+
CPU_ONLY(labelAcc->collect(pos.delta(d), q));
552550
}
553551
}
554552

@@ -562,7 +560,7 @@ GPUdii() void GPUTPCNNClusterizer::buildCluster(
562560
ClusterAccumulator* myCluster,
563561
MCLabelAccumulator* labelAcc)
564562
{
565-
ushort ll = get_local_id(0);
563+
uint16_t ll = get_local_id(0);
566564

567565
posBcast[ll] = pos;
568566
GPUbarrier();
@@ -587,11 +585,11 @@ GPUdii() void GPUTPCNNClusterizer::buildCluster(
587585
labelAcc,
588586
innerAboveThreshold);
589587

590-
ushort wgSizeHalf = (SCRATCH_PAD_WORK_GROUP_SIZE + 1) / 2;
588+
uint16_t wgSizeHalf = (SCRATCH_PAD_WORK_GROUP_SIZE + 1) / 2;
591589

592590
bool inGroup1 = ll < wgSizeHalf;
593591

594-
ushort llhalf = (inGroup1) ? ll : (ll - wgSizeHalf);
592+
uint16_t llhalf = (inGroup1) ? ll : (ll - wgSizeHalf);
595593

596594
CfUtils::condBlockLoad(
597595
chargeMap,
@@ -643,14 +641,14 @@ GPUdii() void GPUTPCNNClusterizer::buildCluster(
643641
#endif
644642
}
645643

646-
GPUd() uint GPUTPCNNClusterizer::sortIntoBuckets(processorType& clusterer, const tpc::ClusterNative& cluster, uint row, uint maxElemsPerBucket, uint* elemsInBucket, tpc::ClusterNative* buckets)
644+
GPUd() uint32_t GPUTPCNNClusterizer::sortIntoBuckets(processorType& clusterer, const tpc::ClusterNative& cluster, uint32_t row, uint32_t maxElemsPerBucket, uint32_t* elemsInBucket, tpc::ClusterNative* buckets)
647645
{
648-
uint index = CAMath::AtomicAdd(&elemsInBucket[row], 1u);
646+
uint32_t index = CAMath::AtomicAdd(&elemsInBucket[row], 1u);
649647
if (index < maxElemsPerBucket) {
650648
buckets[maxElemsPerBucket * row + index] = cluster;
651649
} else {
652650
clusterer.raiseError(GPUErrors::ERROR_CF_ROW_CLUSTER_OVERFLOW, clusterer.mISlice * 1000 + row, index, maxElemsPerBucket);
653651
CAMath::AtomicExch(&elemsInBucket[row], maxElemsPerBucket);
654652
}
655653
return index;
656-
}
654+
}

GPU/GPUTracking/TPCClusterFinder/GPUTPCNNClusterizer.h

Lines changed: 12 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,7 @@ namespace o2::tpc
2727
struct ClusterNative;
2828
} // namespace o2::tpc
2929

30-
namespace GPUCA_NAMESPACE::gpu
30+
namespace o2::gpu
3131
{
3232

3333
class ClusterAccumulator;
@@ -43,54 +43,49 @@ class GPUTPCNNClusterizer : public GPUKernelTemplate
4343
uint8_t innerAboveThreshold[SCRATCH_PAD_WORK_GROUP_SIZE];
4444
};
4545

46-
#ifdef GPUCA_HAVE_O2HEADERS
4746
typedef GPUTPCClusterFinder processorType;
4847
GPUhdi() static processorType* Processor(GPUConstantMem& processors)
4948
{
5049
return processors.tpcClusterer;
5150
}
52-
#endif
5351

5452
GPUhdi() constexpr static GPUDataTypes::RecoStep GetRecoStep()
5553
{
5654
return GPUDataTypes::RecoStep::TPCClusterFinding;
5755
}
5856

59-
template <int iKernel = defaultKernel>
60-
GPUd() static void Thread(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem, processorType& clusterer, char);
57+
template <int32_t iKernel = defaultKernel>
58+
GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int8_t);
6159

62-
static GPUd() void computeClustersImpl(int, int, int, int, processorType&, const CfFragment&, GPUSharedMemory&, const Array2D<PackedCharge>&, const ChargePos*, const GPUSettingsRec&, MCLabelAccumulator*, uint, uint, uint*, tpc::ClusterNative*, uint*);
60+
static GPUd() void computeClustersImpl(int32_t, int32_t, int32_t, int32_t, processorType&, const CfFragment&, GPUSharedMemory&, const Array2D<PackedCharge>&, const ChargePos*, const GPUSettingsRec&, MCLabelAccumulator*, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*, uint32_t*);
6361

64-
static GPUd() void exec(int, int, int, int, GPUSharedMemory&, processorType&, char);
6562
static int padOffset(int, int, const GPUTPCGeometry&);
6663
static int rowOffset(int, int);
6764
static bool isBoundary(int, int, int, const GPUTPCGeometry&);
6865

6966
template <class T>
70-
static GPUd() void nn_clusterizer(int, int, int, int,
67+
static GPUd() void nn_clusterizer(int32_t, int32_t, int32_t, int32_t,
7168
processorType&,
7269
const CfFragment&,
7370
GPUSharedMemory&,
7471
const Array2D<PackedCharge>&,
7572
const ChargePos*,
7673
const GPUSettingsRec&,
7774
MCLabelAccumulator*,
78-
uint,
79-
uint,
80-
uint*,
75+
uint32_t,
76+
uint32_t,
77+
uint32_t*,
8178
tpc::ClusterNative*,
82-
uint*);
79+
uint32_t*);
8380

8481
private:
85-
// ---------------------------------
82+
static GPUd() void updateClusterInner(const GPUSettingsRec&, uint16_t, uint16_t, const PackedCharge*, const ChargePos&, ClusterAccumulator*, MCLabelAccumulator*, uint8_t*);
8683

87-
static GPUd() void updateClusterInner(const GPUSettingsRec&, ushort, ushort, const PackedCharge*, const ChargePos&, ClusterAccumulator*, MCLabelAccumulator*, uint8_t*);
88-
89-
static GPUd() void updateClusterOuter(ushort, ushort, ushort, ushort, const PackedCharge*, const ChargePos&, ClusterAccumulator*, MCLabelAccumulator*);
84+
static GPUd() void updateClusterOuter(uint16_t, uint16_t, uint16_t, uint16_t, const PackedCharge*, const ChargePos&, ClusterAccumulator*, MCLabelAccumulator*);
9085

9186
static GPUd() void buildCluster(const GPUSettingsRec&, const Array2D<PackedCharge>&, ChargePos, ChargePos*, PackedCharge*, uint8_t*, ClusterAccumulator*, MCLabelAccumulator*);
9287

93-
static GPUd() uint sortIntoBuckets(processorType&, const tpc::ClusterNative&, uint, uint, uint*, tpc::ClusterNative*);
88+
static GPUd() uint32_t sortIntoBuckets(processorType&, const tpc::ClusterNative&, uint32_t, uint32_t, uint32_t*, tpc::ClusterNative*);
9489
};
9590

9691
} // namespace GPUCA_NAMESPACE::gpu

0 commit comments

Comments
 (0)