 #include "MCLabelAccumulator.h"
 #endif

-using namespace GPUCA_NAMESPACE::gpu;
-using namespace GPUCA_NAMESPACE::gpu::tpccf;
+using namespace o2::gpu;
+using namespace o2::gpu::tpccf;

 template <>
-GPUdii() void GPUTPCNNClusterizer::Thread<0>(int nBlocks, int nThreads, int iBlock, int iThread, GPUSharedMemory& smem, processorType& clusterer, char onlyMC)
+GPUdii() void GPUTPCNNClusterizer::Thread<0>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, int8_t onlyMC)
 {
   Array2D<PackedCharge> chargeMap(reinterpret_cast<PackedCharge*>(clusterer.mPchargeMap));
   CPU_ONLY(
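
The recurring change in this diff is the move from platform-dependent `int`/`uint`/`ushort`/`char` to the fixed-width `<cstdint>` aliases (`int32_t`, `uint32_t`, `uint16_t`, `int8_t`), so host and device builds agree on integer width and signedness. A minimal standalone sketch of the guarantee being bought here (not O2 code):

```cpp
#include <cstdint>

// uint/ushort are POSIX typedefs of unspecified width, and plain char has
// implementation-defined signedness; the <cstdint> aliases pin both down
// identically for every host and device compiler.
static_assert(sizeof(std::int32_t) == 4, "exactly 32 bits");
static_assert(sizeof(std::uint16_t) == 2, "exactly 16 bits");
static_assert(std::int8_t{-1} < 0, "int8_t is signed, unlike plain char on some ABIs");
```
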
@@ -91,19 +91,19 @@ bool GPUTPCNNClusterizer::isBoundary(int row, int pad, int global_shift, const G
 }

 template <class T>
-GPUd() void GPUTPCNNClusterizer::nn_clusterizer(int nBlocks, int nThreads, int iBlock, int iThread,
+GPUd() void GPUTPCNNClusterizer::nn_clusterizer(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread,
                                                 processorType& clusterer,
                                                 const CfFragment& fragment,
                                                 GPUSharedMemory& smem,
                                                 const Array2D<PackedCharge>& chargeMap,
                                                 const ChargePos* filteredPeakPositions,
                                                 const GPUSettingsRec& calib,
                                                 MCLabelAccumulator* labelAcc,
-                                                uint clusternum,
-                                                uint maxClusterPerRow,
-                                                uint* clusterInRow,
+                                                uint32_t clusternum,
+                                                uint32_t maxClusterPerRow,
+                                                uint32_t* clusterInRow,
                                                 tpc::ClusterNative* clusterByRow,
-                                                uint* clusterPosInRow)
+                                                uint32_t* clusterPosInRow)
 {

   uint glo_idx = get_global_id(0) * clusterer.nnClusterizerBatchedMode;
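
`nn_clusterizer` maps each work item to a block of `nnClusterizerBatchedMode` consecutive peaks through `glo_idx`. A toy model of that indexing, with `batch` and `nPeaks` as illustrative stand-ins for `nnClusterizerBatchedMode` and the filtered-peak count (the tail guard is an assumption about how the real loop bounds-checks):

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
  // Work item `tid` starts at tid * batch and handles `batch` consecutive
  // peaks, skipping anything past the end of the peak list.
  const std::uint32_t nPeaks = 10, batch = 4, nWorkItems = 3;
  for (std::uint32_t tid = 0; tid < nWorkItems; tid++) {
    std::uint32_t gloIdx = tid * batch;
    for (std::uint32_t i = 0; i < batch && gloIdx + i < nPeaks; i++) {
      std::printf("work item %u -> peak %u\n", tid, gloIdx + i);
    }
  }
  return 0;
}
```
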
@@ -422,21 +422,21 @@ GPUd() void GPUTPCNNClusterizer::nn_clusterizer(int nBlocks, int nThreads, int i
   }
 }

-GPUdii() void GPUTPCNNClusterizer::computeClustersImpl(int nBlocks, int nThreads, int iBlock, int iThread,
+GPUdii() void GPUTPCNNClusterizer::computeClustersImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread,
                                                        processorType& clusterer,
                                                        const CfFragment& fragment,
                                                        GPUSharedMemory& smem,
                                                        const Array2D<PackedCharge>& chargeMap,
                                                        const ChargePos* filteredPeakPositions,
                                                        const GPUSettingsRec& calib,
                                                        MCLabelAccumulator* labelAcc,
-                                                       uint clusternum,
-                                                       uint maxClusterPerRow,
-                                                       uint* clusterInRow,
+                                                       uint32_t clusternum,
+                                                       uint32_t maxClusterPerRow,
+                                                       uint32_t* clusterInRow,
                                                        tpc::ClusterNative* clusterByRow,
-                                                       uint* clusterPosInRow)
+                                                       uint32_t* clusterPosInRow)
 {
-  uint idx = get_global_id(0);
+  uint32_t idx = get_global_id(0);

   // For certain configurations dummy work items are added, so the total
   // number of work items is divisible by 64.
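
The dummy-work-item comment is about padding the launch size so every work group is full; the kernel then has to bounds-check `idx` itself. A small sketch of the round-up arithmetic, assuming 64 is the scheduling granularity the comment names:

```cpp
#include <cstdint>

// Round a work size up to the next multiple of the given granularity.
// Work items in the padded tail must return early, as the guard on `idx`
// in the kernel does.
constexpr std::uint32_t roundUpToMultiple(std::uint32_t n, std::uint32_t granularity)
{
  return ((n + granularity - 1) / granularity) * granularity;
}

static_assert(roundUpToMultiple(130, 64) == 192, "130 peaks -> 3 full groups of 64");
```
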
@@ -478,7 +478,7 @@ GPUdii() void GPUTPCNNClusterizer::computeClustersImpl(int nBlocks, int nThreads
     return;
   }

-  uint rowIndex = 0;
+  uint32_t rowIndex = 0;
   if (clusterByRow != nullptr) {
     rowIndex = sortIntoBuckets(
       clusterer,
@@ -499,8 +499,8 @@ GPUdii() void GPUTPCNNClusterizer::computeClustersImpl(int nBlocks, int nThreads

 GPUdii() void GPUTPCNNClusterizer::updateClusterInner(
   const GPUSettingsRec& calib,
-  ushort lid,
-  ushort N,
+  uint16_t lid,
+  uint16_t N,
   const PackedCharge* buf,
   const ChargePos& pos,
   ClusterAccumulator* cluster,
@@ -510,15 +510,14 @@ GPUdii() void GPUTPCNNClusterizer::updateClusterInner(
   uint8_t aboveThreshold = 0;

   GPUCA_UNROLL(U(), U())
-  for (ushort i = 0; i < N; i++) {
+  for (uint16_t i = 0; i < N; i++) {
     Delta2 d = cfconsts::InnerNeighbors[i];

     PackedCharge p = buf[N * lid + i];

     Charge q = cluster->updateInner(p, d);

-    CPU_ONLY(
-      labelAcc->collect(pos.delta(d), q));
+    CPU_ONLY(labelAcc->collect(pos.delta(d), q));

     aboveThreshold |= (uint8_t(q > calib.tpc.cfInnerThreshold) << i);
   }
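
The `aboveThreshold` accumulation packs one flag per inner neighbour into a byte: bit `i` is set when neighbour `i`'s charge clears `cfInnerThreshold`. A self-contained toy version of the same bit-packing:

```cpp
#include <cstdint>
#include <cstdio>

int main()
{
  // One bit per neighbour, set when that neighbour's charge exceeds the
  // threshold; mirrors the OR-of-shifted-bools in updateClusterInner.
  const float charges[8] = {1.f, 5.f, 0.f, 7.f, 2.f, 9.f, 0.f, 3.f};
  const float threshold = 4.f;
  std::uint8_t aboveThreshold = 0;
  for (std::uint16_t i = 0; i < 8; i++) {
    aboveThreshold |= static_cast<std::uint8_t>(std::uint8_t(charges[i] > threshold) << i);
  }
  // Neighbours 1, 3 and 5 are above threshold -> mask 0x2a.
  std::printf("mask = 0x%02x\n", static_cast<unsigned>(aboveThreshold));
  return 0;
}
```
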
@@ -529,26 +528,25 @@ GPUdii() void GPUTPCNNClusterizer::updateClusterInner(
 }

 GPUdii() void GPUTPCNNClusterizer::updateClusterOuter(
-  ushort lid,
-  ushort N,
-  ushort M,
-  ushort offset,
+  uint16_t lid,
+  uint16_t N,
+  uint16_t M,
+  uint16_t offset,
   const PackedCharge* buf,
   const ChargePos& pos,
   ClusterAccumulator* cluster,
   MCLabelAccumulator* labelAcc)
 {
   GPUCA_UNROLL(U(), U())
-  for (ushort i = offset; i < M + offset; i++) {
+  for (uint16_t i = offset; i < M + offset; i++) {
     PackedCharge p = buf[N * lid + i];

     Delta2 d = cfconsts::OuterNeighbors[i];

     Charge q = cluster->updateOuter(p, d);
     static_cast<void>(q); // Avoid unused variable warning on GPU.

-    CPU_ONLY(
-      labelAcc->collect(pos.delta(d), q));
+    CPU_ONLY(labelAcc->collect(pos.delta(d), q));
   }
 }

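
`CPU_ONLY` wraps the MC-label bookkeeping so it only compiles in the host build; the device build drops the call entirely. The real definition lives in the O2 GPU framework headers; a minimal sketch of the idea, assuming a CUDA/HIP device-compilation macro is the discriminator:

```cpp
// Sketch only: the framework's actual CPU_ONLY may key on different
// macros. The shape is a conditional expansion that erases the argument
// in device code and passes it through on the host.
#if defined(__CUDA_ARCH__) || defined(__HIP_DEVICE_COMPILE__)
#define CPU_ONLY_SKETCH(x)
#else
#define CPU_ONLY_SKETCH(x) x
#endif
```
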
@@ -562,7 +560,7 @@ GPUdii() void GPUTPCNNClusterizer::buildCluster(
   ClusterAccumulator* myCluster,
   MCLabelAccumulator* labelAcc)
 {
-  ushort ll = get_local_id(0);
+  uint16_t ll = get_local_id(0);

   posBcast[ll] = pos;
   GPUbarrier();
@@ -587,11 +585,11 @@ GPUdii() void GPUTPCNNClusterizer::buildCluster(
     labelAcc,
     innerAboveThreshold);

-  ushort wgSizeHalf = (SCRATCH_PAD_WORK_GROUP_SIZE + 1) / 2;
+  uint16_t wgSizeHalf = (SCRATCH_PAD_WORK_GROUP_SIZE + 1) / 2;

   bool inGroup1 = ll < wgSizeHalf;

-  ushort llhalf = (inGroup1) ? ll : (ll - wgSizeHalf);
+  uint16_t llhalf = (inGroup1) ? ll : (ll - wgSizeHalf);

   CfUtils::condBlockLoad(
     chargeMap,
@@ -643,14 +641,14 @@ GPUdii() void GPUTPCNNClusterizer::buildCluster(
 #endif
 }

-GPUd() uint GPUTPCNNClusterizer::sortIntoBuckets(processorType& clusterer, const tpc::ClusterNative& cluster, uint row, uint maxElemsPerBucket, uint* elemsInBucket, tpc::ClusterNative* buckets)
+GPUd() uint32_t GPUTPCNNClusterizer::sortIntoBuckets(processorType& clusterer, const tpc::ClusterNative& cluster, uint32_t row, uint32_t maxElemsPerBucket, uint32_t* elemsInBucket, tpc::ClusterNative* buckets)
 {
-  uint index = CAMath::AtomicAdd(&elemsInBucket[row], 1u);
+  uint32_t index = CAMath::AtomicAdd(&elemsInBucket[row], 1u);
   if (index < maxElemsPerBucket) {
     buckets[maxElemsPerBucket * row + index] = cluster;
   } else {
     clusterer.raiseError(GPUErrors::ERROR_CF_ROW_CLUSTER_OVERFLOW, clusterer.mISlice * 1000 + row, index, maxElemsPerBucket);
     CAMath::AtomicExch(&elemsInBucket[row], maxElemsPerBucket);
   }
   return index;
-}
+}
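
`sortIntoBuckets` is a lock-free slot reservation: `AtomicAdd` claims a unique index per cluster, the write happens only if the index is in range, and on overflow the counter is clamped back to capacity after raising the error. A host-side sketch of the same pattern with `std::atomic` (names are illustrative, not O2 API):

```cpp
#include <atomic>
#include <cstdint>
#include <cstdio>
#include <vector>

// Atomically claim an index; callers write only if it is in range, and
// on overflow the counter is clamped, mirroring the AtomicExch above.
std::uint32_t reserveSlot(std::atomic<std::uint32_t>& counter, std::uint32_t capacity)
{
  std::uint32_t index = counter.fetch_add(1u);
  if (index >= capacity) {
    counter.store(capacity);
  }
  return index;
}

int main()
{
  std::atomic<std::uint32_t> used{0};
  std::vector<int> bucket(4, -1);
  for (int value = 0; value < 6; value++) {
    std::uint32_t slot = reserveSlot(used, static_cast<std::uint32_t>(bucket.size()));
    if (slot < bucket.size()) {
      bucket[slot] = value; // safe: this slot belongs to this iteration alone
    }
  }
  std::printf("stored %u of 6 values\n", used.load()); // prints "stored 4 of 6 values"
  return 0;
}
```
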