AliceO2Group
diff --git a/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl b/GPU/GPUTracking/Base/opencl-common/GPUReconstructionOCL.cl
Lines changed: 1 addition & 1 deletion
diff --git a/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx b/GPU/GPUTracking/DataCompression/GPUTPCCompressionKernels.cxx
Lines changed: 9 additions & 9 deletions
diff --git a/GPU/GPUTracking/Definitions/clusterFinderDefs.h b/GPU/GPUTracking/Definitions/clusterFinderDefs.h
Lines changed: 1 addition & 14 deletions
diff --git a/GPU/GPUTracking/TPCClusterFinder/CfConsts.h b/GPU/GPUTracking/TPCClusterFinder/CfConsts.h
Lines changed: 3 additions & 3 deletions
diff --git a/GPU/GPUTracking/TPCClusterFinder/CfFragment.h b/GPU/GPUTracking/TPCClusterFinder/CfFragment.h
Lines changed: 1 addition & 1 deletion
diff --git a/GPU/GPUTracking/TPCClusterFinder/CfUtils.h b/GPU/GPUTracking/TPCClusterFinder/CfUtils.h
Lines changed: 27 additions & 27 deletions
diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.cxx
Lines changed: 1 addition & 1 deletion
diff --git a/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h b/GPU/GPUTracking/TPCClusterFinder/ClusterAccumulator.h
Lines changed: 2 additions & 2 deletions
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.cxx
Lines changed: 2 additions & 2 deletions
diff --git a/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h b/GPU/GPUTracking/TPCClusterFinder/GPUTPCCFChargeMapFiller.h
Lines changed: 1 addition & 1 deletion
@@ -65,7 +65,7 @@
 #define int64_t long
 #define int32_t int
 #define int16_t short
-#define int8_t char
+#define int8_t signed char
 
 // Disable assertions since they produce errors in GPU Code
 #ifdef assert
 
@@ -348,13 +348,13 @@ GPUdi() void GPUTPCCompressionGatherKernels::compressorMemcpy<uint8_t>(uint8_t*
   CONSTEXPR const int32_t vec32Elems = CpyVector<uint8_t, Vec32>::Size;
   CONSTEXPR const int32_t vec16Elems = CpyVector<uint8_t, Vec16>::Size;
 
-  if (size >= uint(nThreads * vec128Elems)) {
+  if (size >= uint32_t(nThreads * vec128Elems)) {
     compressorMemcpyVectorised<uint8_t, Vec128>(dst, src, size, nThreads, iThread);
-  } else if (size >= uint(nThreads * vec64Elems)) {
+  } else if (size >= uint32_t(nThreads * vec64Elems)) {
     compressorMemcpyVectorised<uint8_t, Vec64>(dst, src, size, nThreads, iThread);
-  } else if (size >= uint(nThreads * vec32Elems)) {
+  } else if (size >= uint32_t(nThreads * vec32Elems)) {
     compressorMemcpyVectorised<uint8_t, Vec32>(dst, src, size, nThreads, iThread);
-  } else if (size >= uint(nThreads * vec16Elems)) {
+  } else if (size >= uint32_t(nThreads * vec16Elems)) {
     compressorMemcpyVectorised<uint8_t, Vec16>(dst, src, size, nThreads, iThread);
   } else {
     compressorMemcpyBasic(dst, src, size, nThreads, iThread);
@@ -368,11 +368,11 @@ GPUdi() void GPUTPCCompressionGatherKernels::compressorMemcpy<uint16_t>(uint16_t
   CONSTEXPR const int32_t vec64Elems = CpyVector<uint16_t, Vec64>::Size;
   CONSTEXPR const int32_t vec32Elems = CpyVector<uint16_t, Vec32>::Size;
 
-  if (size >= uint(nThreads * vec128Elems)) {
+  if (size >= uint32_t(nThreads * vec128Elems)) {
     compressorMemcpyVectorised<uint16_t, Vec128>(dst, src, size, nThreads, iThread);
-  } else if (size >= uint(nThreads * vec64Elems)) {
+  } else if (size >= uint32_t(nThreads * vec64Elems)) {
     compressorMemcpyVectorised<uint16_t, Vec64>(dst, src, size, nThreads, iThread);
-  } else if (size >= uint(nThreads * vec32Elems)) {
+  } else if (size >= uint32_t(nThreads * vec32Elems)) {
     compressorMemcpyVectorised<uint16_t, Vec32>(dst, src, size, nThreads, iThread);
   } else {
     compressorMemcpyBasic(dst, src, size, nThreads, iThread);
@@ -385,9 +385,9 @@ GPUdi() void GPUTPCCompressionGatherKernels::compressorMemcpy<uint32_t>(uint32_t
   CONSTEXPR const int32_t vec128Elems = CpyVector<uint32_t, Vec128>::Size;
   CONSTEXPR const int32_t vec64Elems = CpyVector<uint32_t, Vec64>::Size;
 
-  if (size >= uint(nThreads * vec128Elems)) {
+  if (size >= uint32_t(nThreads * vec128Elems)) {
     compressorMemcpyVectorised<uint32_t, Vec128>(dst, src, size, nThreads, iThread);
-  } else if (size >= uint(nThreads * vec64Elems)) {
+  } else if (size >= uint32_t(nThreads * vec64Elems)) {
     compressorMemcpyVectorised<uint32_t, Vec64>(dst, src, size, nThreads, iThread);
   } else {
     compressorMemcpyBasic(dst, src, size, nThreads, iThread);
 
@@ -17,13 +17,6 @@
 
 #include "GPUDef.h"
 
-#ifndef __OPENCL__
-using uchar = uint8_t;
-#endif
-#ifdef __APPLE__
-using ulong = uint64_t;
-#endif
-
 /* #define CHARGEMAP_TIME_MAJOR_LAYOUT */
 #define CHARGEMAP_TILING_LAYOUT
 
@@ -53,14 +46,8 @@ using ulong = uint64_t;
 #define TPC_MAX_FRAGMENT_LEN_HOST 1000
 #define TPC_MAX_FRAGMENT_LEN_PADDED(size) ((size) + 2 * GPUCF_PADDING_TIME)
 
-#if 0
-#define DBG_PRINT(msg, ...) printf(msg "\n", __VA_ARGS__)
-#else
-#define DBG_PRINT(msg, ...) static_cast<void>(0)
-#endif
-
 #ifdef GPUCA_GPUCODE
-#define CPU_ONLY(x) static_cast<void>(0)
+#define CPU_ONLY(x)
 #define CPU_PTR(x) nullptr
 #else
 #define CPU_ONLY(x) x
 
@@ -70,7 +70,7 @@ GPUconstexpr() tpccf::Delta2 OuterNeighbors[16] =
     {2, 2},
     {1, 2}};
 
-GPUconstexpr() uchar OuterToInner[16] =
+GPUconstexpr() uint8_t OuterToInner[16] =
   {
     0, 0, 0,
 
@@ -90,7 +90,7 @@ GPUconstexpr() uchar OuterToInner[16] =
 
 // outer to inner mapping change for the peak counting step,
 // as the other position is the position of the peak
-GPUconstexpr() uchar OuterToInnerInv[16] =
+GPUconstexpr() uint8_t OuterToInnerInv[16] =
   {
     1,
     0,
@@ -153,7 +153,7 @@ GPUconstexpr() tpccf::Delta2 NoiseSuppressionNeighbors[NOISE_SUPPRESSION_NEIGHBO
     {2, 2},
     {2, 3}};
 
-GPUconstexpr() uint NoiseSuppressionMinima[NOISE_SUPPRESSION_NEIGHBOR_NUM] =
+GPUconstexpr() uint32_t NoiseSuppressionMinima[NOISE_SUPPRESSION_NEIGHBOR_NUM] =
   {
     (1 << 8) | (1 << 9),
     (1 << 9),
 
@@ -104,7 +104,7 @@ struct CfFragment {
   }
 
  private:
-  GPUd() CfFragment(uint index_, bool hasBacklog_, tpccf::TPCTime start_, tpccf::TPCTime totalSliceLen, tpccf::TPCFragmentTime maxSubSliceLen)
+  GPUd() CfFragment(uint32_t index_, bool hasBacklog_, tpccf::TPCTime start_, tpccf::TPCTime totalSliceLen, tpccf::TPCFragmentTime maxSubSliceLen)
   {
     this->index = index_;
     this->hasBacklog = hasBacklog_;
 
@@ -32,19 +32,19 @@ class CfUtils
     return (pos.pad() < 2 || pos.pad() >= padsPerRow - 2);
   }
 
-  static GPUdi() bool innerAboveThreshold(uchar aboveThreshold, ushort outerIdx)
+  static GPUdi() bool innerAboveThreshold(uint8_t aboveThreshold, uint16_t outerIdx)
   {
     return aboveThreshold & (1 << cfconsts::OuterToInner[outerIdx]);
   }
 
-  static GPUdi() bool innerAboveThresholdInv(uchar aboveThreshold, ushort outerIdx)
+  static GPUdi() bool innerAboveThresholdInv(uint8_t aboveThreshold, uint16_t outerIdx)
   {
     return aboveThreshold & (1 << cfconsts::OuterToInnerInv[outerIdx]);
   }
 
-  static GPUdi() bool isPeak(uchar peak) { return peak & 0x01; }
+  static GPUdi() bool isPeak(uint8_t peak) { return peak & 0x01; }
 
-  static GPUdi() bool isAboveThreshold(uchar peak) { return peak >> 1; }
+  static GPUdi() bool isAboveThreshold(uint8_t peak) { return peak >> 1; }
 
   static GPUdi() int32_t warpPredicateScan(int32_t pred, int32_t* sum)
   {
@@ -159,14 +159,14 @@ class CfUtils
   }
 
   template <size_t SCRATCH_PAD_WORK_GROUP_SIZE, typename SharedMemory>
-  static GPUdi() ushort partition(SharedMemory& smem, ushort ll, bool pred, ushort partSize, ushort* newPartSize)
+  static GPUdi() uint16_t partition(SharedMemory& smem, uint16_t ll, bool pred, uint16_t partSize, uint16_t* newPartSize)
   {
     bool participates = ll < partSize;
 
     int32_t part;
     int32_t lpos = blockPredicateScan<SCRATCH_PAD_WORK_GROUP_SIZE>(smem, int32_t(!pred && participates), &part);
 
-    ushort pos = (participates && !pred) ? lpos : part;
+    uint16_t pos = (participates && !pred) ? lpos : part;
 
     *newPartSize = part;
     return pos;
@@ -175,24 +175,24 @@ class CfUtils
   template <typename T>
   static GPUdi() void blockLoad(
     const Array2D<T>& map,
-    uint wgSize,
-    uint elems,
-    ushort ll,
-    uint offset,
-    uint N,
+    uint32_t wgSize,
+    uint32_t elems,
+    uint16_t ll,
+    uint32_t offset,
+    uint32_t N,
     GPUconstexprref() const tpccf::Delta2* neighbors,
     const ChargePos* posBcast,
     GPUgeneric() T* buf)
   {
 #if defined(GPUCA_GPUCODE)
     GPUbarrier();
-    ushort x = ll % N;
-    ushort y = ll / N;
+    uint16_t x = ll % N;
+    uint16_t y = ll / N;
     tpccf::Delta2 d = neighbors[x + offset];
 
     for (uint32_t i = y; i < wgSize; i += (elems / N)) {
       ChargePos readFrom = posBcast[i];
-      uint writeTo = N * i + x;
+      uint32_t writeTo = N * i + x;
       buf[writeTo] = map[readFrom.delta(d)];
     }
     GPUbarrier();
@@ -208,7 +208,7 @@ class CfUtils
     for (uint32_t i = 0; i < N; i++) {
       tpccf::Delta2 d = neighbors[i + offset];
 
-      uint writeTo = N * ll + i;
+      uint32_t writeTo = N * ll + i;
       buf[writeTo] = map[readFrom.delta(d)];
     }
 
@@ -219,25 +219,25 @@ class CfUtils
   template <typename T, bool Inv = false>
   static GPUdi() void condBlockLoad(
     const Array2D<T>& map,
-    ushort wgSize,
-    ushort elems,
-    ushort ll,
-    ushort offset,
-    ushort N,
+    uint16_t wgSize,
+    uint16_t elems,
+    uint16_t ll,
+    uint16_t offset,
+    uint16_t N,
     GPUconstexprref() const tpccf::Delta2* neighbors,
     const ChargePos* posBcast,
-    const uchar* aboveThreshold,
+    const uint8_t* aboveThreshold,
     GPUgeneric() T* buf)
   {
 #if defined(GPUCA_GPUCODE)
     GPUbarrier();
-    ushort y = ll / N;
-    ushort x = ll % N;
+    uint16_t y = ll / N;
+    uint16_t x = ll % N;
     tpccf::Delta2 d = neighbors[x + offset];
     for (uint32_t i = y; i < wgSize; i += (elems / N)) {
       ChargePos readFrom = posBcast[i];
-      uchar above = aboveThreshold[i];
-      uint writeTo = N * i + x;
+      uint8_t above = aboveThreshold[i];
+      uint32_t writeTo = N * i + x;
       T v(0);
       bool cond = (Inv) ? innerAboveThresholdInv(above, x + offset)
                         : innerAboveThreshold(above, x + offset);
@@ -253,13 +253,13 @@ class CfUtils
     }
 
     ChargePos readFrom = posBcast[ll];
-    uchar above = aboveThreshold[ll];
+    uint8_t above = aboveThreshold[ll];
     GPUbarrier();
 
     for (uint32_t i = 0; i < N; i++) {
       tpccf::Delta2 d = neighbors[i + offset];
 
-      uint writeTo = N * ll + i;
+      uint32_t writeTo = N * ll + i;
       T v(0);
       bool cond = (Inv) ? innerAboveThresholdInv(above, i + offset)
                         : innerAboveThreshold(above, i + offset);
 
@@ -42,7 +42,7 @@ GPUd() bool ClusterAccumulator::toNative(const ChargePos& pos, Charge q, tpc::Cl
   bool wasSplitInPad = mSplitInPad >= param.rec.tpc.cfMinSplitNum;
   bool isSingleCluster = (mPadSigma == 0) || (mTimeSigma == 0);
 
-  uchar flags = 0;
+  uint8_t flags = 0;
   flags |= (isEdgeCluster) ? tpc::ClusterNative::flagEdge : 0;
   flags |= (wasSplitInTime) ? tpc::ClusterNative::flagSplitTime : 0;
   flags |= (wasSplitInPad) ? tpc::ClusterNative::flagSplitPad : 0;
 
@@ -49,8 +49,8 @@ class ClusterAccumulator
   float mPadSigma = 0;
   float mTimeMean = 0;
   float mTimeSigma = 0;
-  uchar mSplitInTime = 0;
-  uchar mSplitInPad = 0;
+  uint8_t mSplitInTime = 0;
+  uint8_t mSplitInPad = 0;
 
   GPUd() void update(tpccf::Charge, tpccf::Delta2);
 };
 
@@ -23,14 +23,14 @@ using namespace GPUCA_NAMESPACE::gpu::tpccf;
 template <>
 GPUdii() void GPUTPCCFChargeMapFiller::Thread<GPUTPCCFChargeMapFiller::fillIndexMap>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer)
 {
-  Array2D<uint> indexMap(clusterer.mPindexMap);
+  Array2D<uint32_t> indexMap(clusterer.mPindexMap);
   fillIndexMapImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer.mPmemory->fragment, clusterer.mPdigits, indexMap, clusterer.mPmemory->counters.nDigitsInFragment);
 }
 
 GPUd() void GPUTPCCFChargeMapFiller::fillIndexMapImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread,
                                                       const CfFragment& fragment,
                                                       const tpc::Digit* digits,
-                                                      Array2D<uint>& indexMap,
+                                                      Array2D<uint32_t>& indexMap,
                                                       size_t maxDigit)
 {
   size_t idx = get_global_id(0);
 
@@ -57,7 +57,7 @@ class GPUTPCCFChargeMapFiller : public GPUKernelTemplate
   template <int32_t iKernel = defaultKernel, typename... Args>
   GPUd() static void Thread(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer, Args... args);
 
-  static GPUd() void fillIndexMapImpl(int32_t, int32_t, int32_t, int32_t, const CfFragment&, const tpc::Digit*, Array2D<uint>&, size_t);
+  static GPUd() void fillIndexMapImpl(int32_t, int32_t, int32_t, int32_t, const CfFragment&, const tpc::Digit*, Array2D<uint32_t>&, size_t);
 
   static GPUd() void fillFromDigitsImpl(int32_t, int32_t, int32_t, int32_t, processorType&, const CfFragment&, size_t, const tpc::Digit*, ChargePos*, Array2D<PackedCharge>&);
Original file line number	Diff line number	Diff line change
`@@ -104,7 +104,7 @@ struct CfFragment {`
`104`	`104`	`}`
`105`	`105`
`106`	`106`	`private:`
`107`		`- GPUd() CfFragment(uint index_, bool hasBacklog_, tpccf::TPCTime start_, tpccf::TPCTime totalSliceLen, tpccf::TPCFragmentTime maxSubSliceLen)`
	`107`	`+ GPUd() CfFragment(uint32_t index_, bool hasBacklog_, tpccf::TPCTime start_, tpccf::TPCTime totalSliceLen, tpccf::TPCFragmentTime maxSubSliceLen)`
`108`	`108`	`{`
`109`	`109`	`this->index = index_;`
`110`	`110`	`this->hasBacklog = hasBacklog_;`
Original file line number	Diff line number	Diff line change
`@@ -23,14 +23,14 @@ using namespace GPUCA_NAMESPACE::gpu::tpccf;`
`23`	`23`	`template <>`
`24`	`24`	`GPUdii() void GPUTPCCFChargeMapFiller::Thread<GPUTPCCFChargeMapFiller::fillIndexMap>(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread, GPUSharedMemory& smem, processorType& clusterer)`
`25`	`25`	`{`
`26`		`- Array2D<uint> indexMap(clusterer.mPindexMap);`
	`26`	`+ Array2D<uint32_t> indexMap(clusterer.mPindexMap);`
`27`	`27`	`fillIndexMapImpl(get_num_groups(0), get_local_size(0), get_group_id(0), get_local_id(0), clusterer.mPmemory->fragment, clusterer.mPdigits, indexMap, clusterer.mPmemory->counters.nDigitsInFragment);`
`28`	`28`	`}`
`29`	`29`
`30`	`30`	`GPUd() void GPUTPCCFChargeMapFiller::fillIndexMapImpl(int32_t nBlocks, int32_t nThreads, int32_t iBlock, int32_t iThread,`
`31`	`31`	`const CfFragment& fragment,`
`32`	`32`	`const tpc::Digit* digits,`
`33`		`- Array2D<uint>& indexMap,`
	`33`	`+ Array2D<uint32_t>& indexMap,`
`34`	`34`	`size_t maxDigit)`
`35`	`35`	`{`
`36`	`36`	`size_t idx = get_global_id(0);`