@@ -32,19 +32,19 @@ class CfUtils
3232 return (pos.pad () < 2 || pos.pad () >= padsPerRow - 2 );
3333 }
3434
// Tests a single bit of the `aboveThreshold` mask. The bit index is read from
// cfconsts::OuterToInner at position `outerIdx`; the masked value is converted
// to bool on return, so the result is true exactly when that bit is set.
// The removed (-) and added (+) signatures differ only in how the parameter
// types are spelled (uchar/ushort versus uint8_t/uint16_t).
// NOTE(review): no bounds check on outerIdx is made here; the size and contents
// of cfconsts::OuterToInner are not visible in this view - confirm at the table.
35- static GPUdi () bool innerAboveThreshold(uchar aboveThreshold, ushort outerIdx)
35+ static GPUdi () bool innerAboveThreshold(uint8_t aboveThreshold, uint16_t outerIdx)
3636 {
3737 return aboveThreshold & (1 << cfconsts::OuterToInner[outerIdx]);
3838 }
3939
// Same bit test as innerAboveThreshold, but the bit index is looked up in
// cfconsts::OuterToInnerInv instead of cfconsts::OuterToInner. Returns true
// when the selected bit of `aboveThreshold` is set.
// The removed (-) and added (+) signatures differ only in how the parameter
// types are spelled (uchar/ushort versus uint8_t/uint16_t).
// NOTE(review): outerIdx is used unchecked as an index; how OuterToInnerInv
// relates to OuterToInner is not visible here - confirm at the table definitions.
40- static GPUdi () bool innerAboveThresholdInv(uchar aboveThreshold, ushort outerIdx)
40+ static GPUdi () bool innerAboveThresholdInv(uint8_t aboveThreshold, uint16_t outerIdx)
4141 {
4242 return aboveThreshold & (1 << cfconsts::OuterToInnerInv[outerIdx]);
4343 }
4444
// Returns true when the lowest bit (mask 0x01) of `peak` is set.
// Old (-) and new (+) lines differ only in the parameter type spelling
// (uchar versus uint8_t).
45- static GPUdi () bool isPeak(uchar peak) { return peak & 0x01 ; }
45+ static GPUdi () bool isPeak(uint8_t peak) { return peak & 0x01 ; }
4646
// Returns true when `peak >> 1` is non-zero, i.e. when any bit of `peak` other
// than bit 0 is set; bit 0 is shifted out and does not influence the result.
// Old (-) and new (+) lines differ only in the parameter type spelling
// (uchar versus uint8_t).
47- static GPUdi () bool isAboveThreshold(uchar peak) { return peak >> 1 ; }
47+ static GPUdi () bool isAboveThreshold(uint8_t peak) { return peak >> 1 ; }
4848
4949 static GPUdi () int32_t warpPredicateScan(int32_t pred, int32_t * sum)
5050 {
@@ -159,14 +159,14 @@ class CfUtils
159159 }
160160
161161 template <size_t SCRATCH_PAD_WORK_GROUP_SIZE, typename SharedMemory>
162- static GPUdi () ushort partition(SharedMemory& smem, ushort ll, bool pred, ushort partSize, ushort * newPartSize)
162+ static GPUdi () uint16_t partition(SharedMemory& smem, uint16_t ll, bool pred, uint16_t partSize, uint16_t * newPartSize)
163163 {
164164 bool participates = ll < partSize;
165165
166166 int32_t part;
167167 int32_t lpos = blockPredicateScan<SCRATCH_PAD_WORK_GROUP_SIZE>(smem, int32_t (!pred && participates), &part);
168168
169- ushort pos = (participates && !pred) ? lpos : part;
169+ uint16_t pos = (participates && !pred) ? lpos : part;
170170
171171 *newPartSize = part;
172172 return pos;
@@ -175,24 +175,24 @@ class CfUtils
175175 template <typename T>
176176 static GPUdi () void blockLoad(
177177 const Array2D<T>& map,
178- uint wgSize,
179- uint elems,
180- ushort ll,
181- uint offset,
182- uint N,
178+ uint32_t wgSize,
179+ uint32_t elems,
180+ uint16_t ll,
181+ uint32_t offset,
182+ uint32_t N,
183183 GPUconstexprref () const tpccf::Delta2* neighbors,
184184 const ChargePos* posBcast,
185185 GPUgeneric() T* buf)
186186 {
187187#if defined(GPUCA_GPUCODE)
188188 GPUbarrier ();
189- ushort x = ll % N;
190- ushort y = ll / N;
189+ uint16_t x = ll % N;
190+ uint16_t y = ll / N;
191191 tpccf::Delta2 d = neighbors[x + offset];
192192
193193 for (uint32_t i = y; i < wgSize; i += (elems / N)) {
194194 ChargePos readFrom = posBcast[i];
195- uint writeTo = N * i + x;
195+ uint32_t writeTo = N * i + x;
196196 buf[writeTo] = map[readFrom.delta (d)];
197197 }
198198 GPUbarrier ();
@@ -208,7 +208,7 @@ class CfUtils
208208 for (uint32_t i = 0 ; i < N; i++) {
209209 tpccf::Delta2 d = neighbors[i + offset];
210210
211- uint writeTo = N * ll + i;
211+ uint32_t writeTo = N * ll + i;
212212 buf[writeTo] = map[readFrom.delta (d)];
213213 }
214214
@@ -219,25 +219,25 @@ class CfUtils
219219 template <typename T, bool Inv = false >
220220 static GPUdi () void condBlockLoad(
221221 const Array2D<T>& map,
222- ushort wgSize,
223- ushort elems,
224- ushort ll,
225- ushort offset,
226- ushort N,
222+ uint16_t wgSize,
223+ uint16_t elems,
224+ uint16_t ll,
225+ uint16_t offset,
226+ uint16_t N,
227227 GPUconstexprref () const tpccf::Delta2* neighbors,
228228 const ChargePos* posBcast,
229- const uchar * aboveThreshold,
229+ const uint8_t * aboveThreshold,
230230 GPUgeneric() T* buf)
231231 {
232232#if defined(GPUCA_GPUCODE)
233233 GPUbarrier ();
234- ushort y = ll / N;
235- ushort x = ll % N;
234+ uint16_t y = ll / N;
235+ uint16_t x = ll % N;
236236 tpccf::Delta2 d = neighbors[x + offset];
237237 for (uint32_t i = y; i < wgSize; i += (elems / N)) {
238238 ChargePos readFrom = posBcast[i];
239- uchar above = aboveThreshold[i];
240- uint writeTo = N * i + x;
239+ uint8_t above = aboveThreshold[i];
240+ uint32_t writeTo = N * i + x;
241241 T v (0 );
242242 bool cond = (Inv) ? innerAboveThresholdInv (above, x + offset)
243243 : innerAboveThreshold (above, x + offset);
@@ -253,13 +253,13 @@ class CfUtils
253253 }
254254
255255 ChargePos readFrom = posBcast[ll];
256- uchar above = aboveThreshold[ll];
256+ uint8_t above = aboveThreshold[ll];
257257 GPUbarrier ();
258258
259259 for (uint32_t i = 0 ; i < N; i++) {
260260 tpccf::Delta2 d = neighbors[i + offset];
261261
262- uint writeTo = N * ll + i;
262+ uint32_t writeTo = N * ll + i;
263263 T v (0 );
264264 bool cond = (Inv) ? innerAboveThresholdInv (above, i + offset)
265265 : innerAboveThreshold (above, i + offset);
0 commit comments