Skip to content

Commit 260d7d5

Browse files
committed
GPU CUDA/HIP: Set warp size automatically, not hard-coded
1 parent 013e151 commit 260d7d5

File tree

4 files changed

+8
-15
lines changed

4 files changed

+8
-15
lines changed

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu

Lines changed: 3 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -111,6 +111,7 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime()
111111
constexpr int32_t reqVerMin = 0;
112112
#endif
113113
if (mProcessingSettings.rtc.enable && mProcessingSettings.rtctech.runTest == 2) {
114+
mWarpSize = GPUCA_WARP_SIZE;
114115
genAndLoadRTC();
115116
exit(0);
116117
}
@@ -244,16 +245,12 @@ int32_t GPUReconstructionCUDA::InitDevice_Runtime()
244245
GPUInfo("\ttextureAlignment = %ld", (uint64_t)deviceProp.textureAlignment);
245246
GPUInfo(" ");
246247
}
247-
if (deviceProp.warpSize != GPUCA_WARP_SIZE) {
248+
if (deviceProp.warpSize != GPUCA_WARP_SIZE && !mProcessingSettings.rtc.enable) {
248249
throw std::runtime_error("Invalid warp size on GPU");
249250
}
251+
mWarpSize = deviceProp.warpSize;
250252
mBlockCount = deviceProp.multiProcessorCount;
251253
mMaxBackendThreads = std::max<int32_t>(mMaxBackendThreads, deviceProp.maxThreadsPerBlock * mBlockCount);
252-
#ifndef __HIPCC__ // CUDA
253-
mWarpSize = 32;
254-
#else // HIP
255-
mWarpSize = 64;
256-
#endif
257254
mDeviceName = deviceProp.name;
258255
mDeviceName += " (CUDA GPU)";
259256

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDAGenRTC.cxx

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -73,7 +73,8 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
7373
}
7474
fclose(fp);
7575
}
76-
const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport(*mParDevice, true);
76+
const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport(*mParDevice, true) +
77+
"#define GPUCA_WARP_SIZE " + std::to_string(mWarpSize) + "\n";
7778
if (mProcessingSettings.rtctech.printLaunchBounds || mProcessingSettings.debugLevel >= 3) {
7879
GPUInfo("RTC Launch Bounds:\n%s", launchBounds.c_str());
7980
}

GPU/GPUTracking/Definitions/GPUDefParametersDefaults.h

Lines changed: 3 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -275,6 +275,9 @@
275275

276276
#ifdef GPUCA_GPUCODE
277277
// Default settings for GPU, if not already set for selected GPU type
278+
#ifndef GPUCA_WARP_SIZE
279+
#define GPUCA_WARP_SIZE 32
280+
#endif
278281
#ifndef GPUCA_THREAD_COUNT_DEFAULT
279282
#define GPUCA_THREAD_COUNT_DEFAULT 256
280283
#endif

GPU/GPUTracking/Definitions/GPUDefParametersWrapper.h

Lines changed: 0 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -38,14 +38,6 @@
3838
#define GPUCA_MERGER_INTERPOLATION_ERROR_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_MERGER_INTERPOLATION_ERROR_TYPE)
3939
#define GPUCA_DEDX_STORAGE_TYPE_A GPUCA_DETERMINISTIC_CODE(float, GPUCA_DEDX_STORAGE_TYPE)
4040

41-
#ifndef GPUCA_WARP_SIZE
42-
#ifdef GPUCA_GPUCODE
43-
#define GPUCA_WARP_SIZE 32
44-
#else
45-
#define GPUCA_WARP_SIZE 1
46-
#endif
47-
#endif
48-
4941
// #define GPUCA_TRACKLET_CONSTRUCTOR_DO_PROFILE // Output Profiling Data for Tracklet Constructor Tracklet Scheduling
5042

5143
// #define GPUCA_KERNEL_DEBUGGER_OUTPUT

0 commit comments

Comments
 (0)