Skip to content

Commit dc40fc3

Browse files
committed
GPU RTC: Fix launch bounds, cut 3rd runtime parameter to force number of blocks started
1 parent fc83851 commit dc40fc3

File tree

2 files changed

+3
-1
lines changed

2 files changed

+3
-1
lines changed

GPU/GPUTracking/Definitions/GPUDefParametersDefault.h

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -24,6 +24,8 @@
2424
#include "GPUCommonDef.h"
2525
#include "GPUDefMacros.h"
2626

27+
// Launch bound definition, 3 optional parameters: maxThreads per block, minBlocks per multiprocessor, force number of blocks (not passed to compiler as launch bounds)
28+
2729
// GPU Run Configuration
2830
#ifdef GPUCA_GPUCODE
2931
#if defined(GPUCA_GPUTYPE_MI2xx)

GPU/GPUTracking/Definitions/GPUDefParametersLoad.template.inc

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -37,7 +37,7 @@ static GPUDefParameters GPUDefParametersLoad()
3737
if (par.par_LB_minBlocks[i] > 0) { \
3838
o << ", " << par.par_LB_minBlocks[i]; \
3939
} \
40-
if (par.par_LB_forceBlocks[i] > 0) { \
40+
if (!forRTC && par.par_LB_forceBlocks[i] > 0) { \
4141
o << ", " << par.par_LB_forceBlocks[i]; \
4242
} \
4343
o << "\n"; \

0 commit comments

Comments
 (0)