1313// / \author David Rohr
1414
1515#define GPUCA_GPUCODE_HOSTONLY
16+ #define GPUCA_DEF_PARAMETERS_LOAD_DEFAULTS
17+ #include " GPUDefParametersDefault.h"
18+ #include " GPUDefParametersLoad.inc"
19+
1620#include " GPUReconstructionCUDA.h"
1721#include " GPUParamRTC.h"
1822#include " GPUDefMacros.h"
@@ -55,13 +59,15 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
5559 std::string baseCommand = (mProcessingSettings .RTCprependCommand != " " ? (mProcessingSettings .RTCprependCommand + " " ) : " " );
5660 baseCommand += (getenv (" O2_GPU_RTC_OVERRIDE_CMD" ) ? std::string (getenv (" O2_GPU_RTC_OVERRIDE_CMD" )) : std::string (_binary_GPUReconstructionCUDArtc_command_start, _binary_GPUReconstructionCUDArtc_command_len));
5761 baseCommand += std::string (" " ) + (mProcessingSettings .RTCoverrideArchitecture != " " ? mProcessingSettings .RTCoverrideArchitecture : std::string (_binary_GPUReconstructionCUDArtc_command_arch_start, _binary_GPUReconstructionCUDArtc_command_arch_len));
62+ const std::string launchBounds = o2::gpu::internal::GPUDefParametersExport (*mParDevice , true );
5863
59- char shasource[21 ], shaparam[21 ], shacmd[21 ], shakernels[21 ];
64+ char shasource[21 ], shaparam[21 ], shacmd[21 ], shakernels[21 ], shabounds[ 21 ] ;
6065 if (mProcessingSettings .rtc .cacheOutput ) {
6166 o2::framework::internal::SHA1 (shasource, _binary_GPUReconstructionCUDArtc_src_start, _binary_GPUReconstructionCUDArtc_src_len);
6267 o2::framework::internal::SHA1 (shaparam, rtcparam.c_str (), rtcparam.size ());
6368 o2::framework::internal::SHA1 (shacmd, baseCommand.c_str (), baseCommand.size ());
6469 o2::framework::internal::SHA1 (shakernels, kernelsall.c_str (), kernelsall.size ());
70+ o2::framework::internal::SHA1 (shabounds, launchBounds.c_str (), launchBounds.size ());
6571 }
6672
6773 nCompile = mProcessingSettings .rtc .compilePerKernel ? kernels.size () : 1 ;
@@ -88,32 +94,29 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
8894 if (fp) {
8995 size_t len;
9096 while (true ) {
91- if (fread (sharead, 1 , 20 , fp) != 20 ) {
92- throw std::runtime_error (" Cache file corrupt" );
93- }
94- if (!mProcessingSettings .rtc .ignoreCacheValid && memcmp (sharead, shasource, 20 )) {
95- GPUInfo (" Cache file content outdated (source)" );
96- break ;
97- }
98- if (fread (sharead, 1 , 20 , fp) != 20 ) {
99- throw std::runtime_error (" Cache file corrupt" );
100- }
101- if (!mProcessingSettings .rtc .ignoreCacheValid && memcmp (sharead, shaparam, 20 )) {
102- GPUInfo (" Cache file content outdated (param)" );
103- break ;
104- }
105- if (fread (sharead, 1 , 20 , fp) != 20 ) {
106- throw std::runtime_error (" Cache file corrupt" );
107- }
108- if (!mProcessingSettings .rtc .ignoreCacheValid && memcmp (sharead, shacmd, 20 )) {
109- GPUInfo (" Cache file content outdated (commandline)" );
110- break ;
111- }
112- if (fread (sharead, 1 , 20 , fp) != 20 ) {
113- throw std::runtime_error (" Cache file corrupt" );
114- }
115- if (!mProcessingSettings .rtc .ignoreCacheValid && memcmp (sharead, shakernels, 20 )) {
116- GPUInfo (" Cache file content outdated (kernel definitions)" );
97+ auto checkSHA = [&](const char * shacmp, const char * name) {
98+ if (fread (sharead, 1 , 20 , fp) != 20 ) {
99+ throw std::runtime_error (" Cache file corrupt" );
100+ }
101+ if (mProcessingSettings .debugLevel >= 3 ) {
102+ char shaprint1[41 ], shaprint2[41 ];
103+ for (uint32_t i = 0 ; i < 20 ; i++) {
104+ sprintf (shaprint1 + 2 * i, " %02X " , shacmp[i]);
105+ sprintf (shaprint2 + 2 * i, " %02X " , sharead[i]);
106+ }
107+ GPUInfo (" SHA for %s: expected %s, read %s" , name, shaprint1, shaprint2);
108+ }
109+ if (!mProcessingSettings .rtc .ignoreCacheValid && memcmp (sharead, shacmp, 20 )) {
110+ GPUInfo (" Cache file content outdated (%s)" , name);
111+ return 1 ;
112+ }
113+ return 0 ;
114+ };
115+ if (checkSHA (shasource, " source" ) ||
116+ checkSHA (shaparam, " param" ) ||
117+ checkSHA (shacmd, " command line" ) ||
118+ checkSHA (shakernels, " kernel definitions" ) ||
119+ checkSHA (shabounds, " launch bounds" )) {
117120 break ;
118121 }
119122 GPUSettingsProcessingRTC cachedSettings;
@@ -169,11 +172,12 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
169172 kernel += mProcessingSettings .rtc .compilePerKernel ? kernels[i] : kernelsall;
170173 kernel += " }" ;
171174
172- bool deterministic = mProcessingSettings .rtc .deterministic || o2::gpu::internal::noFastMathKernels.find (GetKernelName (i)) != o2::gpu::internal::noFastMathKernels.end ();
175+ bool deterministic = mProcessingSettings .rtc .deterministic || ( mProcessingSettings . rtc . compilePerKernel && o2::gpu::internal::noFastMathKernels.find (GetKernelName (i)) != o2::gpu::internal::noFastMathKernels.end () );
173176 const std::string deterministicStr = std::string (deterministic ? " #define GPUCA_DETERMINISTIC_CODE(det, indet) det\n " : " #define GPUCA_DETERMINISTIC_CODE(det, indet) indet\n " );
174177
175178 if (fwrite (deterministicStr.c_str (), 1 , deterministicStr.size (), fp) != deterministicStr.size () ||
176179 fwrite (rtcparam.c_str (), 1 , rtcparam.size (), fp) != rtcparam.size () ||
180+ fwrite (launchBounds.c_str (), 1 , launchBounds.size (), fp) != launchBounds.size () ||
177181 fwrite (_binary_GPUReconstructionCUDArtc_src_start, 1 , _binary_GPUReconstructionCUDArtc_src_len, fp) != _binary_GPUReconstructionCUDArtc_src_len ||
178182 fwrite (kernel.c_str (), 1 , kernel.size (), fp) != kernel.size ()) {
179183 throw std::runtime_error (" Error writing file" );
@@ -213,6 +217,7 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
213217 fwrite (shaparam, 1 , 20 , fp) != 20 ||
214218 fwrite (shacmd, 1 , 20 , fp) != 20 ||
215219 fwrite (shakernels, 1 , 20 , fp) != 20 ||
220+ fwrite (shabounds, 1 , 20 , fp) != 20 ||
216221 fwrite (&mProcessingSettings .rtc , sizeof (mProcessingSettings .rtc ), 1 , fp) != 1 ) {
217222 throw std::runtime_error (" Error writing cache file" );
218223 }
0 commit comments