@@ -31,11 +31,12 @@ QGET_LD_BINARY_SYMBOLS(GPUReconstructionCUDArtc_command);
// NOTE(review): the definition of QGET_LD_BINARY_SYMBOLS is not visible here. Presumably each
// invocation declares the matching _binary_<name>_start / _binary_<name>_len symbols; genRTC
// builds std::string objects from those pairs for the "command_arch" and
// "command_no_fast_math" blobs when assembling the RTC compile command. Confirm against the
// macro definition.
3131QGET_LD_BINARY_SYMBOLS (GPUReconstructionCUDArtc_command_arch);
3232QGET_LD_BINARY_SYMBOLS (GPUReconstructionCUDArtc_command_no_fast_math);
3333
34+ #include " GPUNoFastMathKernels.h"
35+
3436int32_t GPUReconstructionCUDA::genRTC (std::string& filename, uint32_t & nCompile)
3537{
3638 std::string rtcparam = std::string (" #define GPUCA_RTC_CODE\n " ) +
3739 std::string (mProcessingSettings .rtc .optSpecialCode ? " #define GPUCA_RTC_SPECIAL_CODE(...) __VA_ARGS__\n " : " #define GPUCA_RTC_SPECIAL_CODE(...)\n " ) +
38- std::string (mProcessingSettings .rtc .deterministic ? " #define GPUCA_DETERMINISTIC_CODE(det, indet) det\n " : " #define GPUCA_DETERMINISTIC_CODE(det, indet) indet\n " ) +
3940 GPUParamRTC::generateRTCCode (param (), mProcessingSettings .rtc .optConstexpr );
4041 if (filename == " " ) {
4142 filename = " /tmp/o2cagpu_rtc_" ;
@@ -54,7 +55,6 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
5455 std::string baseCommand = (mProcessingSettings .RTCprependCommand != " " ? (mProcessingSettings .RTCprependCommand + " " ) : " " );
5556 baseCommand += (getenv (" O2_GPU_RTC_OVERRIDE_CMD" ) ? std::string (getenv (" O2_GPU_RTC_OVERRIDE_CMD" )) : std::string (_binary_GPUReconstructionCUDArtc_command_start, _binary_GPUReconstructionCUDArtc_command_len));
5657 baseCommand += std::string (" " ) + (mProcessingSettings .RTCoverrideArchitecture != " " ? mProcessingSettings .RTCoverrideArchitecture : std::string (_binary_GPUReconstructionCUDArtc_command_arch_start, _binary_GPUReconstructionCUDArtc_command_arch_len));
57- baseCommand += mProcessingSettings .rtc .deterministic ? (std::string (" " ) + std::string (_binary_GPUReconstructionCUDArtc_command_no_fast_math_start, _binary_GPUReconstructionCUDArtc_command_no_fast_math_len)) : std::string (" " );
5858
5959 char shasource[21 ], shaparam[21 ], shacmd[21 ], shakernels[21 ];
6060 if (mProcessingSettings .rtc .cacheOutput ) {
@@ -169,13 +169,20 @@ int32_t GPUReconstructionCUDA::genRTC(std::string& filename, uint32_t& nCompile)
169169 kernel += mProcessingSettings .rtc .compilePerKernel ? kernels[i] : kernelsall;
170170 kernel += " }" ;
171171
172- if (fwrite (rtcparam.c_str (), 1 , rtcparam.size (), fp) != rtcparam.size () ||
172+ bool deterministic = mProcessingSettings .rtc .deterministic || o2::gpu::internal::noFastMathKernels.find (GetKernelName (i)) != o2::gpu::internal::noFastMathKernels.end ();
173+ const std::string deterministicStr = std::string (deterministic ? " #define GPUCA_DETERMINISTIC_CODE(det, indet) det\n " : " #define GPUCA_DETERMINISTIC_CODE(det, indet) indet\n " );
174+
175+ if (fwrite (deterministicStr.c_str (), 1 , deterministicStr.size (), fp) != deterministicStr.size () ||
176+ fwrite (rtcparam.c_str (), 1 , rtcparam.size (), fp) != rtcparam.size () ||
173177 fwrite (_binary_GPUReconstructionCUDArtc_src_start, 1 , _binary_GPUReconstructionCUDArtc_src_len, fp) != _binary_GPUReconstructionCUDArtc_src_len ||
174178 fwrite (kernel.c_str (), 1 , kernel.size (), fp) != kernel.size ()) {
175179 throw std::runtime_error (" Error writing file" );
176180 }
177181 fclose (fp);
178182 std::string command = baseCommand;
183+ if (deterministic) {
184+ command += std::string (" " ) + std::string (_binary_GPUReconstructionCUDArtc_command_no_fast_math_start, _binary_GPUReconstructionCUDArtc_command_no_fast_math_len);
185+ }
179186 command += " -c " + filename + " _" + std::to_string (i) + mRtcSrcExtension + " -o " + filename + " _" + std::to_string (i) + mRtcBinExtension ;
180187 if (mProcessingSettings .debugLevel < 0 ) {
181188 command += " &> /dev/null" ;
0 commit comments