Skip to content

Commit 70320c3

Browse files
committed
Compiles on EPNs. Shadow processors still need to be added; before doing that, I will merge #14069 to pick up its changes in GPUChainTrackingClusterizer.
1 parent 46fb1e1 commit 70320c3

File tree

9 files changed

+90
-40
lines changed

9 files changed

+90
-40
lines changed

Common/ML/include/ML/OrtInterface.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,11 @@
2626
// O2 includes
2727
#include "Framework/Logger.h"
2828

29+
namespace Ort {
30+
struct SessionOptions;
31+
struct MemoryInfo;
32+
}
33+
2934
namespace o2
3035
{
3136

@@ -42,6 +47,8 @@ class OrtModel
4247
void init(std::unordered_map<std::string, std::string> optionsMap) { reset(optionsMap); }
4348
void reset(std::unordered_map<std::string, std::string>);
4449
bool isInitialized() { return mInitialized; }
50+
Ort::SessionOptions* updateSessionOptions();
51+
Ort::MemoryInfo* updateMemoryInfo();
4552

4653
virtual ~OrtModel() = default;
4754

Common/ML/src/OrtInterface.cxx

Lines changed: 41 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,16 @@ struct OrtModel::OrtVariables { // The actual implementation is hidden in the .c
3535
Ort::MemoryInfo memoryInfo = Ort::MemoryInfo("Cpu", OrtAllocatorType::OrtDeviceAllocator, 0, OrtMemType::OrtMemTypeDefault);
3636
};
3737

38+
Ort::SessionOptions* OrtModel::updateSessionOptions()
39+
{
40+
return &(pImplOrt->sessionOptions);
41+
}
42+
43+
Ort::MemoryInfo* OrtModel::updateMemoryInfo()
44+
{
45+
return &(pImplOrt->memoryInfo);
46+
}
47+
3848
void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap)
3949
{
4050

@@ -56,39 +66,41 @@ void OrtModel::reset(std::unordered_map<std::string, std::string> optionsMap)
5666
enableProfiling = (optionsMap.contains("enable-profiling") ? std::stoi(optionsMap["enable-profiling"]) : 0);
5767
enableOptimizations = (optionsMap.contains("enable-optimizations") ? std::stoi(optionsMap["enable-optimizations"]) : 0);
5868

59-
std::string dev_mem_str = "Hip";
60-
#if defined(ORT_ROCM_BUILD)
61-
#if ORT_ROCM_BUILD == 1
62-
if (device == "ROCM") {
63-
// Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(pImplOrt->sessionOptions, streamId));
64-
o2::gpu::SetONNXGPUStream(pImplOrt->sessionOptions, streamId);
65-
LOG(info) << "(ORT) ROCM execution provider set";
66-
}
67-
#endif
68-
#endif
69-
#if defined(ORT_MIGRAPHX_BUILD)
70-
#if ORT_MIGRAPHX_BUILD == 1
71-
if (device == "MIGRAPHX") {
72-
Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(pImplOrt->sessionOptions, streamId));
73-
LOG(info) << "(ORT) MIGraphX execution provider set";
74-
}
75-
#endif
76-
#endif
77-
#if defined(ORT_CUDA_BUILD)
78-
#if ORT_CUDA_BUILD == 1
79-
if (device == "CUDA") {
80-
// Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(pImplOrt->sessionOptions, streamId));
81-
o2::gpu::SetONNXGPUStream(pImplOrt->sessionOptions, streamId);
82-
LOG(info) << "(ORT) CUDA execution provider set";
83-
dev_mem_str = "Cuda";
84-
}
85-
#endif
86-
#endif
87-
69+
// #if defined(ORT_ROCM_BUILD) && ORT_ROCM_BUILD == 1
70+
// if (device == "ROCM") {
71+
// // Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_ROCM(pImplOrt->sessionOptions, streamId));
72+
// SetONNXGPUStream(pImplOrt->sessionOptions, streamId);
73+
// LOG(info) << "(ORT) ROCM execution provider set";
74+
// }
75+
// #endif
76+
// #if defined(ORT_MIGRAPHX_BUILD) && ORT_MIGRAPHX_BUILD == 1
77+
// if (device == "MIGRAPHX") {
78+
// Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_MIGraphX(pImplOrt->sessionOptions, streamId));
79+
// LOG(info) << "(ORT) MIGraphX execution provider set";
80+
// }
81+
// #endif
82+
// #if defined(ORT_CUDA_BUILD) && ORT_CUDA_BUILD == 1
83+
// if (device == "CUDA") {
84+
// // Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CUDA(pImplOrt->sessionOptions, streamId));
85+
// SetONNXGPUStream(pImplOrt->sessionOptions, streamId);
86+
// LOG(info) << "(ORT) CUDA execution provider set";
87+
// dev_mem_str = "Cuda";
88+
// }
89+
// #endif
90+
91+
#if (defined(ORT_ROCM_BUILD) && ORT_ROCM_BUILD == 1) || (defined(ORT_MIGRAPHX_BUILD) && ORT_MIGRAPHX_BUILD == 1) || (defined(ORT_CUDA_BUILD) && ORT_CUDA_BUILD == 1)
8892
if (allocateDeviceMemory) {
93+
std::string dev_mem_str = "";
94+
if (device == "ROCM") {
95+
dev_mem_str = "Hip";
96+
}
97+
if (device == "CUDA") {
98+
dev_mem_str = "Cuda";
99+
}
89100
pImplOrt->memoryInfo = Ort::MemoryInfo(dev_mem_str.c_str(), OrtAllocatorType::OrtDeviceAllocator, streamId, OrtMemType::OrtMemTypeDefault);
90101
LOG(info) << "(ORT) Memory info set to on-device memory";
91102
}
103+
#endif
92104

93105
if (device == "CPU") {
94106
(pImplOrt->sessionOptions).SetIntraOpNumThreads(intraOpNumThreads);

GPU/GPUTracking/Base/GPUReconstructionCPU.h

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@
2424
#include "GPUReconstructionKernelIncludes.h"
2525
#include "GPUReconstructionKernels.h"
2626

27+
namespace Ort {
28+
struct SessionOptions;
29+
}
30+
2731
namespace o2::gpu
2832
{
2933

@@ -111,6 +115,9 @@ class GPUReconstructionCPU : public GPUReconstructionKernels<GPUReconstructionCP
111115
size_t WriteToConstantMemory(size_t offset, const void* src, size_t size, int32_t stream = -1, deviceEvent* ev = nullptr) override;
112116
virtual size_t TransferMemoryInternal(GPUMemoryResource* res, int32_t stream, deviceEvent* ev, deviceEvent* evList, int32_t nEvents, bool toGPU, const void* src, void* dst);
113117

118+
// ONNX runtime
119+
virtual void SetONNXGPUStream(Ort::SessionOptions*, int32_t) {}
120+
114121
int32_t InitDevice() override;
115122
int32_t ExitDevice() override;
116123
int32_t GetThread();

GPU/GPUTracking/Base/GPUReconstructionProcessing.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -90,7 +90,7 @@ class GPUReconstructionProcessing : public GPUReconstruction
9090
void AddGPUEvents(T*& events);
9191

9292
virtual std::unique_ptr<gpu_reconstruction_kernels::threadContext> GetThreadContext() override;
93-
virtual int32_t SetONNXGPUStream(OrtSessionOptions* session_options, int32_t stream) { return 0; }
93+
virtual void SetONNXGPUStream(OrtSessionOptions* session_options, int32_t stream) {}
9494

9595
struct RecoStepTimerMeta {
9696
HighResTimer timerToGPU;

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.cu

Lines changed: 11 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -661,24 +661,23 @@ void GPUReconstructionCUDA::endGPUProfiling()
661661
GPUChkErr(cudaProfilerStop());
662662
}
663663

664-
#ifdef GPUCA_HAS_ONNX
665-
int32_t GPUReconstructionCUDA::SetONNXGPUStream(OrtSessionOptions* session_options, int32_t stream)
664+
#if defined(ORT_CUDA_BUILD) && ORT_CUDA_BUILD == 1
665+
void GPUReconstructionCUDA::SetONNXGPUStream(Ort::SessionOptions* session_options, int32_t stream)
666666
{
667667
OrtCUDAProviderOptionsV2* cuda_options = nullptr;
668668
CreateCUDAProviderOptions(&cuda_options);
669+
OrtSessionOptions* raw_options = session_options->operator OrtSessionOptions*();
669670

670671
// std::vector<const char*> keys{"device_id", "gpu_mem_limit", "arena_extend_strategy", "cudnn_conv_algo_search", "do_copy_in_default_stream", "cudnn_conv_use_max_workspace", "cudnn_conv1d_pad_to_nc1d"};
671672
// std::vector<const char*> values{"0", "2147483648", "kSameAsRequested", "DEFAULT", "1", "1", "1"};
672673
// UpdateCUDAProviderOptions(cuda_options, keys.data(), values.data(), keys.size());
673674

674675
// this implicitly sets "has_user_compute_stream"
675676
UpdateCUDAProviderOptionsWithValue(cuda_options, "user_compute_stream", &mInternals->Streams[stream]);
676-
Ort::ThrowOnError(SessionOptionsAppendExecutionProvider_CUDA_V2(session_options, cuda_options));
677+
Ort::ThrowOnError(SessionOptionsAppendExecutionProvider_CUDA_V2(raw_options, cuda_options));
677678

678679
// Finally, don't forget to release the provider options
679680
ReleaseCUDAProviderOptions(cuda_options);
680-
681-
return 0;
682681
}
683682
#endif // ORT_CUDA_BUILD
684683

@@ -690,19 +689,22 @@ void* GPUReconstructionHIP::getGPUPointer(void* ptr)
690689
return retVal;
691690
}
692691

693-
#ifdef GPUCA_HAS_ONNX
694-
int32_t GPUReconstructionHIP::SetONNXGPUStream(OrtSessionOptions* session_options, int32_t stream)
692+
#if defined(ORT_ROCM_BUILD) && ORT_ROCM_BUILD == 1
693+
void GPUReconstructionHIP::SetONNXGPUStream(Ort::SessionOptions* session_options, int32_t stream)
695694
{
696695
// Create ROCm provider options
697696
const auto& api = Ort::GetApi();
698697
OrtROCMProviderOptions rocm_options{};
699698
rocm_options.has_user_compute_stream = 1; // Indicate that we are passing a user stream
700699
rocm_options.user_compute_stream = &mInternals->Streams[stream];
701700

701+
// Get the raw OrtSessionOptions pointer from the Ort::SessionOptions wrapper
702+
OrtSessionOptions* raw_options = session_options->operator OrtSessionOptions*();
703+
702704
// Append the ROCm execution provider with the custom HIP stream
703-
Ort::ThrowOnError(api.SessionOptionsAppendExecutionProvider_ROCM(session_options, &rocm_options));
704-
return 0;
705+
Ort::ThrowOnError(api.SessionOptionsAppendExecutionProvider_ROCM(raw_options, &rocm_options));
705706
}
707+
706708
#endif // ORT_ROCM_BUILD
707709
#endif // __HIPCC__
708710

GPU/GPUTracking/Base/cuda/GPUReconstructionCUDA.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,10 @@ extern "C" __declspec(dllexport) o2::gpu::GPUReconstruction* GPUReconstruction_C
2525
extern "C" o2::gpu::GPUReconstruction* GPUReconstruction_Create_CUDA(const o2::gpu::GPUSettingsDeviceBackend& cfg);
2626
#endif
2727

28+
namespace Ort {
29+
struct SessionOptions;
30+
}
31+
2832
namespace o2::gpu
2933
{
3034
struct GPUReconstructionCUDAInternals;
@@ -79,7 +83,7 @@ class GPUReconstructionCUDA : public GPUReconstructionKernels<GPUReconstructionC
7983
size_t GPUMemCpy(void* dst, const void* src, size_t size, int32_t stream, int32_t toGPU, deviceEvent* ev = nullptr, deviceEvent* evList = nullptr, int32_t nEvents = 1) override;
8084
void ReleaseEvent(deviceEvent ev) override;
8185
void RecordMarker(deviceEvent* ev, int32_t stream) override;
82-
int32_t SetONNXGPUStream(OrtSessionOptions* session_options, int32_t stream) override;
86+
void SetONNXGPUStream(Ort::SessionOptions* session_options, int32_t stream) override;
8387

8488
void GetITSTraits(std::unique_ptr<o2::its::TrackerTraits>* trackerTraits, std::unique_ptr<o2::its::VertexerTraits>* vertexerTraits, std::unique_ptr<o2::its::TimeFrame>* timeFrame) override;
8589

GPU/GPUTracking/CMakeLists.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,20 @@ set(MODULE GPUTracking)
1414
# set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} -O0") # to uncomment if needed, tired of typing this...
1515
# set(GPUCA_BUILD_DEBUG 1)
1616

17+
# Forward the ORT_*_BUILD environment variables (when set) to the compiler as preprocessor definitions
18+
if(DEFINED ENV{ORT_ROCM_BUILD})
19+
add_compile_definitions(ORT_ROCM_BUILD=$ENV{ORT_ROCM_BUILD})
20+
endif()
21+
if(DEFINED ENV{ORT_CUDA_BUILD})
22+
add_compile_definitions(ORT_CUDA_BUILD=$ENV{ORT_CUDA_BUILD})
23+
endif()
24+
if(DEFINED ENV{ORT_MIGRAPHX_BUILD})
25+
add_compile_definitions(ORT_MIGRAPHX_BUILD=$ENV{ORT_MIGRAPHX_BUILD})
26+
endif()
27+
if(DEFINED ENV{ORT_TENSORRT_BUILD})
28+
add_compile_definitions(ORT_TENSORRT_BUILD=$ENV{ORT_TENSORRT_BUILD})
29+
endif()
30+
1731
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_NO_FAST_MATH})
1832
set(CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER} "${CMAKE_CXX_FLAGS_${CMAKE_BUILD_TYPE_UPPER}} ${GPUCA_CXX_NO_FAST_MATH_FLAGS}")
1933
if(GPUCA_DETERMINISTIC_MODE GREATER_EQUAL ${GPUCA_DETERMINISTIC_MODE_MAP_OPTO2})

GPU/GPUTracking/Global/GPUChain.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -83,6 +83,7 @@ class GPUChain
8383
inline GPUParam& param() { return mRec->param(); }
8484
inline const GPUConstantMem* processors() const { return mRec->processors(); }
8585
inline void SynchronizeStream(int32_t stream) { mRec->SynchronizeStream(stream); }
86+
inline void SetONNXGPUStream(Ort::SessionOptions* opt, int32_t stream) { mRec->SetONNXGPUStream(opt, stream); }
8687
inline void SynchronizeEvents(deviceEvent* evList, int32_t nEvents = 1) { mRec->SynchronizeEvents(evList, nEvents); }
8788
inline void SynchronizeEventAndRelease(deviceEvent& ev, bool doGPU = true)
8889
{

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -917,6 +917,9 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
917917
GPUTPCNNClusterizer& clustererNN = processors()->tpcNNClusterer[iSector];
918918
const GPUSettingsProcessingNNclusterizer& nn_settings = GetProcessingSettings().nn;
919919
GPUTPCNNClusterizerHost nnApplication(nn_settings, clustererNN, lane);
920+
SetONNXGPUStream(nnApplication.model_class.updateSessionOptions(), lane);
921+
SetONNXGPUStream(nnApplication.model_reg_1.updateSessionOptions(), lane);
922+
SetONNXGPUStream(nnApplication.model_reg_2.updateSessionOptions(), lane);
920923

921924
if (clustererNN.nnClusterizerUseCfRegression || (int)(nn_settings.nnClusterizerApplyCfDeconvolution)) {
922925
runKernel<GPUTPCCFDeconvolution>({GetGrid(clusterer.mPmemory->counters.nPositions, lane), {iSector}});

0 commit comments

Comments
 (0)