Skip to content

Commit 5284b01

Browse files
committed
Using char* buffer for model loading
1 parent 125f3e2 commit 5284b01

File tree

7 files changed

+109
-73
lines changed

7 files changed

+109
-73
lines changed

Common/ML/include/ML/OrtInterface.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -51,7 +51,7 @@ class OrtModel
5151
void initOptions(std::unordered_map<std::string, std::string> optionsMap);
5252
void initEnvironment();
5353
void initSession();
54-
void initSessionFromBuffer(const void* buffer, size_t bufferSize);
54+
void initSessionFromBuffer(const char* buffer, size_t bufferSize);
5555
void memoryOnDevice(int32_t = 0);
5656
bool isInitialized() { return mInitialized; }
5757
void resetSession();

Common/ML/src/OrtInterface.cxx

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -138,10 +138,13 @@ void OrtModel::initEnvironment()
138138
(mPImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events
139139
}
140140

141-
void OrtModel::initSessionFromBuffer(const void* buffer, size_t bufferSize)
141+
void OrtModel::initSessionFromBuffer(const char* buffer, size_t bufferSize)
142142
{
143+
mPImplOrt->sessionOptions.AddConfigEntry("session.load_model_format", "ONNX");
144+
mPImplOrt->sessionOptions.AddConfigEntry("session.use_ort_model_bytes_directly", "1");
145+
143146
mPImplOrt->session = std::make_unique<Ort::Session>(*mPImplOrt->env,
144-
static_cast<const uint8_t*>(buffer),
147+
buffer,
145148
bufferSize,
146149
mPImplOrt->sessionOptions);
147150
mPImplOrt->ioBinding = std::make_unique<Ort::IoBinding>(*mPImplOrt->session);

GPU/GPUTracking/DataTypes/GPUDataTypes.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -182,6 +182,10 @@ struct GPUCalibObjectsTemplate { // use only pointers on PODs or flat objects he
182182
typename S<o2::tpc::CalibdEdxContainer>::type* dEdxCalibContainer = nullptr;
183183
typename S<o2::base::PropagatorImpl<float>>::type* o2Propagator = nullptr;
184184
typename S<o2::itsmft::TopologyDictionary>::type* itsPatternDict = nullptr;
185+
186+
// NN clusterizer objects
187+
char* nnClusterizerNetworks[3] = {nullptr, nullptr, nullptr}; // [c, r1, r2] networks as char arrays from CCDB
188+
uint32_t nnClusterizerNetworkSizes[3] = {0, 0, 0};
185189
};
186190
typedef GPUCalibObjectsTemplate<DefaultPtr> GPUCalibObjects; // NOTE: These 2 must have identical layout since they are memcopied
187191
typedef GPUCalibObjectsTemplate<ConstPtr> GPUCalibObjectsConst;

GPU/GPUTracking/Global/GPUChainTrackingClusterizer.cxx

Lines changed: 16 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -639,7 +639,7 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
639639
// Maximum of 4 lanes supported
640640
HighResTimer* nnTimers[12];
641641

642-
if (GetProcessingSettings().nn.applyNNclusterizer) {
642+
if (nn_settings.applyNNclusterizer) {
643643
int32_t deviceId = -1;
644644
int32_t numLanes = GetProcessingSettings().nTPCClustererLanes;
645645
int32_t maxThreads = mRec->getNKernelHostThreads(true);
@@ -677,7 +677,11 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
677677
// nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator);
678678
// }
679679
// recreateMemoryAllocator = true;
680-
(nnApplications[lane].mModelClass).initSession();
680+
if (!nn_settings.nnLoadFromCCDB){
681+
(nnApplications[lane].mModelClass).initSession(); // loads from file
682+
} else {
683+
(nnApplications[lane].mModelClass).initSessionFromBuffer(processors()->calibObjects.nnClusterizerNetworks[0], processors()->calibObjects.nnClusterizerNetworkSizes[0]); // loads from CCDB
684+
}
681685
}
682686
if (nnApplications[lane].mModelsUsed[1]) {
683687
SetONNXGPUStream(*(nnApplications[lane].mModelReg1).getSessionOptions(), lane, &deviceId);
@@ -688,7 +692,11 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
688692
// (nnApplications[lane].mModelReg1).setEnv((nnApplications[lane].mModelClass).getEnv());
689693
(nnApplications[lane].mModelReg1).initEnvironment();
690694
// nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelReg1).getEnv(), (nnApplications[lane].mModelReg1).getMemoryInfo(), mRec, recreateMemoryAllocator);
691-
(nnApplications[lane].mModelReg1).initSession();
695+
if (!nn_settings.nnLoadFromCCDB){
696+
(nnApplications[lane].mModelReg1).initSession(); // loads from file
697+
} else {
698+
(nnApplications[lane].mModelReg1).initSessionFromBuffer(processors()->calibObjects.nnClusterizerNetworks[1], processors()->calibObjects.nnClusterizerNetworkSizes[1]); // loads from CCDB
699+
}
692700
}
693701
if (nnApplications[lane].mModelsUsed[2]) {
694702
SetONNXGPUStream(*(nnApplications[lane].mModelReg2).getSessionOptions(), lane, &deviceId);
@@ -699,7 +707,11 @@ int32_t GPUChainTracking::RunTPCClusterizer(bool synchronizeOutput)
699707
// (nnApplications[lane].mModelReg2).setEnv((nnApplications[lane].mModelClass).getEnv());
700708
(nnApplications[lane].mModelReg2).initEnvironment();
701709
// nnApplications[lane].directOrtAllocator((nnApplications[lane].mModelClass).getEnv(), (nnApplications[lane].mModelClass).getMemoryInfo(), mRec, recreateMemoryAllocator);
702-
(nnApplications[lane].mModelReg2).initSession();
710+
if (!nn_settings.nnLoadFromCCDB){
711+
(nnApplications[lane].mModelReg2).initSession(); // loads from file
712+
} else {
713+
(nnApplications[lane].mModelReg2).initSessionFromBuffer(processors()->calibObjects.nnClusterizerNetworks[2], processors()->calibObjects.nnClusterizerNetworkSizes[2]); // loads from CCDB
714+
}
703715
}
704716
if (nn_settings.nnClusterizerVerbosity > 0) {
705717
LOG(info) << "(ORT) Allocated ONNX stream for lane " << lane << " and device " << deviceId;

GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,8 @@ class GPURecoWorkflowSpec : public o2::framework::Task
138138

139139
// NN clusterizer
140140
bool nnLoadFromCCDB = false;
141+
bool nnDumpToFile = false;
142+
std::vector<std::string> nnEvalMode;
141143
};
142144

143145
GPURecoWorkflowSpec(CompletionPolicyData* policyData, Config const& specconfig, std::vector<int32_t> const& tpcsectors, uint64_t tpcSectorMask, std::shared_ptr<o2::base::GRPGeomRequest>& ggr, std::function<bool(o2::framework::DataProcessingHeader::StartTime)>** gPolicyOrder = nullptr);

GPU/Workflow/src/GPUWorkflowSpec.cxx

Lines changed: 11 additions & 66 deletions
Original file line numberDiff line numberDiff line change
@@ -770,64 +770,6 @@ void GPURecoWorkflowSpec::run(ProcessingContext& pc)
770770

771771
// ------------------------------ Actual processing ------------------------------
772772

773-
if (mSpecConfig.nnLoadFromCCDB) {
774-
LOG(info) << "(NN CLUS) Fetching CCDB calib objects";
775-
776-
auto dumpOnnxToFile = [](const char* buffer, std::size_t size, const std::string& path) {
777-
const char* marker = "Accept-Ranges";
778-
const char* pos = std::search(buffer, buffer + size, marker, marker + std::strlen(marker));
779-
780-
// Compute the actual number of bytes to write
781-
std::size_t writeSize = (pos != buffer + size)
782-
? static_cast<std::size_t>(pos - buffer)
783-
: size;
784-
785-
std::ofstream out(path, std::ios::binary | std::ios::trunc);
786-
if (!out.is_open()) {
787-
throw std::runtime_error("Failed to open ONNX output file: " + path);
788-
}
789-
790-
out.write(buffer, static_cast<std::streamsize>(writeSize));
791-
if (!out) {
792-
throw std::runtime_error("Failed while writing ONNX data to: " + path);
793-
}
794-
};
795-
796-
GPUSettingsProcessingNNclusterizer& nnClusterizerSettings = mConfig->configProcessing.nn;
797-
std::vector<std::string> evalMode = o2::utils::Str::tokenize(nnClusterizerSettings.nnEvalMode, ':');
798-
799-
DataRef m;
800-
if (evalMode[0] == "c1") {
801-
m = pc.inputs().get("nn_classification_c1");
802-
const char* buffer = const_cast<char*>(m.payload);
803-
size_t size = DataRefUtils::getPayloadSize(m);
804-
if (nnClusterizerSettings.nnCCDBDumpToFile == 1) {
805-
dumpOnnxToFile(buffer, size, "net_classification_c1.onnx");
806-
}
807-
} else if (evalMode[0] == "c2") {
808-
m = pc.inputs().get("nn_classification_c2");
809-
const char* buffer = const_cast<char*>(m.payload);
810-
size_t size = DataRefUtils::getPayloadSize(m);
811-
if (nnClusterizerSettings.nnCCDBDumpToFile == 1) {
812-
dumpOnnxToFile(buffer, size, "net_classification_c2.onnx");
813-
}
814-
}
815-
816-
m = pc.inputs().get("nn_regression_c1");
817-
const char* buffer = const_cast<char*>(m.payload);
818-
size_t size = DataRefUtils::getPayloadSize(m);
819-
if (nnClusterizerSettings.nnCCDBDumpToFile == 1) {
820-
dumpOnnxToFile(buffer, size, "net_regression_c1.onnx");
821-
}
822-
if (evalMode[1] == "r2") {
823-
m = pc.inputs().get("nn_regression_c2");
824-
const char* buffer = const_cast<char*>(m.payload);
825-
size_t size = DataRefUtils::getPayloadSize(m);
826-
if (nnClusterizerSettings.nnCCDBDumpToFile == 1) {
827-
dumpOnnxToFile(buffer, size, "net_regression_c2.onnx");
828-
}
829-
}
830-
}
831773
if ((int32_t)(ptrs.tpcZS != nullptr) + (int32_t)(ptrs.tpcPackedDigits != nullptr && (ptrs.tpcZS == nullptr || ptrs.tpcPackedDigits->tpcDigitsMC == nullptr)) + (int32_t)(ptrs.clustersNative != nullptr) + (int32_t)(ptrs.tpcCompressedClusters != nullptr) != 1) {
832774
throw std::runtime_error("Invalid input for gpu tracking");
833775
}
@@ -1138,6 +1080,12 @@ void GPURecoWorkflowSpec::doCalibUpdates(o2::framework::ProcessingContext& pc, c
11381080
newCalibValues.tpcTimeBinCut = mConfig->configGRP.tpcCutTimeBin = mTPCCutAtTimeBin;
11391081
needCalibUpdate = true;
11401082
}
1083+
// Hand the NN clusterizer network buffers (pointer + byte count per slot) that are
// stored in mConfig->configCalib over to the calibration-object set passed to
// UpdateCalibration below.
// NOTE(review): this branch does not set needCalibUpdate, so the copied pointers
// only reach UpdateCalibration when some other calibration change raised the
// flag -- confirm that this is intended.
if (mSpecConfig.nnLoadFromCCDB) {
  auto& fetched = mConfig->configCalib;
  auto& target = newCalibObjects;
  for (int slot = 0; slot != 3; ++slot) {
    target.nnClusterizerNetworks[slot] = fetched.nnClusterizerNetworks[slot];
    target.nnClusterizerNetworkSizes[slot] = fetched.nnClusterizerNetworkSizes[slot];
  }
}
11411089
if (needCalibUpdate) {
11421090
LOG(info) << "Updating GPUReconstruction calibration objects";
11431091
mGPUReco->UpdateCalibration(newCalibObjects, newCalibValues);
@@ -1282,6 +1230,7 @@ Inputs GPURecoWorkflowSpec::inputs()
12821230

12831231
LOG(info) << "(NN CLUS) Enabling fetching of TPC NN clusterizer from CCDB";
12841232
mSpecConfig.nnLoadFromCCDB = true;
1233+
mSpecConfig.nnDumpToFile = mConfig->configProcessing.nn.nnCCDBDumpToFile;
12851234
GPUSettingsProcessingNNclusterizer& nnClusterizerSettings = mConfig->configProcessing.nn;
12861235

12871236
std::map<std::string, std::string> metadata;
@@ -1300,7 +1249,7 @@ Inputs GPURecoWorkflowSpec::inputs()
13001249
}
13011250
};
13021251

1303-
std::vector<std::string> evalMode = o2::utils::Str::tokenize(nnClusterizerSettings.nnEvalMode, ':');
1252+
mSpecConfig.nnEvalMode = o2::utils::Str::tokenize(nnClusterizerSettings.nnEvalMode, ':');
13041253
std::vector<o2::framework::CCDBMetadata> ccdb_metadata;
13051254

13061255
if (mConfParam->printSettings) {
@@ -1313,29 +1262,25 @@ Inputs GPURecoWorkflowSpec::inputs()
13131262
printSettings(metadata);
13141263
}
13151264

1316-
if (evalMode[0] == "c1") {
1265+
if (mSpecConfig.nnEvalMode[0] == "c1") {
13171266
metadata["nnCCDBEvalType"] = "classification_c1";
13181267
convert_map_to_metadata(metadata, ccdb_metadata);
13191268
inputs.emplace_back("nn_classification_c1", "TPC", "NNCLUSTERIZER_C1", 0, Lifetime::Condition, ccdbParamSpec(nnClusterizerSettings.nnCCDBPath, ccdb_metadata, 0));
1320-
LOG(info) << "(NN CLUS) Loading NN clusterizer classification (c1) from CCDB";
1321-
} else if (evalMode[0] == "c2") {
1269+
} else if (mSpecConfig.nnEvalMode[0] == "c2") {
13221270
metadata["nnCCDBEvalType"] = "classification_c2";
13231271
convert_map_to_metadata(metadata, ccdb_metadata);
13241272
inputs.emplace_back("nn_classification_c2", "TPC", "NNCLUSTERIZER_C2", 0, Lifetime::Condition, ccdbParamSpec(nnClusterizerSettings.nnCCDBPath, ccdb_metadata, 0));
1325-
LOG(info) << "(NN CLUS) Loading NN clusterizer classification (c2) from CCDB";
13261273
}
13271274

13281275
metadata["nnCCDBEvalType"] = "regression_c1";
13291276
metadata["nnCCDBLayerType"] = nnClusterizerSettings.nnCCDBRegressionLayerType;
13301277
convert_map_to_metadata(metadata, ccdb_metadata);
13311278
inputs.emplace_back("nn_regression_c1", "TPC", "NNCLUSTERIZER_R1", 0, Lifetime::Condition, ccdbParamSpec(nnClusterizerSettings.nnCCDBPath, ccdb_metadata, 0));
1332-
LOG(info) << "(NN CLUS) Loading NN clusterizer regression (r1) from CCDB";
13331279

1334-
if (evalMode[1] == "r2") {
1280+
if (mSpecConfig.nnEvalMode[1] == "r2") {
13351281
metadata["nnCCDBEvalType"] = "regression_c2";
13361282
convert_map_to_metadata(metadata, ccdb_metadata);
13371283
inputs.emplace_back("nn_regression_c2", "TPC", "NNCLUSTERIZER_R2", 0, Lifetime::Condition, ccdbParamSpec(nnClusterizerSettings.nnCCDBPath, ccdb_metadata, 0));
1338-
LOG(info) << "(NN CLUS) Loading NN clusterizer regression (r2) from CCDB";
13391284
}
13401285
}
13411286

GPU/Workflow/src/GPUWorkflowTPC.cxx

Lines changed: 70 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -405,6 +405,76 @@ bool GPURecoWorkflowSpec::fetchCalibsCCDBTPC<GPUCalibObjectsConst>(ProcessingCon
405405
newCalibObjects.tpcPadGain = mCalibObjects.mTPCPadGainCalib.get();
406406
mustUpdate = true;
407407
}
408+
409+
// NN clusterizer networks
410+
if (mSpecConfig.nnLoadFromCCDB) {
411+
412+
// Returns how many leading bytes of `buffer` belong to the object proper: the
// offset of the LAST occurrence of the literal "Accept-Ranges" within
// [buffer, buffer + size), or `size` unchanged when the marker does not occur.
// NOTE(review): presumably the marker is the start of trailing HTTP header text
// attached to the CCDB payload -- confirm against the CCDB fetcher.
auto findValidObjectEnd = [](const char* buffer, std::size_t size) {
  constexpr char marker[] = "Accept-Ranges";
  constexpr std::size_t markerLen = sizeof(marker) - 1; // exclude the terminating '\0'

  const char* const end = buffer + size;
  // std::find_end yields the start of the last match, or `end` when there is
  // none, so the "marker not found" case naturally evaluates to `size`.
  const char* const pos = std::find_end(buffer, end, marker, marker + markerLen);
  return static_cast<std::size_t>(pos - buffer);
};
429+
430+
// Writes the first `validSize` bytes of `buffer` to the file at `path`, opened in
// binary mode and truncated if it already exists.
// Throws std::runtime_error when the file cannot be opened or when the stream is
// in a failed state after the write.
auto dumpToFile = [](const char* buffer, std::size_t validSize, const std::string& path) {
  std::ofstream sink;
  sink.open(path, std::ios::binary | std::ios::trunc);
  if (!sink.is_open()) {
    throw std::runtime_error("Failed to open output file: " + path);
  }

  const auto byteCount = static_cast<std::streamsize>(validSize);
  sink.write(buffer, byteCount);
  if (sink.fail()) {
    throw std::runtime_error("Failed while writing data to: " + path);
  }
};
441+
442+
DataRef m;
443+
if (mSpecConfig.nnEvalMode[0] == "c1") {
444+
m = pc.inputs().get("nn_classification_c1");
445+
mConfig->configCalib.nnClusterizerNetworks[0] = const_cast<char*>(m.payload);
446+
size_t size = DataRefUtils::getPayloadSize(m);
447+
mConfig->configCalib.nnClusterizerNetworkSizes[0] = findValidObjectEnd(mConfig->configCalib.nnClusterizerNetworks[0], size);
448+
if (mSpecConfig.nnDumpToFile) {
449+
dumpToFile(mConfig->configCalib.nnClusterizerNetworks[0], mConfig->configCalib.nnClusterizerNetworkSizes[0], "net_classification_c1.onnx");
450+
}
451+
} else if (mSpecConfig.nnEvalMode[0] == "c2") {
452+
m = pc.inputs().get("nn_classification_c2");
453+
mConfig->configCalib.nnClusterizerNetworks[0] = const_cast<char*>(m.payload);
454+
size_t size = DataRefUtils::getPayloadSize(m);
455+
mConfig->configCalib.nnClusterizerNetworkSizes[0] = findValidObjectEnd(mConfig->configCalib.nnClusterizerNetworks[0], size);
456+
if (mSpecConfig.nnDumpToFile) {
457+
dumpToFile(mConfig->configCalib.nnClusterizerNetworks[0], mConfig->configCalib.nnClusterizerNetworkSizes[0], "net_classification_c2.onnx");
458+
}
459+
}
460+
461+
m = pc.inputs().get("nn_regression_c1");
462+
mConfig->configCalib.nnClusterizerNetworks[2] = const_cast<char*>(m.payload);
463+
size_t size = DataRefUtils::getPayloadSize(m);
464+
mConfig->configCalib.nnClusterizerNetworkSizes[2] = findValidObjectEnd(mConfig->configCalib.nnClusterizerNetworks[2], size);
465+
if (mSpecConfig.nnDumpToFile) {
466+
dumpToFile(mConfig->configCalib.nnClusterizerNetworks[2], mConfig->configCalib.nnClusterizerNetworkSizes[2], "net_regression_c1.onnx");
467+
}
468+
if (mSpecConfig.nnEvalMode[1] == "r2") {
469+
m = pc.inputs().get("nn_regression_c2");
470+
mConfig->configCalib.nnClusterizerNetworks[3] = const_cast<char*>(m.payload);
471+
size_t size = DataRefUtils::getPayloadSize(m);
472+
mConfig->configCalib.nnClusterizerNetworkSizes[3] = findValidObjectEnd(mConfig->configCalib.nnClusterizerNetworks[3], size);
473+
if (mSpecConfig.nnDumpToFile) {
474+
dumpToFile(mConfig->configCalib.nnClusterizerNetworks[3], mConfig->configCalib.nnClusterizerNetworkSizes[3], "net_regression_c2.onnx");
475+
}
476+
}
477+
}
408478
}
409479
return mustUpdate;
410480
}

0 commit comments

Comments
 (0)