Skip to content

Commit 3775044

Browse files
committed
Working version of NN CCDB fetching and loading to file
1 parent a2aaf8e commit 3775044

File tree

6 files changed

+151
-50
lines changed

6 files changed

+151
-50
lines changed

Common/ML/include/ML/OrtInterface.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,7 @@ class OrtModel
5151
void initOptions(std::unordered_map<std::string, std::string> optionsMap);
5252
void initEnvironment();
5353
void initSession();
54+
/// Creates the inference session from a serialized model held in memory (`buffer`, `bufferSize` bytes).
void initSessionFromBuffer(const void* buffer, size_t bufferSize);
5455
void memoryOnDevice(int32_t = 0);
5556
bool isInitialized() { return mInitialized; }
5657
void resetSession();

Common/ML/src/OrtInterface.cxx

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -138,6 +138,21 @@ void OrtModel::initEnvironment()
138138
(mPImplOrt->env)->DisableTelemetryEvents(); // Disable telemetry events
139139
}
140140

141+
void OrtModel::initSessionFromBuffer(const void* buffer, size_t bufferSize)
142+
{
143+
mPImplOrt->session = std::make_unique<Ort::Session>(*mPImplOrt->env,
144+
static_cast<const uint8_t*>(buffer),
145+
bufferSize,
146+
mPImplOrt->sessionOptions);
147+
mPImplOrt->ioBinding = std::make_unique<Ort::IoBinding>(*mPImplOrt->session);
148+
149+
setIO();
150+
151+
if (mLoggingLevel < 2) {
152+
LOG(info) << "(ORT) Model loaded successfully from buffer! (inputs: " << printShape(mInputShapes, mInputNames) << ", outputs: " << printShape(mOutputShapes, mInputNames) << ")";
153+
}
154+
}
155+
141156
void OrtModel::initSession()
142157
{
143158
if (mAllocateDeviceMemory) {

Detectors/TPC/calibration/src/NeuralNetworkClusterizer.cxx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,8 @@ void NeuralNetworkClusterizer::loadIndividualFromCCDB(std::map<std::string, std:
3737
metadata["nnCCDBBeamType"] = settings["nnCCDBBeamType"];
3838
}
3939

40+
LOG(info) << "(NN CLUS) Retrieving network " << settings["nnCCDBPath"] << " from CCDB (NeuralNetworkClusterizer.cxx)";
41+
4042
bool retrieveSuccess = ccdbApi.retrieveBlob(settings["nnCCDBPath"], settings["outputFolder"], metadata, 1, false, settings["outputFile"]);
4143
// headers = ccdbApi.retrieveHeaders(settings["nnPathCCDB"], metadata, 1); // potentially needed to init some local variables
4244

GPU/GPUTracking/Definitions/GPUSettingsList.h

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -285,14 +285,14 @@ AddOption(nnClusterizerUseClassification, int, 1, "", 0, "If 1, the classificati
285285
AddOption(nnClusterizerForceGpuInputFill, int, 0, "", 0, "Forces to use the fillInputNNGPU function")
286286
// CCDB
287287
AddOption(nnLoadFromCCDB, int, 0, "", 0, "If 1 networks are fetched from ccdb, else locally")
288+
AddOption(nnCCDBDumpToFile, int, 1, "", 0, "If 1, additionally dump fetched CCDB networks to nnLocalFolder")
288289
AddOption(nnLocalFolder, std::string, ".", "", 0, "Local folder in which the networks will be fetched")
289-
AddOption(nnCCDBURL, std::string, "http://ccdb-test.cern.ch:8080", "", 0, "The CCDB URL from where the network files are fetched")
290290
AddOption(nnCCDBPath, std::string, "Users/c/csonnabe/TPC/Clusterization", "", 0, "Folder path containing the networks")
291-
AddOption(nnCCDBWithMomentum, int, 1, "", 0, "Distinguishes between the network with and without momentum output for the regression")
291+
AddOption(nnCCDBWithMomentum, std::string, "", "", 0, "Distinguishes between the network with and without momentum output for the regression")
292292
AddOption(nnCCDBClassificationLayerType, std::string, "FC", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN")
293-
AddOption(nnCCDBRegressionLayerType, std::string, "CNN", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN")
294-
AddOption(nnCCDBBeamType, std::string, "PbPb", "", 0, "Distinguishes between networks trained for different beam types. Options: PbPb, pp")
295-
AddOption(nnCCDBInteractionRate, int, 50, "", 0, "Distinguishes between networks for different interaction rates [kHz].")
293+
AddOption(nnCCDBRegressionLayerType, std::string, "FC", "", 0, "Distinguishes between network with different layer types. Options: FC, CNN")
294+
AddOption(nnCCDBBeamType, std::string, "PbPb", "", 0, "Distinguishes between networks trained for different beam types. Options: PbPb, pp, pPb")
295+
AddOption(nnCCDBInteractionRate, std::string, "500", "", 0, "Distinguishes between networks for different interaction rates [kHz].")
296296
AddHelp("help", 'h')
297297
EndConfig()
298298

GPU/Workflow/include/GPUWorkflow/GPUWorkflowSpec.h

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -135,6 +135,9 @@ class GPURecoWorkflowSpec : public o2::framework::Task
135135
bool tpcTriggerHandling = false;
136136
bool isITS3 = false;
137137
bool useFilteredOutputSpecs = false;
138+
139+
// NN clusterizer
140+
bool nnLoadFromCCDB = false;
138141
};
139142

140143
GPURecoWorkflowSpec(CompletionPolicyData* policyData, Config const& specconfig, std::vector<int32_t> const& tpcsectors, uint64_t tpcSectorMask, std::shared_ptr<o2::base::GRPGeomRequest>& ggr, std::function<bool(o2::framework::DataProcessingHeader::StartTime)>** gPolicyOrder = nullptr);
@@ -230,7 +233,7 @@ class GPURecoWorkflowSpec : public o2::framework::Task
230233
uint32_t mNextThreadIndex = 0;
231234
bool mUpdateGainMapCCDB = true;
232235
std::unique_ptr<o2::gpu::GPUSettingsTF> mTFSettings;
233-
std::unique_ptr<o2::gpu::GPUSettingsProcessingNNclusterizer> mNNClusterizerSettings;
236+
std::map<std::string, std::string> nnCCDBSettings;
234237

235238
Config mSpecConfig;
236239
std::shared_ptr<o2::base::GRPGeomRequest> mGGR;

GPU/Workflow/src/GPUWorkflowSpec.cxx

Lines changed: 124 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -133,50 +133,6 @@ void GPURecoWorkflowSpec::init(InitContext& ic)
133133
{
134134
GRPGeomHelper::instance().setRequest(mGGR);
135135
GPUO2InterfaceConfiguration& config = *mConfig.get();
136-
GPUSettingsProcessingNNclusterizer& mNNClusterizerSettings = mConfig->configProcessing.nn;
137-
138-
if (mNNClusterizerSettings.nnLoadFromCCDB) {
139-
LOG(info) << "Loading neural networks from CCDB";
140-
o2::tpc::NeuralNetworkClusterizer nnClusterizerFetcher;
141-
nnClusterizerFetcher.initCcdbApi(mNNClusterizerSettings.nnCCDBURL);
142-
std::map<std::string, std::string> ccdbSettings = {
143-
{"nnCCDBURL", mNNClusterizerSettings.nnCCDBURL},
144-
{"nnCCDBPath", mNNClusterizerSettings.nnCCDBPath},
145-
{"inputDType", mNNClusterizerSettings.nnInferenceInputDType},
146-
{"outputDType", mNNClusterizerSettings.nnInferenceOutputDType},
147-
{"outputFolder", mNNClusterizerSettings.nnLocalFolder},
148-
{"nnCCDBPath", mNNClusterizerSettings.nnCCDBPath},
149-
{"nnCCDBWithMomentum", std::to_string(mNNClusterizerSettings.nnCCDBWithMomentum)},
150-
{"nnCCDBBeamType", mNNClusterizerSettings.nnCCDBBeamType},
151-
{"nnCCDBInteractionRate", std::to_string(mNNClusterizerSettings.nnCCDBInteractionRate)}};
152-
153-
std::string nnFetchFolder = mNNClusterizerSettings.nnLocalFolder;
154-
std::vector<std::string> evalMode = o2::utils::Str::tokenize(mNNClusterizerSettings.nnEvalMode, ':');
155-
156-
if (evalMode[0] == "c1") {
157-
ccdbSettings["nnCCDBLayerType"] = mNNClusterizerSettings.nnCCDBClassificationLayerType;
158-
ccdbSettings["nnCCDBEvalType"] = "classification_c1";
159-
ccdbSettings["outputFile"] = "net_classification_c1.onnx";
160-
nnClusterizerFetcher.loadIndividualFromCCDB(ccdbSettings);
161-
} else if (evalMode[0] == "c2") {
162-
ccdbSettings["nnCCDBLayerType"] = mNNClusterizerSettings.nnCCDBClassificationLayerType;
163-
ccdbSettings["nnCCDBEvalType"] = "classification_c2";
164-
ccdbSettings["outputFile"] = "net_classification_c2.onnx";
165-
nnClusterizerFetcher.loadIndividualFromCCDB(ccdbSettings);
166-
}
167-
168-
ccdbSettings["nnCCDBLayerType"] = mNNClusterizerSettings.nnCCDBRegressionLayerType;
169-
ccdbSettings["nnCCDBEvalType"] = "regression_c1";
170-
ccdbSettings["outputFile"] = "net_regression_c1.onnx";
171-
nnClusterizerFetcher.loadIndividualFromCCDB(ccdbSettings);
172-
if (evalMode[1] == "r2") {
173-
ccdbSettings["nnCCDBLayerType"] = mNNClusterizerSettings.nnCCDBRegressionLayerType;
174-
ccdbSettings["nnCCDBEvalType"] = "regression_c2";
175-
ccdbSettings["outputFile"] = "net_regression_c2.onnx";
176-
nnClusterizerFetcher.loadIndividualFromCCDB(ccdbSettings);
177-
}
178-
LOG(info) << "Neural network loading done!";
179-
}
180136

181137
// Create configuration object and fill settings
182138
mConfig->configGRP.solenoidBzNominalGPU = 0;
@@ -185,6 +141,7 @@ void GPURecoWorkflowSpec::init(InitContext& ic)
185141
mTFSettings->simStartOrbit = hbfu.getFirstIRofTF(o2::InteractionRecord(0, hbfu.orbitFirstSampled)).orbit;
186142

187143
*mConfParam = mConfig->ReadConfigurableParam();
144+
188145
if (mConfParam->display) {
189146
mDisplayFrontend.reset(GPUDisplayFrontendInterface::getFrontend(mConfig->configDisplay.displayFrontend.c_str()));
190147
mConfig->configProcessing.eventDisplay = mDisplayFrontend.get();
@@ -814,6 +771,68 @@ void GPURecoWorkflowSpec::run(ProcessingContext& pc)
814771

815772
// ------------------------------ Actual processing ------------------------------
816773

774+
// NN clusterizer: pick up the networks that were requested from CCDB as inputs and,
// if nnCCDBDumpToFile is 1, write each of them to a local .onnx file.
// NOTE(review): this sits in run(), so the files are rewritten on every call while the
// option is enabled — consider doing the dump only once per fetched object.
if (mSpecConfig.nnLoadFromCCDB) {
  LOG(info) << "(NN CLUS) Fetching CCDB calib objects";

  // Writes the payload to `path`, truncating an existing file. If the byte sequence
  // "Accept-Ranges" occurs in the payload, only the bytes before its first occurrence are written.
  // NOTE(review): presumably this strips header residue appended to the blob — confirm.
  // Throws std::runtime_error if the payload is missing or the file cannot be opened or written.
  auto dumpOnnxToFile = [](const char* buffer, std::size_t size, const std::string& path) {
    if (buffer == nullptr) {
      throw std::runtime_error("No ONNX payload available for: " + path);
    }

    const char marker[] = "Accept-Ranges";
    const char* const end = buffer + size;
    const char* const pos = std::search(buffer, end, marker, marker + sizeof(marker) - 1);

    // std::search returns `end` when the marker is absent, so this is `size` in that case.
    const std::size_t writeSize = static_cast<std::size_t>(pos - buffer);

    std::ofstream out(path, std::ios::binary | std::ios::trunc);
    if (!out.is_open()) {
      throw std::runtime_error("Failed to open ONNX output file: " + path);
    }

    out.write(buffer, static_cast<std::streamsize>(writeSize));
    if (!out) {
      throw std::runtime_error("Failed while writing ONNX data to: " + path);
    }
  };

  GPUSettingsProcessingNNclusterizer& nnClusterizerSettings = mConfig->configProcessing.nn;

  // Retrieves the input bound to `binding` and optionally dumps it to `fileName`.
  auto fetchAndDump = [&](const char* binding, const std::string& fileName) {
    const DataRef ref = pc.inputs().get(binding);
    if (nnClusterizerSettings.nnCCDBDumpToFile == 1) {
      dumpOnnxToFile(ref.payload, DataRefUtils::getPayloadSize(ref), fileName);
      LOG(info) << "(NN CLUS) Dumped " << binding << " from CCDB to " << fileName;
    }
  };

  // nnEvalMode is split at ':'; guard the indices so a value without ':' (or an empty one)
  // does not read past the end of the token list.
  const std::vector<std::string> evalMode = o2::utils::Str::tokenize(nnClusterizerSettings.nnEvalMode, ':');
  const std::string classificationMode = evalMode.empty() ? std::string{} : evalMode[0];
  const bool useRegressionC2 = evalMode.size() > 1 && evalMode[1] == "r2";

  if (classificationMode == "c1") {
    fetchAndDump("nn_classification_c1", "net_classification_c1.onnx");
  } else if (classificationMode == "c2") {
    fetchAndDump("nn_classification_c2", "net_classification_c2.onnx");
  }

  // The first regression network is always requested.
  fetchAndDump("nn_regression_c1", "net_regression_c1.onnx");
  if (useRegressionC2) {
    fetchAndDump("nn_regression_c2", "net_regression_c2.onnx");
  }
}
817836
if ((int32_t)(ptrs.tpcZS != nullptr) + (int32_t)(ptrs.tpcPackedDigits != nullptr && (ptrs.tpcZS == nullptr || ptrs.tpcPackedDigits->tpcDigitsMC == nullptr)) + (int32_t)(ptrs.clustersNative != nullptr) + (int32_t)(ptrs.tpcCompressedClusters != nullptr) != 1) {
818837
throw std::runtime_error("Invalid input for gpu tracking");
819838
}
@@ -1262,6 +1281,67 @@ Inputs GPURecoWorkflowSpec::inputs()
12621281
}
12631282
}
12641283

1284+
// NN clusterizer
1285+
*mConfParam = mConfig->ReadConfigurableParam();
1286+
if (mConfig->configProcessing.nn.nnLoadFromCCDB) {
1287+
1288+
LOG(info) << "(NN CLUS) Enabling fetching of TPC NN clusterizer from CCDB";
1289+
mSpecConfig.nnLoadFromCCDB = true;
1290+
GPUSettingsProcessingNNclusterizer& nnClusterizerSettings = mConfig->configProcessing.nn;
1291+
1292+
std::map<std::string, std::string> metadata;
1293+
metadata["inputDType"] = nnClusterizerSettings.nnInferenceInputDType; // FP16 or FP32
1294+
metadata["outputDType"] = nnClusterizerSettings.nnInferenceOutputDType; // FP16 or FP32
1295+
metadata["nnCCDBWithMomentum"] = nnClusterizerSettings.nnCCDBWithMomentum; // 0, 1 -> Only for regression model
1296+
metadata["nnCCDBLayerType"] = nnClusterizerSettings.nnCCDBClassificationLayerType; // FC, CNN
1297+
metadata["nnCCDBInteractionRate"] = nnClusterizerSettings.nnCCDBInteractionRate; // in kHz
1298+
metadata["nnCCDBBeamType"] = nnClusterizerSettings.nnCCDBBeamType; // pp, pPb, PbPb
1299+
1300+
auto convert_map_to_metadata = [](const std::map<std::string, std::string>& inputMap, std::vector<o2::framework::CCDBMetadata>& outputMetadata) {
1301+
for (const auto& [key, value] : inputMap) {
1302+
if (value != "") {
1303+
outputMetadata.push_back({key, value});
1304+
}
1305+
}
1306+
};
1307+
1308+
std::vector<std::string> evalMode = o2::utils::Str::tokenize(nnClusterizerSettings.nnEvalMode, ':');
1309+
std::vector<o2::framework::CCDBMetadata> ccdb_metadata;
1310+
1311+
auto printSettings = [](const std::map<std::string, std::string>& settings) {
1312+
LOG(info) << "(NN CLUS) NN Clusterizer CCDB settings:";
1313+
for (const auto& [key, value] : settings) {
1314+
LOG(info) << " " << key << " : " << value;
1315+
}
1316+
};
1317+
printSettings(metadata);
1318+
1319+
if (evalMode[0] == "c1") {
1320+
metadata["nnCCDBEvalType"] = "classification_c1";
1321+
convert_map_to_metadata(metadata, ccdb_metadata);
1322+
inputs.emplace_back("nn_classification_c1", "TPC", "NNCLUSTERIZER_C1", 0, Lifetime::Condition, ccdbParamSpec(nnClusterizerSettings.nnCCDBPath, ccdb_metadata, 0));
1323+
LOG(info) << "(NN CLUS) Loading NN clusterizer classification (c1) from CCDB";
1324+
} else if (evalMode[0] == "c2") {
1325+
metadata["nnCCDBEvalType"] = "classification_c2";
1326+
convert_map_to_metadata(metadata, ccdb_metadata);
1327+
inputs.emplace_back("nn_classification_c2", "TPC", "NNCLUSTERIZER_C2", 0, Lifetime::Condition, ccdbParamSpec(nnClusterizerSettings.nnCCDBPath, ccdb_metadata, 0));
1328+
LOG(info) << "(NN CLUS) Loading NN clusterizer classification (c2) from CCDB";
1329+
}
1330+
1331+
metadata["nnCCDBEvalType"] = "regression_c1";
1332+
metadata["nnCCDBLayerType"] = nnClusterizerSettings.nnCCDBRegressionLayerType;
1333+
convert_map_to_metadata(metadata, ccdb_metadata);
1334+
inputs.emplace_back("nn_regression_c1", "TPC", "NNCLUSTERIZER_R1", 0, Lifetime::Condition, ccdbParamSpec(nnClusterizerSettings.nnCCDBPath, ccdb_metadata, 0));
1335+
LOG(info) << "(NN CLUS) Loading NN clusterizer regression (r1) from CCDB";
1336+
1337+
if (evalMode[1] == "r2") {
1338+
metadata["nnCCDBEvalType"] = "regression_c2";
1339+
convert_map_to_metadata(metadata, ccdb_metadata);
1340+
inputs.emplace_back("nn_regression_c2", "TPC", "NNCLUSTERIZER_R2", 0, Lifetime::Condition, ccdbParamSpec(nnClusterizerSettings.nnCCDBPath, ccdb_metadata, 0));
1341+
LOG(info) << "(NN CLUS) Loading NN clusterizer regression (r2) from CCDB";
1342+
}
1343+
}
1344+
12651345
return inputs;
12661346
};
12671347

0 commit comments

Comments
 (0)