Skip to content

Commit b33b8bc

Browse files
committed
DPL: refactor dumping of performanceMetrics.json
* Add test. * Allow specifying the file on the command line * Make the function take a std::stream rather than opening the file itself.
1 parent 512d5bc commit b33b8bc

File tree

6 files changed

+286
-15
lines changed

6 files changed

+286
-15
lines changed

Framework/Core/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -240,6 +240,7 @@ add_executable(o2-test-framework-core
240240
test/test_OverrideLabels.cxx
241241
test/test_O2DataModelHelpers.cxx
242242
test/test_RootConfigParamHelpers.cxx
243+
test/test_ResourcesMonitoringHelpers.cxx
243244
test/test_Services.cxx
244245
test/test_StringHelpers.cxx
245246
test/test_StaticFor.cxx

Framework/Core/include/Framework/DriverInfo.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -149,6 +149,8 @@ struct DriverInfo {
149149
std::string uniqueWorkflowId = "";
150150
/// Metrics gathering interval
151151
unsigned short resourcesMonitoringInterval = 0;
152+
/// Where to dump the metrics
153+
std::string resourcesMonitoringFilename = "performanceMetrics.json";
152154
/// Metrics gathering dump to disk interval
153155
unsigned short resourcesMonitoringDumpInterval = 0;
154156
/// Port used by the websocket control. 0 means not initialised.

Framework/Core/src/ResourcesMonitoringHelper.cxx

Lines changed: 3 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -55,7 +55,8 @@ boost::property_tree::ptree fillNodeWithValue(const DeviceMetricsInfo& deviceMet
5555
bool ResourcesMonitoringHelper::dumpMetricsToJSON(const std::vector<DeviceMetricsInfo>& metrics,
5656
const DeviceMetricsInfo& driverMetrics,
5757
const std::vector<DeviceSpec>& specs,
58-
std::vector<std::regex> const& performanceMetricsRegex) noexcept
58+
std::vector<std::regex> const& performanceMetricsRegex,
59+
std::ostream &out) noexcept
5960
{
6061

6162
assert(metrics.size() == specs.size());
@@ -161,14 +162,7 @@ bool ResourcesMonitoringHelper::dumpMetricsToJSON(const std::vector<DeviceMetric
161162

162163
root.add_child("driver", driverRoot);
163164

164-
std::ofstream file("performanceMetrics.json", std::ios::out);
165-
if (file.is_open()) {
166-
boost::property_tree::json_parser::write_json(file, root);
167-
} else {
168-
return false;
169-
}
170-
171-
file.close();
165+
boost::property_tree::json_parser::write_json(out, root);
172166

173167
return true;
174168
}

Framework/Core/src/ResourcesMonitoringHelper.h

Lines changed: 3 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,23 +13,22 @@
1313
#define O2_FRAMEWORK_RESOURCESMONITORINGHELPER_H_
1414

1515
#include "Framework/DeviceMetricsInfo.h"
16-
#include "Monitoring/ProcessMonitor.h"
1716
#include "Framework/DeviceSpec.h"
1817

1918
#include <vector>
20-
#include <type_traits>
2119
#include <regex>
20+
#include <iosfwd>
2221

2322
namespace o2::framework
2423
{
25-
2624
struct ResourcesMonitoringHelper {
2725
/// Dump the metrics in @a metrics which match the names specified in @a metricsToDump
2826
/// @a specs are the DeviceSpecs associated to the metrics.
2927
static bool dumpMetricsToJSON(std::vector<DeviceMetricsInfo> const& metrics,
3028
DeviceMetricsInfo const& driverMetrics,
3129
std::vector<DeviceSpec> const& specs,
32-
std::vector<std::regex> const& metricsToDump) noexcept;
30+
std::vector<std::regex> const& metricsToDump,
31+
std::ostream &out) noexcept;
3332
static bool isResourcesMonitoringEnabled(unsigned short interval) noexcept { return interval > 0; }
3433
};
3534

Framework/Core/src/runDataProcessing.cxx

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1247,8 +1247,10 @@ void dumpMetricsCallback(uv_timer_t* handle)
12471247
auto* context = (DriverServerContext*)handle->data;
12481248

12491249
static auto performanceMetrics = getDumpableMetrics();
1250+
std::ofstream file(context->driver->resourcesMonitoringFilename, std::ios::out);
12501251
ResourcesMonitoringHelper::dumpMetricsToJSON(*(context->metrics),
1251-
context->driver->metrics, *(context->specs), performanceMetrics);
1252+
context->driver->metrics, *(context->specs), performanceMetrics,
1253+
file);
12521254
}
12531255

12541256
void dumpRunSummary(DriverServerContext& context, DriverInfo const& driverInfo, DeviceInfos const& infos, DeviceSpecs const& specs)
@@ -2035,6 +2037,7 @@ int runStateMachine(DataProcessorSpecs const& workflow,
20352037
"--fairmq-ipc-prefix",
20362038
"--readers",
20372039
"--resources-monitoring",
2040+
"--resources-monitoring-file",
20382041
"--resources-monitoring-dump-interval",
20392042
"--time-limit",
20402043
};
@@ -2268,7 +2271,7 @@ int runStateMachine(DataProcessorSpecs const& workflow,
22682271
if (driverInfo.resourcesMonitoringDumpInterval) {
22692272
uv_timer_stop(&metricDumpTimer);
22702273
}
2271-
LOG(info) << "Dumping performance metrics to performanceMetrics.json file";
2274+
LOGP(info, "Dumping performance metrics to {}.json file", driverInfo.resourcesMonitoringFilename);
22722275
dumpMetricsCallback(&metricDumpTimer);
22732276
}
22742277
dumpRunSummary(serverContext, driverInfo, infos, runningWorkflow.devices);
@@ -2916,6 +2919,7 @@ int doMain(int argc, char** argv, o2::framework::WorkflowSpec const& workflow,
29162919
("no-IPC", bpo::value<bool>()->zero_tokens()->default_value(false), "disable IPC topology optimization") // //
29172920
("o2-control,o2", bpo::value<std::string>()->default_value(""), "dump O2 Control workflow configuration under the specified name") //
29182921
("resources-monitoring", bpo::value<unsigned short>()->default_value(0), "enable cpu/memory monitoring for provided interval in seconds") //
2922+
("resources-monitoring-file", bpo::value<std::string>()->default_value("performanceMetrics.json"), "file where to dump the metrics") //
29192923
("resources-monitoring-dump-interval", bpo::value<unsigned short>()->default_value(0), "dump monitoring information to disk every provided seconds"); //
29202924
// some of the options must be forwarded by default to the device
29212925
executorOptions.add(DeviceSpecHelpers::getForwardedDeviceOptions());
@@ -3186,6 +3190,7 @@ int doMain(int argc, char** argv, o2::framework::WorkflowSpec const& workflow,
31863190
driverInfo.deployHostname = varmap["hostname"].as<std::string>();
31873191
driverInfo.resources = varmap["resources"].as<std::string>();
31883192
driverInfo.resourcesMonitoringInterval = varmap["resources-monitoring"].as<unsigned short>();
3193+
driverInfo.resourcesMonitoringFilename = varmap["resources-monitoring-file"].as<std::string>();
31893194
driverInfo.resourcesMonitoringDumpInterval = varmap["resources-monitoring-dump-interval"].as<unsigned short>();
31903195

31913196
// FIXME: should use the whole dataProcessorInfos, actually...
Lines changed: 270 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,270 @@
1+
// Copyright 2019-2025 CERN and copyright holders of ALICE O2.
2+
// See https://alice-o2.web.cern.ch/copyright for details of the copyright holders.
3+
// All rights not expressly granted are reserved.
4+
//
5+
// This software is distributed under the terms of the GNU General Public
6+
// License v3 (GPL Version 3), copied verbatim in the file "COPYING".
7+
//
8+
// In applying this license CERN does not waive the privileges and immunities
9+
// granted to it by virtue of its status as an Intergovernmental Organization
10+
// or submit itself to any jurisdiction.
11+
12+
#include "../src/ResourcesMonitoringHelper.h"
13+
#include "Framework/DeviceMetricsInfo.h"
14+
#include "Framework/DeviceMetricsHelper.h"
15+
16+
#include <catch_amalgamated.hpp>
17+
#include <regex>
18+
#include <sstream>
19+
#include <iostream>
20+
21+
TEST_CASE("StreamMetrics")
22+
{
23+
using namespace o2::framework;
24+
std::vector<DeviceSpec> specs{
25+
DeviceSpec{
26+
.name = "someDevice",
27+
.id = "someDevice",
28+
.inputChannels = {},
29+
.outputChannels = {},
30+
.arguments = {},
31+
.options = {},
32+
.services = {},
33+
.algorithm = AlgorithmSpec{},
34+
.inputs = {},
35+
.outputs = {},
36+
.forwards = {},
37+
.rank = 0,
38+
.nSlots = 0,
39+
.inputTimesliceId = 0,
40+
.maxInputTimeslices = 0,
41+
.completionPolicy = CompletionPolicy{},
42+
.dispatchPolicy = DispatchPolicy{},
43+
.callbacksPolicy = CallbacksPolicy{},
44+
.sendingPolicy = SendingPolicy{},
45+
.resourcePolicy = ResourcePolicy{},
46+
.resource = {},
47+
.resourceMonitoringInterval = 10,
48+
.labels = {},
49+
.metadata = {}},
50+
DeviceSpec{
51+
.name = "anotherDevice",
52+
.id = "anotherDevice",
53+
.inputChannels = {},
54+
.outputChannels = {},
55+
.arguments = {},
56+
.options = {},
57+
.services = {},
58+
.algorithm = AlgorithmSpec{},
59+
.inputs = {},
60+
.outputs = {},
61+
.forwards = {},
62+
.rank = 0,
63+
.nSlots = 0,
64+
.inputTimesliceId = 0,
65+
.maxInputTimeslices = 0,
66+
.completionPolicy = CompletionPolicy{},
67+
.dispatchPolicy = DispatchPolicy{},
68+
.callbacksPolicy = CallbacksPolicy{},
69+
.sendingPolicy = SendingPolicy{},
70+
.resourcePolicy = ResourcePolicy{},
71+
.resource = {},
72+
.resourceMonitoringInterval = 10,
73+
.labels = {},
74+
.metadata = {}},
75+
76+
};
77+
78+
// This is the device metrics
79+
std::vector<DeviceMetricsInfo> metrics;
80+
metrics.resize(2);
81+
{
82+
DeviceMetricsInfo& info = metrics[0];
83+
auto bkey = DeviceMetricsHelper::createNumericMetric<int>(info, "bkey");
84+
REQUIRE(info.metricLabels.size() == 1);
85+
REQUIRE(info.metricPrefixes.size() == 1);
86+
auto akey = DeviceMetricsHelper::createNumericMetric<float>(info, "akey");
87+
REQUIRE(info.metricLabels.size() == 2);
88+
REQUIRE(info.metricPrefixes.size() == 2);
89+
auto ckey = DeviceMetricsHelper::createNumericMetric<uint64_t>(info, "ckey");
90+
REQUIRE(info.metricLabels.size() == 3);
91+
REQUIRE(info.metricPrefixes.size() == 3);
92+
REQUIRE(DeviceMetricsHelper::metricIdxByName("akey", info) == 1);
93+
REQUIRE(DeviceMetricsHelper::metricIdxByName("bkey", info) == 0);
94+
REQUIRE(DeviceMetricsHelper::metricIdxByName("ckey", info) == 2);
95+
REQUIRE(info.changed.size() == 3);
96+
REQUIRE(info.changed.at(0) == false);
97+
size_t t = 1000;
98+
ckey(info, 0, t++);
99+
ckey(info, 1, t++);
100+
ckey(info, 2, t++);
101+
ckey(info, 3, t++);
102+
ckey(info, 4, t++);
103+
ckey(info, 5, t++);
104+
}
105+
// Metrics for the second device
106+
{
107+
DeviceMetricsInfo& info = metrics[1];
108+
auto bkey = DeviceMetricsHelper::createNumericMetric<int>(info, "bkey");
109+
REQUIRE(info.metricLabels.size() == 1);
110+
REQUIRE(info.metricPrefixes.size() == 1);
111+
auto akey = DeviceMetricsHelper::createNumericMetric<float>(info, "akey");
112+
REQUIRE(info.metricLabels.size() == 2);
113+
REQUIRE(info.metricPrefixes.size() == 2);
114+
auto ckey = DeviceMetricsHelper::createNumericMetric<uint64_t>(info, "ckey");
115+
REQUIRE(info.metricLabels.size() == 3);
116+
REQUIRE(info.metricPrefixes.size() == 3);
117+
REQUIRE(DeviceMetricsHelper::metricIdxByName("akey", info) == 1);
118+
REQUIRE(DeviceMetricsHelper::metricIdxByName("bkey", info) == 0);
119+
REQUIRE(DeviceMetricsHelper::metricIdxByName("ckey", info) == 2);
120+
REQUIRE(info.changed.size() == 3);
121+
REQUIRE(info.changed.at(0) == false);
122+
size_t t = 1000;
123+
bkey(info, 0, t++);
124+
bkey(info, 1, t++);
125+
bkey(info, 2, t++);
126+
bkey(info, 3, t++);
127+
bkey(info, 4, t++);
128+
bkey(info, 5, t++);
129+
}
130+
131+
// This is the driver metrics
132+
DeviceMetricsInfo driverMetrics;
133+
auto dbkey = DeviceMetricsHelper::createNumericMetric<int>(driverMetrics, "bkey");
134+
REQUIRE(driverMetrics.metricLabels.size() == 1);
135+
REQUIRE(driverMetrics.metricPrefixes.size() == 1);
136+
auto dakey = DeviceMetricsHelper::createNumericMetric<float>(driverMetrics, "akey");
137+
REQUIRE(driverMetrics.metricLabels.size() == 2);
138+
REQUIRE(driverMetrics.metricPrefixes.size() == 2);
139+
auto dckey = DeviceMetricsHelper::createNumericMetric<uint64_t>(driverMetrics, "ckey");
140+
REQUIRE(driverMetrics.metricLabels.size() == 3);
141+
REQUIRE(driverMetrics.metricPrefixes.size() == 3);
142+
REQUIRE(DeviceMetricsHelper::metricIdxByName("akey", driverMetrics) == 1);
143+
REQUIRE(DeviceMetricsHelper::metricIdxByName("bkey", driverMetrics) == 0);
144+
REQUIRE(DeviceMetricsHelper::metricIdxByName("ckey", driverMetrics) == 2);
145+
REQUIRE(driverMetrics.changed.size() == 3);
146+
REQUIRE(driverMetrics.changed.at(0) == false);
147+
size_t t = 2000;
148+
dbkey(driverMetrics, 0, t++);
149+
dbkey(driverMetrics, 1, t++);
150+
dbkey(driverMetrics, 2, t++);
151+
dbkey(driverMetrics, 3, t++);
152+
dbkey(driverMetrics, 4, t++);
153+
dbkey(driverMetrics, 5, t++);
154+
155+
dbkey(driverMetrics, 0, t++);
156+
dbkey(driverMetrics, 1, t++);
157+
158+
dckey(driverMetrics, 0, t++);
159+
160+
std::stringstream streamer;
161+
std::vector<std::regex> performanceMetrics{
162+
std::regex("bkey"),
163+
std::regex("ckey"),
164+
};
165+
166+
ResourcesMonitoringHelper::dumpMetricsToJSON(metrics,
167+
driverMetrics, specs, performanceMetrics,
168+
streamer);
169+
REQUIRE(streamer.str() == R"JSON({
170+
"someDevice": {
171+
"ckey": [
172+
{
173+
"timestamp": "1000",
174+
"value": "0"
175+
},
176+
{
177+
"timestamp": "1001",
178+
"value": "1"
179+
},
180+
{
181+
"timestamp": "1002",
182+
"value": "2"
183+
},
184+
{
185+
"timestamp": "1003",
186+
"value": "3"
187+
},
188+
{
189+
"timestamp": "1004",
190+
"value": "4"
191+
},
192+
{
193+
"timestamp": "1005",
194+
"value": "5"
195+
}
196+
]
197+
},
198+
"anotherDevice": {
199+
"bkey": [
200+
{
201+
"timestamp": "1000",
202+
"value": "0"
203+
},
204+
{
205+
"timestamp": "1001",
206+
"value": "1"
207+
},
208+
{
209+
"timestamp": "1002",
210+
"value": "2"
211+
},
212+
{
213+
"timestamp": "1003",
214+
"value": "3"
215+
},
216+
{
217+
"timestamp": "1004",
218+
"value": "4"
219+
},
220+
{
221+
"timestamp": "1005",
222+
"value": "5"
223+
}
224+
]
225+
},
226+
"driver": {
227+
"bkey": [
228+
{
229+
"timestamp": "2000",
230+
"value": "0"
231+
},
232+
{
233+
"timestamp": "2001",
234+
"value": "1"
235+
},
236+
{
237+
"timestamp": "2002",
238+
"value": "2"
239+
},
240+
{
241+
"timestamp": "2003",
242+
"value": "3"
243+
},
244+
{
245+
"timestamp": "2004",
246+
"value": "4"
247+
},
248+
{
249+
"timestamp": "2005",
250+
"value": "5"
251+
},
252+
{
253+
"timestamp": "2006",
254+
"value": "0"
255+
},
256+
{
257+
"timestamp": "2007",
258+
"value": "1"
259+
}
260+
],
261+
"ckey": [
262+
{
263+
"timestamp": "2008",
264+
"value": "0"
265+
}
266+
]
267+
}
268+
}
269+
)JSON");
270+
}

0 commit comments

Comments
 (0)