Skip to content

Commit 350f786

Browse files
author
miranov25
committed
adding PerformanceLogger extracted from calibration code
1 parent 8ddfbf7 commit 350f786

File tree

2 files changed

+154
-59
lines changed

2 files changed

+154
-59
lines changed

UTILS/perfmonitor/README.md

Lines changed: 57 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -61,13 +61,18 @@ print(summary)
6161
### Example Summary Output
6262

6363
```
64-
elapsed_sec rss_gb
65-
mean max min mean max min
66-
step
67-
loop::step[0] 0.10 0.10 0.10 0.14 0.14 0.14
68-
loop::step[1] 0.20 0.20 0.20 0.15 0.15 0.15
69-
loop::step[2] 0.30 0.30 0.30 0.15 0.15 0.15
70-
setup::start 0.00 0.00 0.00 0.13 0.13 0.13
64+
Out[5]:
65+
{'summary_by_step': elapsed_sec rss_gb
66+
mean max min count mean max min count
67+
step
68+
loop::step 0.34 0.61 0.1 15 0.148 0.22 0.13 15
69+
setup::start 0.00 0.00 0.0 5 0.148 0.22 0.13 5,
70+
'summary_by_step_and_index': elapsed_sec rss_gb
71+
mean max min count mean max min count
72+
step index_0
73+
loop::step 0.0 0.102 0.11 0.10 5 0.148 0.22 0.13 5
74+
1.0 0.308 0.31 0.30 5 0.148 0.22 0.13 5
75+
2.0 0.610 0.61 0.61 5 0.148 0.22 0.13 5}
7176
```
7277

7378
## Plotting
@@ -94,25 +99,60 @@ This will be automatically parsed into new DataFrame columns:
9499
* `index_1` → 2
95100

96101
## Advanced: Custom Configuration
102+
Custom configurations can be obtained by modifying the `default_plot_config` and `default_summary_config` dictionaries.
103+
Then invoke `PerformanceLogger.plot` and `PerformanceLogger.summarize_with_configs` with those configs:
97104

98-
```python
99-
custom_summary = {
100-
"by": ["step", "index_0"],
101-
"stats": ["mean", "max"]
102-
}
105+
`PerformanceLogger.plot(df, default_plot_config, output_pdf="perf_plots.pdf")`
103106

104-
custom_plots = {
105-
"RSS Over Time": {
107+
```python
108+
default_plot_config={
109+
"RSS vs Time": {
106110
"kind": "line",
107111
"varX": "timestamp",
108112
"varY": "rss_gb",
109-
"title": "RSS vs Time",
110-
"sort": "timestamp",
113+
"title": "RSS over Time",
114+
"sort": "timestamp"
115+
},
116+
"RSS vs Step (chronological)": {
117+
"kind": "line",
118+
"varX": "rowID",
119+
"varY": "rss_gb",
120+
"title": "RSS vs Step",
121+
"xlabel": "step",
122+
"xticklabels": "step",
123+
"sort": "rowID"
124+
},
125+
"Elapsed Time vs Step": {
126+
"kind": "bar",
127+
"varX": "step",
128+
"varY": "elapsed_sec",
129+
"title": "Elapsed Time per Step",
130+
"sort": None
131+
},
132+
"RSS Summary Stats": {
133+
"varX": "step",
134+
"varY": "rss_gb",
135+
"aggregation": ["mean", "median", "std"],
136+
"title": "RSS Summary Statistics",
137+
"xlabel": "Step",
138+
"ylabel": "RSS (GB)",
139+
"sort": "step"
111140
}
141+
112142
}
113143

114-
PerformanceLogger.plot(df, custom_plots)
144+
default_summary_config={
145+
"summary_by_step": {
146+
"by": ["step"],
147+
"stats": ["mean", "max", "min", "count"]
148+
},
149+
"summary_by_step_and_index": {
150+
"by": ["step", "index_0"],
151+
"stats": ["mean", "max", "min", "count"]
152+
}
153+
}
115154
```
116155

156+
117157
## License
118158
???

UTILS/perfmonitor/performance_logger.py

Lines changed: 97 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -24,15 +24,16 @@ def log(self, step: str, index: Optional[List[int]] = None):
2424
f.write(line)
2525
print(f"{step_full} | {elapsed:.2f} | {mem_gb:.2f} | {self.user} | {self.host}")
2626

27+
2728
@staticmethod
2829
def log_to_dataframe(log_paths: Union[str, List[str]], sep: str = "|") -> pd.DataFrame:
2930
if isinstance(log_paths, str):
3031
log_paths = [log_paths]
3132

3233
rows = []
33-
for path in log_paths:
34+
for log_id, path in enumerate(log_paths):
3435
with open(path) as f:
35-
for line in f:
36+
for row_id, line in enumerate(f):
3637
parts = [x.strip() for x in line.strip().split(sep)]
3738
if len(parts) < 5:
3839
continue
@@ -44,16 +45,18 @@ def log_to_dataframe(log_paths: Union[str, List[str]], sep: str = "|") -> pd.Dat
4445
"rss_gb": float(rss_str),
4546
"user": user,
4647
"host": host,
47-
"logfile": path
48+
"logfile": path,
49+
"rowID": row_id,
50+
"logID": log_id
4851
}
4952

5053
if "[" in step and "]" in step:
5154
base, idx = step.split("[")
5255
row["step"] = base
5356
idx = idx.rstrip("]")
5457
for i, val in enumerate(idx.split(",")):
55-
if val.isdigit():
56-
row[f"index_{i}"] = int(val)
58+
if val.strip().isdigit():
59+
row[f"index_{i}"] = int(val.strip())
5760
rows.append(row)
5861

5962
return pd.DataFrame(rows)
@@ -62,12 +65,16 @@ def log_to_dataframe(log_paths: Union[str, List[str]], sep: str = "|") -> pd.Dat
6265
def summarize_with_config(df: pd.DataFrame, config: Dict) -> pd.DataFrame:
6366
group_cols = config.get("by", ["step"])
6467
stats = config.get("stats", ["mean", "max", "min"])
65-
6668
agg = {}
6769
for col in ["elapsed_sec", "rss_gb"]:
6870
agg[col] = stats
69-
7071
return df.groupby(group_cols).agg(agg)
72+
@staticmethod
73+
def summarize_with_configs(df: pd.DataFrame, config_dict: Dict[str, Dict]) -> Dict[str, pd.DataFrame]:
74+
summaries = {}
75+
for name, config in config_dict.items():
76+
summaries[name] = PerformanceLogger.summarize_with_config(df, config)
77+
return summaries
7178

7279
@staticmethod
7380
def plot(df: pd.DataFrame,
@@ -87,25 +94,45 @@ def plot(df: pd.DataFrame,
8794
if "filter" in config:
8895
subdf = subdf.query(config["filter"])
8996

90-
if "sort" in config:
91-
subdf = subdf.sort_values(config["sort"])
97+
varX = config.get("varX", "timestamp")
98+
varY = config.get("varY", "elapsed_sec")
99+
aggregation = config.get("aggregation")
100+
xlabel = config.get("xlabel", varX)
101+
ylabel = config.get("ylabel", varY)
92102

93-
x = subdf[config.get("varX", "timestamp")]
94-
y = subdf[config.get("varY", "elapsed_sec")]
95-
kind = config.get("kind", "line")
103+
if aggregation:
104+
if isinstance(aggregation, list):
105+
agg_df = subdf.groupby(varX)[varY].agg(aggregation)
106+
subdf = agg_df.reset_index()
107+
else:
108+
subdf = subdf.groupby(varX)[varY].agg(aggregation).reset_index()
109+
110+
sort_column = config.get("sort")
111+
if sort_column:
112+
subdf = subdf.sort_values(sort_column)
96113

97114
plt.figure()
98-
if kind == "line":
99-
plt.plot(x, y, marker="o")
100-
elif kind == "bar":
101-
plt.bar(x, y)
115+
116+
if aggregation and isinstance(aggregation, list):
117+
for stat in aggregation:
118+
plt.plot(subdf[varX], subdf[stat], marker="o", label=stat)
119+
plt.legend()
102120
else:
103-
raise ValueError(f"Unsupported plot kind: {kind}")
121+
y = subdf[varY]
122+
kind = config.get("kind", "line")
123+
if kind == "line":
124+
plt.plot(subdf[varX], y, marker="o")
125+
elif kind == "bar":
126+
plt.bar(subdf[varX], y)
127+
else:
128+
raise ValueError(f"Unsupported plot kind: {kind}")
129+
130+
if "xticklabels" in config:
131+
plt.xticks(ticks=subdf[varX], labels=subdf[config["xticklabels"]], rotation=45)
104132

105133
plt.title(config.get("title", name))
106-
plt.xlabel(config.get("xlabel", config.get("varX", "timestamp")))
107-
plt.ylabel(config.get("ylabel", config.get("varY", "elapsed_sec")))
108-
plt.xticks(rotation=45)
134+
plt.xlabel(xlabel)
135+
plt.ylabel(ylabel)
109136
plt.tight_layout()
110137

111138
if output_pdf:
@@ -118,33 +145,61 @@ def plot(df: pd.DataFrame,
118145
pdf.close()
119146

120147

148+
149+
121150
# Default configurations
122151

123152
default_plot_config={
124-
"RSS vs Time": {
125-
"kind": "line",
126-
"varX": "timestamp",
127-
"varY": "rss_gb",
128-
"title": "RSS over Time",
129-
"sort": "timestamp"
130-
},
131-
"RSS vs step": {
132-
"kind": "line",
133-
"varX": "step",
134-
"varY": "rss_gb",
135-
"title": "RSS over Time",
136-
},
137-
"Elapsed Time vs Step": {
138-
"kind": "bar",
139-
"varX": "step",
140-
"varY": "elapsed_sec",
141-
"title": "Elapsed Time per Step",
142-
"sort": "step"
143-
}
153+
"RSS vs Time": {
154+
"kind": "line",
155+
"varX": "timestamp",
156+
"varY": "rss_gb",
157+
"title": "RSS over Time",
158+
"sort": "timestamp"
159+
},
160+
"RSS vs Step (chronological)": {
161+
"kind": "line",
162+
"varX": "rowID",
163+
"varY": "rss_gb",
164+
"title": "RSS vs Step",
165+
"xlabel": "step",
166+
"xticklabels": "step",
167+
"sort": "rowID"
168+
},
169+
"Elapsed Time vs Step": {
170+
"kind": "bar",
171+
"varX": "step",
172+
"varY": "elapsed_sec",
173+
"title": "Elapsed Time per Step",
174+
"sort": None
175+
},
176+
"RSS Summary Stats": {
177+
"varX": "step",
178+
"varY": "rss_gb",
179+
"aggregation": ["mean", "median", "std"],
180+
"title": "RSS Summary Statistics",
181+
"xlabel": "Step",
182+
"ylabel": "RSS (GB)",
183+
"sort": "step"
184+
},
185+
"Elapsed Time Summary Stats": {
186+
"varX": "step",
187+
"varY": "elapsed_sec",
188+
"aggregation": ["mean", "median", "std"],
189+
"title": "Elapsed Time Summary Statistics",
190+
"xlabel": "Step",
191+
"ylabel": "Elapsed Time (s)",
192+
"sort": "step"
193+
},
144194
}
145195

146196
default_summary_config={
197+
"summary_by_step": {
147198
"by": ["step"],
148-
"stats": ["mean", "max", "min"]
199+
"stats": ["mean", "max", "min", "count"]
200+
},
201+
"summary_by_step_and_index": {
202+
"by": ["step", "index_0"],
203+
"stats": ["mean", "max", "min", "count"]
149204
}
150-
205+
}

0 commit comments

Comments
 (0)