@@ -24,15 +24,16 @@ def log(self, step: str, index: Optional[List[int]] = None):
2424 f .write (line )
2525 print (f"{ step_full } | { elapsed :.2f} | { mem_gb :.2f} | { self .user } | { self .host } " )
2626
27+
2728 @staticmethod
2829 def log_to_dataframe (log_paths : Union [str , List [str ]], sep : str = "|" ) -> pd .DataFrame :
2930 if isinstance (log_paths , str ):
3031 log_paths = [log_paths ]
3132
3233 rows = []
33- for path in log_paths :
34+ for log_id , path in enumerate ( log_paths ) :
3435 with open (path ) as f :
35- for line in f :
36+ for row_id , line in enumerate ( f ) :
3637 parts = [x .strip () for x in line .strip ().split (sep )]
3738 if len (parts ) < 5 :
3839 continue
@@ -44,16 +45,18 @@ def log_to_dataframe(log_paths: Union[str, List[str]], sep: str = "|") -> pd.Dat
4445 "rss_gb" : float (rss_str ),
4546 "user" : user ,
4647 "host" : host ,
47- "logfile" : path
48+ "logfile" : path ,
49+ "rowID" : row_id ,
50+ "logID" : log_id
4851 }
4952
5053 if "[" in step and "]" in step :
5154 base , idx = step .split ("[" )
5255 row ["step" ] = base
5356 idx = idx .rstrip ("]" )
5457 for i , val in enumerate (idx .split ("," )):
55- if val .isdigit ():
56- row [f"index_{ i } " ] = int (val )
58+ if val .strip (). isdigit ():
59+ row [f"index_{ i } " ] = int (val . strip () )
5760 rows .append (row )
5861
5962 return pd .DataFrame (rows )
@@ -62,12 +65,16 @@ def log_to_dataframe(log_paths: Union[str, List[str]], sep: str = "|") -> pd.Dat
def summarize_with_config(df: pd.DataFrame, config: Dict) -> pd.DataFrame:
    """Aggregate the timing and memory columns of a parsed log frame.

    Args:
        df: Frame holding the ``elapsed_sec`` and ``rss_gb`` columns along
            with whichever columns are used for grouping.
        config: Mapping with two optional keys: ``"by"`` (grouping
            columns, defaults to ``["step"]``) and ``"stats"``
            (aggregation names, defaults to ``["mean", "max", "min"]``).

    Returns:
        The result of grouping ``df`` by the ``"by"`` columns and applying
        every requested statistic to both measured columns.
    """
    keys = config.get("by", ["step"])
    requested = config.get("stats", ["mean", "max", "min"])
    # Both measured columns receive the same list of statistics.
    per_column = {name: requested for name in ("elapsed_sec", "rss_gb")}
    return df.groupby(keys).agg(per_column)
@staticmethod
def summarize_with_configs(df: pd.DataFrame, config_dict: Dict[str, Dict]) -> Dict[str, pd.DataFrame]:
    """Run ``summarize_with_config`` once per named configuration.

    Args:
        df: Frame passed unchanged to every summary.
        config_dict: Mapping from a summary name to the configuration
            used to build that summary.

    Returns:
        Mapping from each name in ``config_dict`` to the summary frame
        produced for its configuration, in the same iteration order.
    """
    return {
        label: PerformanceLogger.summarize_with_config(df, cfg)
        for label, cfg in config_dict.items()
    }
7178
7279 @staticmethod
7380 def plot (df : pd .DataFrame ,
@@ -87,25 +94,45 @@ def plot(df: pd.DataFrame,
8794 if "filter" in config :
8895 subdf = subdf .query (config ["filter" ])
8996
90- if "sort" in config :
91- subdf = subdf .sort_values (config ["sort" ])
97+ varX = config .get ("varX" , "timestamp" )
98+ varY = config .get ("varY" , "elapsed_sec" )
99+ aggregation = config .get ("aggregation" )
100+ xlabel = config .get ("xlabel" , varX )
101+ ylabel = config .get ("ylabel" , varY )
92102
93- x = subdf [config .get ("varX" , "timestamp" )]
94- y = subdf [config .get ("varY" , "elapsed_sec" )]
95- kind = config .get ("kind" , "line" )
103+ if aggregation :
104+ if isinstance (aggregation , list ):
105+ agg_df = subdf .groupby (varX )[varY ].agg (aggregation )
106+ subdf = agg_df .reset_index ()
107+ else :
108+ subdf = subdf .groupby (varX )[varY ].agg (aggregation ).reset_index ()
109+
110+ sort_column = config .get ("sort" )
111+ if sort_column :
112+ subdf = subdf .sort_values (sort_column )
96113
97114 plt .figure ()
98- if kind == "line" :
99- plt .plot (x , y , marker = "o" )
100- elif kind == "bar" :
101- plt .bar (x , y )
115+
116+ if aggregation and isinstance (aggregation , list ):
117+ for stat in aggregation :
118+ plt .plot (subdf [varX ], subdf [stat ], marker = "o" , label = stat )
119+ plt .legend ()
102120 else :
103- raise ValueError (f"Unsupported plot kind: { kind } " )
121+ y = subdf [varY ]
122+ kind = config .get ("kind" , "line" )
123+ if kind == "line" :
124+ plt .plot (subdf [varX ], y , marker = "o" )
125+ elif kind == "bar" :
126+ plt .bar (subdf [varX ], y )
127+ else :
128+ raise ValueError (f"Unsupported plot kind: { kind } " )
129+
130+ if "xticklabels" in config :
131+ plt .xticks (ticks = subdf [varX ], labels = subdf [config ["xticklabels" ]], rotation = 45 )
104132
105133 plt .title (config .get ("title" , name ))
106- plt .xlabel (config .get ("xlabel" , config .get ("varX" , "timestamp" )))
107- plt .ylabel (config .get ("ylabel" , config .get ("varY" , "elapsed_sec" )))
108- plt .xticks (rotation = 45 )
134+ plt .xlabel (xlabel )
135+ plt .ylabel (ylabel )
109136 plt .tight_layout ()
110137
111138 if output_pdf :
@@ -118,33 +145,61 @@ def plot(df: pd.DataFrame,
118145 pdf .close ()
119146
120147
148+
149+
121150# Default configurations
122151
# Named plot configurations. Each entry is read with ``config.get(...)`` by
# the plotting routine; keys that are absent fall back to that routine's
# defaults.
default_plot_config = dict([
    (
        "RSS vs Time",
        dict(
            kind="line",
            varX="timestamp",
            varY="rss_gb",
            title="RSS over Time",
            sort="timestamp",
        ),
    ),
    (
        # Plots against the per-file row number and relabels the ticks
        # with the step name taken from the "step" column.
        "RSS vs Step (chronological)",
        dict(
            kind="line",
            varX="rowID",
            varY="rss_gb",
            title="RSS vs Step",
            xlabel="step",
            xticklabels="step",
            sort="rowID",
        ),
    ),
    (
        # "sort" is None, so rows keep the order they arrive in.
        "Elapsed Time vs Step",
        dict(
            kind="bar",
            varX="step",
            varY="elapsed_sec",
            title="Elapsed Time per Step",
            sort=None,
        ),
    ),
    (
        # A list-valued "aggregation" yields one plotted line per statistic.
        "RSS Summary Stats",
        dict(
            varX="step",
            varY="rss_gb",
            aggregation=["mean", "median", "std"],
            title="RSS Summary Statistics",
            xlabel="Step",
            ylabel="RSS (GB)",
            sort="step",
        ),
    ),
    (
        "Elapsed Time Summary Stats",
        dict(
            varX="step",
            varY="elapsed_sec",
            aggregation=["mean", "median", "std"],
            title="Elapsed Time Summary Statistics",
            xlabel="Step",
            ylabel="Elapsed Time (s)",
            sort="step",
        ),
    ),
])
145195
# Named summary configurations: each value supplies the grouping columns
# ("by") and the statistics ("stats") for one summary table.
default_summary_config = dict(
    summary_by_step=dict(
        by=["step"],
        stats=["mean", "max", "min", "count"],
    ),
    summary_by_step_and_index=dict(
        by=["step", "index_0"],
        stats=["mean", "max", "min", "count"],
    ),
)
0 commit comments