@@ -15,17 +15,6 @@ class AliasDataFrame:
1515 """
1616 A wrapper for pandas DataFrame that supports on-demand computed columns (aliases)
1717 with dependency tracking and persistence.
18- Example usage:
19- >>> df = pd.DataFrame({"x": [1, 2, 3], "y": [10, 20, 30]})
20- >>> adf = AliasDataFrame(df)
21- >>> adf.add_alias("z", "x + y")
22- >>> adf.add_alias("w", "z * 2")
23- >>> adf.materialize_all()
24- >>> print(adf.df)
25- You can also save and load the dataframe along with aliases:
26- >>> adf.save("mydata")
27- >>> adf2 = AliasDataFrame.load("mydata")
28- >>> adf2.describe_aliases()
2918 """
3019
3120 def __init__ (self , df ):
@@ -35,10 +24,6 @@ def __init__(self, df):
3524 self .constant_aliases = set () # Optional set of constants that should not be materialized
3625
3726 def add_alias (self , name , expression , dtype = None , is_constant = False ):
38- """
39- Add an alias expression to the DataFrame.
40- Optionally specify output dtype and whether it's a constant (scalar-only).
41- """
4227 try :
4328 dummy_env = {k : 1 for k in list (self .df .columns ) + list (self .aliases .keys ())}
4429 dummy_env .update (self ._default_functions ())
@@ -142,6 +127,12 @@ def materialize_alias0(self, name, dtype=None):
142127 if name in self .aliases :
143128 local_env = {col : self .df [col ] for col in self .df .columns }
144129 local_env .update ({k : self .df [k ] for k in self .aliases if k in self .df })
130+ for cname in self .constant_aliases :
131+ try :
132+ val = eval (self .aliases [cname ], self ._default_functions ())
133+ local_env [cname ] = val
134+ except Exception as e :
135+ print (f"[Alias constant] Failed to evaluate constant '{ cname } ': { e } " )
145136 result = eval (self .aliases [name ], self ._default_functions (), local_env )
146137 result_dtype = dtype or self .alias_dtypes .get (name )
147138 if result_dtype is not None :
@@ -177,6 +168,12 @@ def visit(n):
177168 continue
178169 local_env = {col : self .df [col ] for col in self .df .columns }
179170 local_env .update ({k : self .df [k ] for k in self .aliases if k in self .df })
171+ for cname in self .constant_aliases :
172+ try :
173+ val = eval (self .aliases [cname ], self ._default_functions ())
174+ local_env [cname ] = val
175+ except Exception as e :
176+ print (f"[Alias constant] Failed to evaluate constant '{ cname } ': { e } " )
180177 try :
181178 result = eval (self .aliases [alias ], self ._default_functions (), local_env )
182179 result_dtype = dtype or self .alias_dtypes .get (alias )
@@ -202,6 +199,9 @@ def materialize_all(self, dtype=None):
202199 try :
203200 local_env = {col : self .df [col ] for col in self .df .columns }
204201 local_env .update ({k : self .df [k ] for k in self .df .columns if k in self .aliases })
202+ for cname in self .constant_aliases :
203+ val = eval (self .aliases [cname ], self ._default_functions ())
204+ local_env [cname ] = val
205205 result = eval (self .aliases [name ], self ._default_functions (), local_env )
206206 result_dtype = dtype or self .alias_dtypes .get (name )
207207 if result_dtype is not None :
@@ -222,19 +222,18 @@ def save(self, path_prefix, dropAliasColumns=True):
222222 else :
223223 cols = list (self .df .columns )
224224
225- # Save Parquet with metadata
226225 table = pa .Table .from_pandas (self .df [cols ])
227226 metadata = {
228227 "aliases" : json .dumps (self .aliases ),
229- "dtypes" : json .dumps ({k : v .__name__ for k , v in self .alias_dtypes .items ()})
228+ "dtypes" : json .dumps ({k : v .__name__ for k , v in self .alias_dtypes .items ()}),
229+ "constants" : json .dumps (list (self .constant_aliases ))
230230 }
231231 existing_meta = table .schema .metadata or {}
232232 combined_meta = existing_meta .copy ()
233233 combined_meta .update ({k .encode (): v .encode () for k , v in metadata .items ()})
234234 table = table .replace_schema_metadata (combined_meta )
235235 pq .write_table (table , f"{ path_prefix } .parquet" , compression = "zstd" )
236236
237- # Also write JSON file for explicit tracking
238237 with open (f"{ path_prefix } .aliases.json" , "w" ) as f :
239238 json .dump (metadata , f , indent = 2 )
240239
@@ -245,17 +244,19 @@ def load(path_prefix):
245244 df = table .to_pandas ()
246245 adf = AliasDataFrame (df )
247246
248- # Try metadata first
249247 meta = table .schema .metadata or {}
250248 if b"aliases" in meta and b"dtypes" in meta :
251249 adf .aliases = json .loads (meta [b"aliases" ].decode ())
252250 adf .alias_dtypes = {k : getattr (np , v ) for k , v in json .loads (meta [b"dtypes" ].decode ()).items ()}
251+ if b"constants" in meta :
252+ adf .constant_aliases = set (json .loads (meta [b"constants" ].decode ()))
253253 else :
254- # Fallback to JSON
255254 with open (f"{ path_prefix } .aliases.json" ) as f :
256255 data = json .load (f )
257256 adf .aliases = json .loads (data ["aliases" ])
258257 adf .alias_dtypes = {k : getattr (np , v ) for k , v in json .loads (data ["dtypes" ]).items ()}
258+ if "constants" in data :
259+ adf .constant_aliases = set (json .loads (data ["constants" ]))
259260
260261 return adf
261262
@@ -272,7 +273,13 @@ def export_tree(self, filename, treename="tree", dropAliasColumns=True):
272273 f = ROOT .TFile .Open (filename , "UPDATE" )
273274 tree = f .Get (treename )
274275 for alias , expr in self .aliases .items ():
275- tree .SetAlias (alias , expr )
276+ expr_str = expr
277+ try :
278+ val = float (expr )
279+ expr_str = f"({ val } +0)"
280+ except Exception :
281+ pass
282+ tree .SetAlias (alias , expr_str )
276283 tree .Write ("" , ROOT .TObject .kOverwrite )
277284 f .Close ()
278285
0 commit comments