1111
class SubframeRegistry:
    """Name-keyed store of subframes together with their join keys.

    Every entry is a dict ``{'frame': alias_df, 'index': index_columns}``.
    """

    def __init__(self):
        # name -> {'frame': adf, 'index': index_columns}
        self.subframes = {}

    def add_subframe(self, name, alias_df, index_columns, pre_index=False):
        """Register ``alias_df`` under ``name``.

        Args:
            name: Key used to look the subframe up later.
            alias_df: Object exposing a pandas DataFrame as ``.df``.
            index_columns: Column name, or sequence of column names, that
                identify a row of the subframe. Stored exactly as given.
            pre_index: When true, make ``index_columns`` the index of
                ``alias_df.df`` (in place, so the caller's frame is modified)
                unless it already is.
        """
        if pre_index:
            # Normalise to a list first: ``index.names`` never compares equal
            # to a bare string, which would trigger a second ``set_index`` on
            # a column that has already been moved into the index.
            if isinstance(index_columns, str):
                wanted = [index_columns]
            else:
                wanted = list(index_columns)
            if list(alias_df.df.index.names) != wanted:
                alias_df.df.set_index(wanted, inplace=True)
        self.subframes[name] = {'frame': alias_df, 'index': index_columns}

    def get(self, name):
        """Return the frame registered under ``name``, or ``None`` if unknown."""
        entry = self.subframes.get(name)
        if entry is None:
            return None
        return entry.get('frame', None)

    def get_entry(self, name):
        """Return the full ``{'frame', 'index'}`` entry, or ``None`` if unknown."""
        return self.subframes.get(name, None)

    def items(self):
        """Return a view of ``(name, entry)`` pairs."""
        return self.subframes.items()
2429
30+
2531def convert_expr_to_root (expr ):
2632 class RootTransformer (ast .NodeTransformer ):
2733 FUNC_MAP = {
@@ -77,8 +83,8 @@ def __getattr__(self, item):
7783 return self .df [item ]
7884 raise AttributeError (f"'{ type (self ).__name__ } ' object has no attribute '{ item } '" )
7985
def register_subframe(self, name, adf, index_columns, pre_index=False):
    """Attach ``adf`` as a subframe called ``name``.

    The call is forwarded unchanged to the subframe registry held in
    ``self._subframes``; ``index_columns`` names the join key(s) and
    ``pre_index`` is passed through as a keyword.
    """
    registry = self._subframes
    registry.add_subframe(name, adf, index_columns, pre_index=pre_index)
8288
def get_subframe(self, name):
    """Look up the subframe called ``name`` in ``self._subframes``.

    Returns whatever the registry's ``get`` returns for that name.
    """
    registry = self._subframes
    return registry.get(name)
@@ -88,10 +94,41 @@ def _default_functions(self):
8894 env = {k : getattr (math , k ) for k in dir (math ) if not k .startswith ("_" )}
8995 env .update ({k : getattr (np , k ) for k in dir (np ) if not k .startswith ("_" )})
9096 env ["np" ] = np
91- for sf_name , sf in self ._subframes .items ():
92- env [sf_name ] = sf
97+ for sf_name , sf_entry in self ._subframes .items ():
98+ env [sf_name ] = sf_entry [ 'frame' ]
9399 return env
94100
101+ def _prepare_subframe_joins (self , expr ):
102+ tokens = re .findall (r'(\b\w+)\.(\w+)' , expr )
103+ for sf_name , sf_col in tokens :
104+ entry = self ._subframes .get_entry (sf_name )
105+ if not entry :
106+ continue
107+ sub_adf = entry ['frame' ]
108+ sub_df = sub_adf .df
109+ index_cols = entry ['index' ]
110+ if isinstance (index_cols , str ):
111+ index_cols = [index_cols ]
112+ merge_cols = index_cols + [sf_col ]
113+ suffix = f'__{ sf_name } '
114+
115+ try :
116+ cols_to_merge = sub_df [merge_cols ]
117+ except KeyError :
118+ if sf_col in sub_adf .aliases :
119+ sub_adf .materialize_alias (sf_col )
120+ sub_df = sub_adf .df
121+ cols_to_merge = sub_df [merge_cols ]
122+ else :
123+ raise KeyError (f"Subframe '{ sf_name } ' does not contain or define alias '{ sf_col } '" )
124+
125+ joined = self .df .merge (cols_to_merge , on = index_cols , suffixes = ('' , suffix ))
126+ col_renamed = f'{ sf_col } { suffix } '
127+ if col_renamed in joined .columns :
128+ self .df [col_renamed ] = joined [col_renamed ].values
129+ expr = expr .replace (f'{ sf_name } .{ sf_col } ' , col_renamed )
130+ return expr
131+
95132 def _check_for_cycles (self ):
96133 try :
97134 self ._topological_sort ()
@@ -107,8 +144,8 @@ def add_alias(self, name, expression, dtype=None, is_constant=False):
107144 self ._check_for_cycles ()
108145
109146 def _eval_in_namespace (self , expr ):
147+ expr = self ._prepare_subframe_joins (expr )
110148 local_env = {col : self .df [col ] for col in self .df .columns }
111- local_env .update ({k : self .df [k ] for k in self .aliases if k in self .df })
112149 local_env .update (self ._default_functions ())
113150 return eval (expr , {}, local_env )
114151
@@ -300,8 +337,8 @@ def export_tree(self, filename_or_file, treename="tree", dropAliasColumns=True):
300337 self ._write_metadata_to_root (filename_or_file , treename )
301338 else :
302339 self ._write_to_uproot (filename_or_file , treename , dropAliasColumns )
303- for subframe_name , sub_adf in self ._subframes .items ():
304- sub_adf ._write_metadata_to_root (filename_or_file , f"{ treename } __subframe__{ subframe_name } " )
340+ for subframe_name , entry in self ._subframes .items ():
341+ entry [ "frame" ] ._write_metadata_to_root (filename_or_file , f"{ treename } __subframe__{ subframe_name } " )
305342
306343 def _write_to_uproot (self , uproot_file , treename , dropAliasColumns ):
307344 export_cols = [col for col in self .df .columns if not dropAliasColumns or col not in self .aliases ]
@@ -310,8 +347,8 @@ def _write_to_uproot(self, uproot_file, treename, dropAliasColumns):
310347
311348 uproot_file [treename ] = export_df
312349
313- for subframe_name , sub_adf in self ._subframes .items ():
314- sub_adf .export_tree (uproot_file , f"{ treename } __subframe__{ subframe_name } " , dropAliasColumns )
350+ for subframe_name , entry in self ._subframes .items ():
351+ entry [ "frame" ] .export_tree (uproot_file , f"{ treename } __subframe__{ subframe_name } " , dropAliasColumns )
315352
316353 def _write_metadata_to_root (self , filename , treename ):
317354 f = ROOT .TFile .Open (filename , "UPDATE" )
@@ -325,6 +362,7 @@ def _write_metadata_to_root(self, filename, treename):
325362 tree .SetAlias (alias , expr_str )
326363 metadata = {
327364 "aliases" : self .aliases ,
365+ "subframe_indices" : {k : v ["index" ] for k , v in self ._subframes .items ()},
328366 "dtypes" : {k : v .__name__ for k , v in self .alias_dtypes .items ()},
329367 "constants" : list (self .constant_aliases ),
330368 "subframes" : list (self ._subframes .subframes .keys ())
@@ -334,6 +372,7 @@ def _write_metadata_to_root(self, filename, treename):
334372 tree .Write ("" , ROOT .TObject .kOverwrite )
335373 f .Close ()
336374
375+ @staticmethod
337376 def read_tree (filename , treename = "tree" ):
338377 with uproot .open (filename ) as f :
339378 df = f [treename ].arrays (library = "pd" )
@@ -354,7 +393,10 @@ def read_tree(filename, treename="tree"):
354393 adf .constant_aliases .update (jmeta .get ("constants" , []))
355394 for sf_name in jmeta .get ("subframes" , []):
356395 sf = AliasDataFrame .read_tree (filename , treename = f"{ treename } __subframe__{ sf_name } " )
357- adf .register_subframe (sf_name , sf )
396+ index = jmeta .get ("subframe_indices" , {}).get (sf_name )
397+ if index is None :
398+ raise ValueError (f"Missing index_columns for subframe '{ sf_name } ' in metadata" )
399+ adf .register_subframe (sf_name , sf , index_columns = index )
358400 break
359401 except Exception :
360402 pass
0 commit comments