@@ -191,23 +191,49 @@ def materialize_all(self, dtype=None):
191191 print (f"Failed to materialize { name } : { e } " )
192192
193193 def save (self , path_prefix , dropAliasColumns = True ):
194+ import pyarrow as pa
195+ import pyarrow .parquet as pq
196+
194197 if dropAliasColumns :
195198 cols = [c for c in self .df .columns if c not in self .aliases ]
196199 else :
197200 cols = list (self .df .columns )
198- self .df [cols ].to_parquet (f"{ path_prefix } .parquet" , compression = "zstd" )
201+
202+ # Save Parquet with metadata
203+ table = pa .Table .from_pandas (self .df [cols ])
204+ metadata = {
205+ "aliases" : json .dumps (self .aliases ),
206+ "dtypes" : json .dumps ({k : v .__name__ for k , v in self .alias_dtypes .items ()})
207+ }
208+ existing_meta = table .schema .metadata or {}
209+ combined_meta = existing_meta .copy ()
210+ combined_meta .update ({k .encode (): v .encode () for k , v in metadata .items ()})
211+ table = table .replace_schema_metadata (combined_meta )
212+ pq .write_table (table , f"{ path_prefix } .parquet" , compression = "zstd" )
213+
214+ # Also write JSON file for explicit tracking
199215 with open (f"{ path_prefix } .aliases.json" , "w" ) as f :
200- json .dump ({ "aliases" : self . aliases , "dtypes" : { k : str ( v ) for k , v in self . alias_dtypes . items ()}} , f , indent = 2 )
216+ json .dump (metadata , f , indent = 2 )
201217
202218 @staticmethod
203219 def load (path_prefix ):
204- df = pd . read_parquet ( f" { path_prefix } . parquet" )
205- with open (f"{ path_prefix } .aliases.json" ) as f :
206- data = json . load ( f )
220+ import pyarrow . parquet as pq
221+ table = pq . read_table (f"{ path_prefix } .parquet" )
222+ df = table . to_pandas ( )
207223 adf = AliasDataFrame (df )
208- adf .aliases = data ["aliases" ]
209- if "dtypes" in data :
210- adf .alias_dtypes = {k : getattr (np , v ) for k , v in data ["dtypes" ].items ()}
224+
225+ # Try metadata first
226+ meta = table .schema .metadata or {}
227+ if b"aliases" in meta and b"dtypes" in meta :
228+ adf .aliases = json .loads (meta [b"aliases" ].decode ())
229+ adf .alias_dtypes = {k : getattr (np , v ) for k , v in json .loads (meta [b"dtypes" ].decode ()).items ()}
230+ else :
231+ # Fallback to JSON
232+ with open (f"{ path_prefix } .aliases.json" ) as f :
233+ data = json .load (f )
234+ adf .aliases = json .loads (data ["aliases" ])
235+ adf .alias_dtypes = {k : getattr (np , v ) for k , v in json .loads (data ["dtypes" ]).items ()}
236+
211237 return adf
212238
213239 def export_tree (self , filename , treename = "tree" , dropAliasColumns = True ):
0 commit comments