Skip to content

Commit 54de3fd

Browse files
author
miranov25
committed
Add support for dtype persistence and alias filtering in save/load
- Extend `save()` with a `dropAliasColumns` option to skip derived columns (previously supported only for TTree export) - Store alias output dtypes in the JSON metadata - Restore dtypes on load using numpy type resolution
1 parent 4a7d520 commit 54de3fd

File tree

1 file changed

+11
-5
lines changed

1 file changed

+11
-5
lines changed

UTILS/dfextensions/AliasDataFrame.py

Lines changed: 11 additions & 5 deletions
Original file line number · Diff line number · Diff line change
@@ -190,18 +190,24 @@ def materialize_all(self, dtype=None):
190190
except Exception as e:
191191
print(f"Failed to materialize {name}: {e}")
192192

193-
def save(self, path_prefix):
194-
self.df.to_parquet(f"{path_prefix}.parquet", compression="zstd")
193+
def save(self, path_prefix, dropAliasColumns=True):
194+
if dropAliasColumns:
195+
cols = [c for c in self.df.columns if c not in self.aliases]
196+
else:
197+
cols = list(self.df.columns)
198+
self.df[cols].to_parquet(f"{path_prefix}.parquet", compression="zstd")
195199
with open(f"{path_prefix}.aliases.json", "w") as f:
196-
json.dump(self.aliases, f, indent=2)
200+
json.dump({"aliases": self.aliases, "dtypes": {k: str(v) for k, v in self.alias_dtypes.items()}}, f, indent=2)
197201

198202
@staticmethod
199203
def load(path_prefix):
200204
df = pd.read_parquet(f"{path_prefix}.parquet")
201205
with open(f"{path_prefix}.aliases.json") as f:
202-
aliases = json.load(f)
206+
data = json.load(f)
203207
adf = AliasDataFrame(df)
204-
adf.aliases = aliases
208+
adf.aliases = data["aliases"]
209+
if "dtypes" in data:
210+
adf.alias_dtypes = {k: getattr(np, v) for k, v in data["dtypes"].items()}
205211
return adf
206212

207213
def export_tree(self, filename, treename="tree", dropAliasColumns=True):

0 commit comments

Comments
 (0)