Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
08a17ce
add pickle deprecation
sahiljhawar May 4, 2026
859a806
DeprecationWarning->FutureWarning
sahiljhawar May 5, 2026
c970cbc
DeprecationWarning->FutureWarning
sahiljhawar May 5, 2026
af9aa9f
exception->error
sahiljhawar May 5, 2026
a5fc656
feat: add append_data method for mat files and corresponding tests to…
sahiljhawar May 5, 2026
0316461
mark test as basic
sahiljhawar May 5, 2026
7be0040
feat: extend SavingStrategy to support netCDF format and refactor sav…
sahiljhawar May 5, 2026
0426440
feat: use SingleFileStrategy as base for MonthlyH5 and add append_dat…
sahiljhawar May 5, 2026
a8181d4
fix: update incoming metadata with old
sahiljhawar May 5, 2026
5bf96e9
feat: add single file CDF format
sahiljhawar May 5, 2026
e1e1b81
fix: fix append condition
sahiljhawar May 5, 2026
e5cc7ee
feat: add registry for custom writers
sahiljhawar May 5, 2026
55adc4e
fix: fix empty dicts in cdf globalattrs
sahiljhawar May 5, 2026
1073fed
chore: warn on var not found
sahiljhawar May 5, 2026
da73a77
chore: remove double import
sahiljhawar May 6, 2026
6ffccc1
remove map_standard_name
sahiljhawar May 6, 2026
5b39495
refactor: refactor monthly saving into format-dispatched strategy
sahiljhawar May 6, 2026
4f4402a
add imports
sahiljhawar May 6, 2026
1975f19
remove old code
sahiljhawar May 6, 2026
07a0c93
refactor density ncdf
sahiljhawar May 6, 2026
1dccd74
add contrib name
sahiljhawar May 6, 2026
4402724
docs: update docs and add inline examples
sahiljhawar May 6, 2026
4045bae
feat: implemented method to add custom variables which are not standa…
sahiljhawar May 6, 2026
33a0f86
fix: add save_single_method in dataorg
sahiljhawar May 7, 2026
ad0a419
Merge branch 'main' into sahiljhawar/saving-strategies
sahiljhawar May 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 1 addition & 3 deletions docs/API_reference/overview.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,9 +64,7 @@ This section provides a detailed reference for all modules, classes, and functio

[DataOrgStrategy](saving_strategies/data_org.md)

[MonthlyH5Strategy](saving_strategies/monthly_h5.md)

[MonthlyNetCDFStrategy](saving_strategies/monthly_netcdf.md)
[MonthlyFileStrategy](saving_strategies/monthly.md)

[SingleFileStrategy](saving_strategies/single_file.md)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ SPDX-FileContributor: Bernhard Haas
SPDX-License-Identifier: Apache-2.0
-->

::: el_paso.saving_strategies.monthly_h5_strategy.MonthlyH5Strategy
::: el_paso.saving_strategies.monthly_strategy.MonthlyFileStrategy
options:
members:
- __init__
11 changes: 0 additions & 11 deletions docs/API_reference/saving_strategies/monthly_netcdf.md

This file was deleted.

1 change: 1 addition & 0 deletions docs/API_reference/saving_strategies/single_file.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,3 +9,4 @@ SPDX-License-Identifier: Apache-2.0
options:
members:
- __init__
- register_writer
10 changes: 5 additions & 5 deletions el_paso/processing/compute_pitch_angles_for_telescopes.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,6 @@ def _compute_pitch_angles_for_telescopes(
tele_alpha_angles: NDArray[np.floating],
tele_beta_angles: NDArray[np.floating],
) -> NDArray[np.floating]:

if b_tele_aligned.shape[1] != 3: # noqa: PLR2004
msg = "Magnetic field input must be a vector with 3 components!"
raise ValueError(msg)
Expand All @@ -26,7 +25,7 @@ def _compute_pitch_angles_for_telescopes(
b_unit_vectors = b_tele_aligned / btot

# convert to standard spherical coordinates
theta_zone = np.pi/2 - tele_alpha_angles
theta_zone = np.pi / 2 - tele_alpha_angles
phi_zone = tele_beta_angles

# velocity directions of particles: reverse telescope look direction
Expand All @@ -44,10 +43,11 @@ def _compute_pitch_angles_for_telescopes(

return pitch_angles


def compute_pitch_angles_for_telescopes(
b_tele_aligned: ep.Variable,
tele_alpha_angles: ep.Variable,
tele_beta_angles: ep.Variable,
b_tele_aligned: ep.Variable,
tele_alpha_angles: ep.Variable,
tele_beta_angles: ep.Variable,
) -> ep.Variable:
"""Calculates the particle pitch angles for specific telescope orientations.

Expand Down
8 changes: 4 additions & 4 deletions el_paso/saving_strategies/__init__.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,18 @@
# SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences
# SPDX-FileContributor: Bernhard Haas
# SPDX-FileContributor: Sahil Jhawar
#
# SPDX-License-Identifier: Apache-2.0

from el_paso.saving_strategies.data_org_strategy import DataOrgStrategy
from el_paso.saving_strategies.density_netcdf_strategy import DensityNetCDFStrategy
from el_paso.saving_strategies.monthly_h5_strategy import MonthlyH5Strategy
from el_paso.saving_strategies.monthly_netcdf_strategy import MonthlyNetCDFStrategy
from el_paso.saving_strategies.monthly_strategy import MFSFormats, MonthlyFileStrategy
from el_paso.saving_strategies.single_file_strategy import SingleFileStrategy

__all__ = [
"DataOrgStrategy",
"DensityNetCDFStrategy",
"MonthlyH5Strategy",
"MonthlyNetCDFStrategy",
"MFSFormats",
"MonthlyFileStrategy",
"SingleFileStrategy",
]
169 changes: 140 additions & 29 deletions el_paso/saving_strategies/data_org_strategy.py
Original file line number Diff line number Diff line change
@@ -1,18 +1,22 @@
# SPDX-FileCopyrightText: 2025 GFZ Helmholtz Centre for Geosciences
# SPDX-FileContributor: Bernhard Haas
# SPDX-FileContributor: Sahil Jhawar
#
# SPDX-License-Identifier: Apache-2.0

from __future__ import annotations

import calendar
import logging
import pickle
import typing
import warnings
from datetime import datetime, timezone
from pathlib import Path
from typing import Any, Literal

import numpy as np
from scipy.io import loadmat, savemat

from el_paso.data_standards import DataOrgStandard
from el_paso.saving_strategy import OutputFile, SavingStrategy
Expand All @@ -22,6 +26,8 @@

from el_paso import Variable

logger = logging.getLogger(__name__)


class DataOrgStrategy(SavingStrategy):
"""A concrete saving strategy for saving data based on the satellite mission into separate monthly files.
Expand All @@ -32,6 +38,10 @@ class DataOrgStrategy(SavingStrategy):
variables to specific units and dimensions before saving. The data is saved
in either `.mat` or `.pickle` format, depending on user preference.

!!! warning "Deprecation"
The ``".pickle"`` file format is deprecated and will be removed in a future
release. Use ``".mat"`` instead.

Attributes:
output_files (list[OutputFile]): Pre-defined list of files to be saved,
each with a specific set of variables.
Expand Down Expand Up @@ -73,7 +83,18 @@ def __init__(
instrument (str): The instrument name.
kext (str): The model extension type. "TS04" is remapped to "T04s".
file_format (Literal[".mat", ".pickle"]): The desired format for the output files.
.. deprecated:: 1.0.3rc0
Passing ``".pickle"`` is deprecated and will be removed in a future release.
Use ``".mat"`` instead.
Comment on lines +86 to +88
Comment on lines +86 to +88
Comment on lines +86 to +88
"""
if file_format == ".pickle":
warnings.warn(
"The '.pickle' file format for DataOrgStrategy is deprecated and will be removed "
"in a future release. Use '.mat' instead.",
FutureWarning,
stacklevel=2,
)
Comment on lines +90 to +96
Comment on lines +90 to +96

self.base_data_path = Path(base_data_path)
self.mission = mission
self.satellite = satellite
Expand Down Expand Up @@ -148,6 +169,7 @@ def get_time_intervals_to_save(

if start_time is None or end_time is None:
msg = "start_time and end_time must be provided for DataOrgStrategy!"
logger.error(msg)
raise ValueError(msg)

current_time = start_time.replace(day=1)
Expand Down Expand Up @@ -196,15 +218,13 @@ def get_file_path(self, interval_start: datetime, interval_end: datetime, output

return self.base_data_path / self.mission.upper() / self.satellite.lower() / "Processed_Mat_Files" / file_name

def append_data(self, file_path: Path, data_dict_to_save: dict[str, Any]) -> dict[str, Any]:
def _merge_data_dicts_by_time(
self, data_dict_old: dict[str, Any], data_dict_to_save: dict[str, Any]
) -> dict[str, Any]:
"""Appends new data to an existing file by combining the new and old data dictionaries.

This method handles `pickle` files specifically, loading the old data, merging it with the
new data based on time, and then returning the merged dictionary. It raises an error if
the time values are not unique after concatenation.

Parameters:
file_path (Path): The path to the existing file to append to.
data_dict_old (dict[str, Any]): The dictionary with existing data.
data_dict_to_save (dict[str, Any]): The dictionary with new data to be added.

Returns:
Expand All @@ -214,38 +234,129 @@ def append_data(self, file_path: Path, data_dict_to_save: dict[str, Any]) -> dic
ValueError: If a key mismatch occurs between the dictionaries or if the concatenated
time array contains non-unique values.
"""
time_1 = np.atleast_1d(np.squeeze(data_dict_old["time"]))
time_2 = np.atleast_1d(np.squeeze(data_dict_to_save["time"]))

idx_to_insert = int(np.searchsorted(time_1, time_2[0]))

time_1_in_2 = np.isin(time_1, time_2)

for key, value_1 in data_dict_old.items():
if key.startswith("__"):
continue

if key == "metadata":
value_2 = data_dict_to_save.get(key)
if isinstance(value_1, dict) and isinstance(value_2, dict):
data_dict_to_save[key] = {**value_1, **value_2}
elif key not in data_dict_to_save:
data_dict_to_save[key] = value_1

if key not in data_dict_to_save:
msg = "Key mismatch when concatenating data dicts!"
logger.error(msg)
raise ValueError(msg)

if isinstance(value_1, np.ndarray):
value_1_truncated = typing.cast("NDArray[np.floating]", value_1[~time_1_in_2])

value_2 = data_dict_to_save[key]

concatenated_value = (
value_2
if value_1_truncated.size == 0
else np.insert(value_1_truncated, idx_to_insert, value_2, axis=0)
)

if key == "time" and len(np.unique(concatenated_value)) != len(concatenated_value):
msg = "Time values were not unique when concatenating arrays!"
logger.error(msg)
raise ValueError(msg)
data_dict_to_save[key] = concatenated_value

return data_dict_to_save

def _append_mat_data(self, file_path: Path, data_dict_to_save: dict[str, Any]) -> dict[str, Any]:
"""Load an existing MATLAB file and merge the new data into it."""
data_dict_old = loadmat(str(file_path))
return self._merge_data_dicts_by_time(data_dict_old, data_dict_to_save)

def _append_pickle_data(self, file_path: Path, data_dict_to_save: dict[str, Any]) -> dict[str, Any]:
"""Load an existing pickle file and merge the new data into it."""
warnings.warn(
"Appending to '.pickle' files is deprecated alongside the '.pickle' format and will "
"be removed in a future release. Switch to '.mat' to avoid this warning.",
FutureWarning,
stacklevel=2,
)

with file_path.open("rb") as file:
data_dict_old = pickle.load(file) # noqa: S301

time_1 = np.squeeze(data_dict_old["time"])
time_2 = np.squeeze(data_dict_to_save["time"])
return self._merge_data_dicts_by_time(data_dict_old, data_dict_to_save)

idx_to_insert = typing.cast("int", np.searchsorted(time_1, time_2[0]))
def append_data(self, file_path: Path, data_dict_to_save: dict[str, Any]) -> dict[str, Any]:
"""Appends new data to an existing DataOrg file.

time_1_in_2 = np.squeeze(np.isin(time_1, time_2))
Existing data is loaded from the file, overlapping time stamps are replaced
by the new block, and the merged dictionary is returned for the caller to
write back to disk.

for key, value_1 in data_dict_old.items():
if key not in data_dict_to_save:
msg = "Key missmatch when concatenating data dicts!"
raise ValueError(msg)
.. deprecated:: 1.0.3rc0
Support for appending to ``.pickle`` files is deprecated alongside the ``.pickle``
format itself. This code path will be removed in a future release.

Parameters:
file_path (Path): The path to the existing file to append to.
data_dict_to_save (dict[str, Any]): The dictionary with new data to be added.

Returns:
dict[str, Any]: A new dictionary containing the merged old and new data.

if isinstance(value_1, np.ndarray):
value_1_truncated = typing.cast("NDArray[np.floating]", value_1[~time_1_in_2])
Raises:
ValueError: If a key mismatch occurs between the dictionaries or if the concatenated
time array contains non-unique values.
NotImplementedError: If ``append`` is requested for an unsupported file format.
"""
match file_path.suffix:
case ".mat":
return self._append_mat_data(file_path, data_dict_to_save)
case ".pickle":
return self._append_pickle_data(file_path, data_dict_to_save)
case _:
msg = f"Appending to '{file_path.suffix}' files is not supported by DataOrgStrategy."
logger.error(msg)
raise NotImplementedError(msg)

def save_single_file(self, file_path: Path, dict_to_save: dict[str, Any], *, append: bool = False) -> None:
"""Saves variable data to a single file in one of the supported formats (.mat, .pickle, .h5).

value_2 = data_dict_to_save[key]
Parameters:
file_path (Path): The path to the file where the dictionary will be saved.
The file extension determines the format.
dict_to_save (dict[str, Any]): The dictionary containing variable data to save.
append (bool, optional): If True and the file exists, appends data to the existing file (if supported).
Defaults to False.

Raises:
NotImplementedError: If the file format specified by the file extension is not supported.

Supported formats:
- .mat: Saves using scipy.io.savemat.
- .pickle: Saves using pickle.dump.
"""
logger.info(f"Saving file {file_path.name}...")

concatenated_value = (
value_2
if value_1_truncated.size == 0
else np.insert(value_1_truncated, idx_to_insert, value_2, axis=0)
)
file_path.parent.mkdir(parents=True, exist_ok=True)
format_name = file_path.suffix.lower()

if key == "time" and len(np.unique(concatenated_value)) != len(concatenated_value):
msg = "Time values were not unique when concatinating arrays!"
raise ValueError(msg)
data_dict_to_save[key] = concatenated_value
if file_path.exists() and append:
dict_to_save = self.append_data(file_path, dict_to_save)

elif isinstance(value_1, dict): # this is the metadata dict
continue
if format_name == ".mat":
# Save the dictionary into a .mat file
savemat(str(file_path), dict_to_save)

return data_dict_to_save
elif format_name == ".pickle":
with file_path.open("wb") as file:
pickle.dump(dict_to_save, file)
Loading
Loading