Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
40 changes: 40 additions & 0 deletions src/access_moppy/atmosphere.py
Original file line number Diff line number Diff line change
Expand Up @@ -115,6 +115,37 @@ def remove_spurious_time_dimensions(self, required_vars):
}
self.ds = self.ds.assign(corrections)

def _retarget_renamed_references(self, rename_map):
    """Point ``coordinates`` / ``formula_terms`` attributes at renamed variables.

    ``Dataset.rename`` changes variable names but does not touch attribute
    strings that mention those variables, so such references would keep the
    pre-rename names. This walks every variable in ``self.ds`` and rewrites
    the two attributes in place.

    Args:
        rename_map: Mapping of old variable name -> new variable name. Tokens
            that are not keys of this mapping are emitted unchanged. A falsy
            mapping makes the call a no-op.

    Returns:
        None. The attribute dictionaries of the variables are mutated.

    Attribute handling:
        - ``coordinates`` is treated as a whitespace-separated list of names.
        - ``formula_terms`` has the shape ``"<term>: <variable> ..."``; tokens
          ending in ``:`` are term names and are kept verbatim, every other
          token is looked up in ``rename_map``.

    Both attributes are only processed when they hold a non-empty string, and
    the rewritten value is re-joined with single spaces.
    """
    if not rename_map:
        return

    def renamed(token):
        # Fall back to the token itself when it was not renamed.
        return rename_map.get(token, token)

    for name in self.ds.variables:
        var_attrs = self.ds[name].attrs

        coord_ref = var_attrs.get("coordinates")
        if isinstance(coord_ref, str) and coord_ref:
            var_attrs["coordinates"] = " ".join(
                renamed(token) for token in coord_ref.split()
            )

        formula_ref = var_attrs.get("formula_terms")
        if isinstance(formula_ref, str) and formula_ref:
            # Term keys carry a trailing colon; only variable tokens are remapped.
            rewritten = [
                token if token.endswith(":") else renamed(token)
                for token in formula_ref.split()
            ]
            var_attrs["formula_terms"] = " ".join(rewritten)

def select_and_process_variables(self):
# Check if this is an internal calculation that doesn't need input variables
calc = self.mapping[self.cmor_name]["calculation"]
Expand Down Expand Up @@ -263,6 +294,15 @@ def select_and_process_variables(self):
self.ds = self.ds.drop_vars(conflicting_vars, errors="ignore")

self.ds = self.ds.rename(rename_map)
# rename() relabels the variables but not the attribute *strings* that
# reference them. Re-point any `coordinates` / `formula_terms` references
# at the new names so hybrid-height terms (e.g. sigma_theta -> b,
# surface_altitude -> orog, theta_level_height -> lev) resolve instead of
# dangling on the pre-rename input names. Use the full intended rename
# (not the filtered `rename_map`): a dataset_function such as
# cl_level_to_height renames theta_level_height -> lev itself, so that
# key is absent from `rename_map` yet still referenced in the attrs.
self._retarget_renamed_references({**bounds_rename_map, **axes_rename_map})
# Drop stale units from renamed coordinates; update_attributes will
# assign the correct CMIP units from the vocabulary.
for old_name, new_name in rename_map.items():
Expand Down
99 changes: 99 additions & 0 deletions tests/unit/test_atmosphere.py
Original file line number Diff line number Diff line change
Expand Up @@ -1849,3 +1849,102 @@ def test_unsupported_calc_type_lists_supported(self, tmp_path):
assert "formula" in msg
assert "dataset_function" in msg
assert "internal" in msg


# ---------------------------------------------------------------------------
# Tests for _retarget_renamed_references (hybrid-height coordinate/formula_terms)
# ---------------------------------------------------------------------------


def _bare_atmos_cmoriser(ds):
    """Build an Atmosphere_CMORiser without running ``__init__``.

    The instance is allocated directly and only its ``ds`` attribute is
    populated, which is all that the pure helper methods under test read.
    """
    bare_instance = object.__new__(Atmosphere_CMORiser)
    bare_instance.ds = ds
    return bare_instance


class TestRetargetRenamedReferences:
    """
    Dataset.rename() leaves the `coordinates`/`formula_terms` attribute *strings*
    pointing at the pre-rename input names. These tests check that
    _retarget_renamed_references rewrites those strings to the new names (so the
    hybrid-height terms resolve) and leaves references to names that were not
    renamed exactly as they were.
    """

    # Complete intended rename for cl-family variables.
    RENAME = {
        "theta_level_height": "lev",
        "sigma_theta": "b",
        "surface_altitude": "orog",
        "lat": "lat",  # identity entries must be harmless
        "lon": "lon",
        "time": "time",
    }

    def _cl_like_ds(self):
        """Return a small dataset whose attrs still use the pre-rename names."""
        cl_attrs = {"coordinates": "sigma_theta surface_altitude theta_level_height"}
        lev_attrs = {
            "formula_terms": (
                "a: theta_level_height b: sigma_theta orog: surface_altitude"
            )
        }
        data_vars = {
            "cl": (["lev"], np.zeros(3), cl_attrs),
            "lev": (["lev"], np.arange(3, dtype=float), lev_attrs),
        }
        return xr.Dataset(data_vars)

    @pytest.mark.unit
    def test_coordinates_retargeted_to_new_names(self):
        cmoriser = _bare_atmos_cmoriser(self._cl_like_ds())
        cmoriser._retarget_renamed_references(self.RENAME)
        retargeted = cmoriser.ds["cl"].attrs["coordinates"]
        assert retargeted == "b orog lev"

    @pytest.mark.unit
    def test_formula_terms_variables_remapped_term_keys_preserved(self):
        cmoriser = _bare_atmos_cmoriser(self._cl_like_ds())
        cmoriser._retarget_renamed_references(self.RENAME)
        # The term keys (a:, b:, orog:) stay put; only variable tokens change.
        retargeted = cmoriser.ds["lev"].attrs["formula_terms"]
        assert retargeted == "a: lev b: b orog: orog"

    @pytest.mark.unit
    def test_unrenamed_references_untouched(self):
        """References to names missing from rename_map are left unchanged."""
        tas = (["lat", "lon"], np.zeros((2, 2)), {"coordinates": "height"})
        ds = xr.Dataset(
            {"tas": tas},
            coords={"lat": [0.0, 1.0], "lon": [0.0, 1.0]},
        )
        cmoriser = _bare_atmos_cmoriser(ds)
        cmoriser._retarget_renamed_references(self.RENAME)
        assert cmoriser.ds["tas"].attrs["coordinates"] == "height"

    @pytest.mark.unit
    def test_empty_rename_map_is_noop(self):
        cmoriser = _bare_atmos_cmoriser(self._cl_like_ds())
        cmoriser._retarget_renamed_references({})
        expected = "sigma_theta surface_altitude theta_level_height"
        assert cmoriser.ds["cl"].attrs["coordinates"] == expected

    @pytest.mark.unit
    def test_variables_without_references_unaffected(self):
        """A variable lacking coordinates/formula_terms neither raises nor changes."""
        pr_attrs = {"units": "kg m-2 s-1"}
        ds = xr.Dataset({"pr": (["lat"], np.zeros(2), pr_attrs)})
        cmoriser = _bare_atmos_cmoriser(ds)
        cmoriser._retarget_renamed_references(self.RENAME)
        assert cmoriser.ds["pr"].attrs == {"units": "kg m-2 s-1"}