Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
50 changes: 48 additions & 2 deletions src/access_moppy/sea_ice.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,6 +106,30 @@ def _get_dim_rename(self):
f"source_id must start with 'ACCESS-'."
)

def _normalise_time_bounds(self):
    """Give the time-bounds variable its CMOR name and dimension.

    CICE stores the time-averaging interval as ``time_bounds`` on a ``d2``
    dimension, whereas CMIP output uses ``time_bnds`` on ``bnds``. The
    variable is located through the ``bounds`` attribute of the ``time``
    coordinate rather than by a fixed name. It is renamed to ``time_bnds``,
    its first dimension other than ``time`` is renamed to ``bnds``, and the
    ``bounds`` attribute is pointed at the new name.

    Nothing is changed when the dataset has no ``time`` variable, when
    ``time`` has no (or an empty) ``bounds`` attribute, when that attribute
    names a variable that is not in the dataset, or when both the variable
    and its dimension already carry the canonical names.
    """
    dataset = self.ds
    if "time" not in dataset:
        return

    source_name = dataset["time"].attrs.get("bounds")
    if not source_name or source_name not in dataset:
        return

    # First dimension of the bounds variable that is not the time axis.
    extra_dims = [dim for dim in dataset[source_name].dims if dim != "time"]
    source_dim = extra_dims[0] if extra_dims else None

    # Collect only the names that differ from the CMOR ones.
    renames = {}
    if source_name != "time_bnds":
        renames[source_name] = "time_bnds"
    if source_dim and source_dim != "bnds":
        renames[source_dim] = "bnds"
    if not renames:
        return

    self.ds = dataset.rename(renames)
    # Keep the time coordinate's reference in step with the renamed variable.
    self.ds["time"].attrs["bounds"] = "time_bnds"

def select_and_process_variables(self):
"""Select and process variables for the CMOR output."""
calc = self.mapping[self.cmor_name]["calculation"]
Expand Down Expand Up @@ -149,11 +173,19 @@ def select_and_process_variables(self):
+ list(axes_rename_map.keys())
+ list(bounds_rename_map.keys())
)
# CICE writes the time-averaging interval as `time_bounds` (on a `d2`
# dimension), which does not match the `<dim>_bnds` name the bounds map
# expects. Pull it in explicitly, otherwise it is dropped at load and the
# `time:bounds` attribute is left pointing at a missing variable.
required.add("time_bounds")
self.load_dataset(required_vars=required)

# Ensure time dimension is sorted
self.sort_time_dimension()

# Normalise the model time-bounds variable to the CMOR name/dim.
self._normalise_time_bounds()

# Handle the calculation type
if calc["type"] in ("direct", "dataset_function") and not required_vars:
raise ValueError(
Expand Down Expand Up @@ -190,15 +222,18 @@ def select_and_process_variables(self):
# Get sea-ice dimension rename map
seaice_dim_rename = self._get_dim_rename()

# Rename axes and bounds variables
# Rename axes and bounds variables. The grid dimensions ni/nj are pure
# dimensions (no same-named coordinate variable), so `k in self.ds` alone
# would drop them and leave the data variable on (nj, ni) while the
# supergrid coordinates are built on (j, i); include dimension names too.
rename_map = {
k: v
for k, v in {
**bounds_rename_map,
**axes_rename_map,
**seaice_dim_rename,
}.items()
if k in self.ds
if k in self.ds or k in self.ds.dims
}

# Drop any existing variables that have the same names as our target names
Expand Down Expand Up @@ -272,6 +307,17 @@ def update_attributes(self):
{"standard_name": "longitude", "units": "degrees_east"}
)

# The supergrid latitude/longitude replace the model's curvilinear
# coordinates (renamed from TLAT/TLON to lat/lon). Drop the redundant
# originals — they carry no standard_name and would otherwise be
# mis-detected as data variables — and point the data variable at the
# supergrid auxiliary coordinates, clearing the stale
# `coordinates = "TLON TLAT time"` inherited from the model file.
self.ds = self.ds.drop_vars(
["lat", "lon", "lat_bnds", "lon_bnds"], errors="ignore"
)
self.ds[self.cmor_name].attrs["coordinates"] = "latitude longitude"

self.ds.attrs = {
k: v
for k, v in self.vocab.get_required_global_attributes().items()
Expand Down
169 changes: 169 additions & 0 deletions tests/unit/test_sea_ice.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,3 +262,172 @@ def test_update_attributes_sets_time_cf_attributes(self, temp_dir):

assert cmoriser.ds["time"].attrs["standard_name"] == "time"
assert cmoriser.ds["time"].attrs["axis"] == "T"

@pytest.mark.unit
def test_pure_dimension_grid_is_renamed_to_i_j(
    self, mock_vocab, mock_mapping, temp_dir
):
    """ni/nj as *pure dimensions* (no coordinate variable) must still be
    renamed to i/j, so the data variable aligns with the supergrid.

    Reproduces the real-data case where the `if k in self.ds` filter dropped
    the dimension rename and left siconc on (nj, ni).

    The input variable is declared on (nj, time, ni) while the final
    assertion expects (time, j, i), so the test also requires time to end up
    as the leading dimension.

    NOTE(review): mock_vocab, mock_mapping and temp_dir are presumably pytest
    fixtures defined elsewhere in the test suite -- confirm.
    """
    # No coords for nj/ni -> they are pure dimensions, unlike the other
    # fixture which makes them coordinate variables.
    ds = xr.Dataset(
        data_vars={
            "ice_conc": (
                ["nj", "time", "ni"],
                np.random.random((2, 3, 4)),
                {"coordinates": "TLON TLAT", "units": "1"},
            )
        },
        coords={
            "time": ("time", pd.date_range("2000-01-01", periods=3, freq="ME"))
        },
    )
    # Sanity-check the fixture itself before exercising the code under test.
    assert "nj" not in ds.variables  # pure dimension

    # Supergrid and load_dataset are patched out and the dataset is injected
    # directly onto the instance instead of being read from "test.nc".
    with patch("access_moppy.sea_ice.Supergrid"):
        with patch("access_moppy.ocean.CMORiser.load_dataset", return_value=None):
            cmoriser = SeaIce_CMORiser(
                input_paths=["test.nc"],
                output_path=str(temp_dir),
                compound_name="SImon.siconc",
                vocab=mock_vocab,
                variable_mapping=mock_mapping,
            )
            cmoriser.ds = ds
            cmoriser.select_and_process_variables()

    # The output variable must be on the renamed grid dimensions, and the
    # model dimension names must be gone from the dataset entirely.
    assert cmoriser.ds["siconc"].dims == ("time", "j", "i")
    assert "nj" not in cmoriser.ds.dims and "ni" not in cmoriser.ds.dims

@pytest.mark.unit
def test_update_attributes_drops_model_lat_lon_and_sets_coordinates(self, temp_dir):
    """Model lat/lon (renamed from TLAT/TLON) are dropped in favour of the
    supergrid latitude/longitude, and the data variable's stale coordinates
    attribute is replaced with 'latitude longitude'.

    The vocabulary, mapping, dataset and supergrid output are all built
    in-line, and ``_check_calendar`` is patched out for the call to
    ``update_attributes``.

    NOTE(review): temp_dir is presumably a pytest fixture defined elsewhere
    in the test suite -- confirm.
    """
    ny, nx, nt = 2, 4, 3
    # Stand-in vocabulary exposing only the members configured below.
    vocab = Mock()
    vocab.source_id = "ACCESS-ESM1.6"
    vocab.variable = {"units": "1", "type": "real"}
    vocab._get_nominal_resolution = Mock(return_value="1deg")
    vocab.get_required_global_attributes = Mock(return_value={})
    vocab.axes = {
        "time": {"out_name": "time", "standard_name": "time", "axis": "T"}
    }
    mapping = {
        "siconc": {
            "model_variables": ["ice_conc"],
            "calculation": {"type": "direct"},
        }
    }
    # Dataset as it would look after variable selection: data already on
    # (time, j, i), with leftover model lat/lon still present.
    ds = xr.Dataset(
        {
            "siconc": (
                ["time", "j", "i"],
                np.ones((nt, ny, nx), dtype=np.float32),
                {"coordinates": "TLON TLAT time"},  # stale, dangling
            ),
            # leftover model coords (renamed from TLAT/TLON), no standard_name
            "lat": (["j", "i"], np.zeros((ny, nx)), {"units": "degrees_north"}),
            "lon": (["j", "i"], np.zeros((ny, nx)), {"units": "degrees_east"}),
        },
        coords={
            "time": ("time", pd.date_range("2000-01-01", periods=nt, freq="ME")),
            "i": ("i", np.arange(nx)),
            "j": ("j", np.arange(ny)),
        },
    )
    # Canned return value for the mocked supergrid's extract_grid().
    grid_info = {
        "i": np.arange(nx),
        "j": np.arange(ny),
        "vertices": np.arange(4),
        "latitude": xr.DataArray(np.ones((ny, nx)), dims=("j", "i")),
        "longitude": xr.DataArray(np.ones((ny, nx)), dims=("j", "i")),
        "vertices_latitude": xr.DataArray(
            np.ones((ny, nx, 4)), dims=("j", "i", "vertices")
        ),
        "vertices_longitude": xr.DataArray(
            np.ones((ny, nx, 4)), dims=("j", "i", "vertices")
        ),
    }
    with patch("access_moppy.sea_ice.Supergrid"):
        cmoriser = SeaIce_CMORiser(
            input_paths=["test.nc"],
            output_path=str(temp_dir),
            compound_name="SImon.siconc",
            vocab=vocab,
            variable_mapping=mapping,
        )
        # Inject the state update_attributes() is expected to read, replacing
        # whatever the constructor set up.
        cmoriser.ds = ds
        cmoriser.grid_type = "T"
        cmoriser.symmetric = None
        cmoriser.supergrid = Mock()
        cmoriser.supergrid.extract_grid.return_value = grid_info

        with patch.object(cmoriser, "_check_calendar"):
            cmoriser.update_attributes()

    # Model coordinates are gone, supergrid ones are present, and the data
    # variable now references the supergrid auxiliary coordinates.
    assert "lat" not in cmoriser.ds and "lon" not in cmoriser.ds
    assert "latitude" in cmoriser.ds and "longitude" in cmoriser.ds
    assert cmoriser.ds["siconc"].attrs["coordinates"] == "latitude longitude"


class TestNormaliseTimeBounds:
    """Exercise SeaIce_CMORiser._normalise_time_bounds on small in-memory datasets."""

    @staticmethod
    def _bare():
        # Allocate the object without running __init__, so no constructor
        # arguments are needed; each test attaches `ds` itself.
        return object.__new__(SeaIce_CMORiser)

    @pytest.mark.unit
    def test_renames_cice_time_bounds_and_dim(self):
        """A 'time_bounds' variable on 'd2' ends up as 'time_bnds' on 'bnds'."""
        dataset = xr.Dataset(
            {"time_bounds": (["time", "d2"], np.zeros((3, 2)))},
            coords={"time": ("time", np.arange(3, dtype=float))},
        )
        dataset["time"].attrs["bounds"] = "time_bounds"
        cmoriser = self._bare()
        cmoriser.ds = dataset

        cmoriser._normalise_time_bounds()

        assert "time_bnds" in cmoriser.ds
        assert "time_bounds" not in cmoriser.ds
        assert cmoriser.ds["time_bnds"].dims == ("time", "bnds")
        assert cmoriser.ds["time"].attrs["bounds"] == "time_bnds"

    @pytest.mark.unit
    def test_noop_when_already_canonical(self):
        """Canonical time_bnds/bnds input passes through untouched, without error."""
        dataset = xr.Dataset(
            {"time_bnds": (["time", "bnds"], np.zeros((2, 2)))},
            coords={"time": ("time", np.arange(2, dtype=float))},
        )
        dataset["time"].attrs["bounds"] = "time_bnds"
        cmoriser = self._bare()
        cmoriser.ds = dataset

        cmoriser._normalise_time_bounds()

        assert cmoriser.ds["time_bnds"].dims == ("time", "bnds")
        assert cmoriser.ds["time"].attrs["bounds"] == "time_bnds"

    @pytest.mark.unit
    def test_noop_when_bounds_attr_absent_or_missing_var(self):
        """Missing time:bounds attr, or one naming an absent variable, is a no-op."""
        # Case 1: the time coordinate carries no bounds attribute.
        without_attr = self._bare()
        without_attr.ds = xr.Dataset(
            coords={"time": ("time", np.arange(2, dtype=float))}
        )
        without_attr._normalise_time_bounds()  # must not raise
        assert "bounds" not in without_attr.ds["time"].attrs

        # Case 2: the bounds attribute names a variable the dataset lacks.
        dangling_ds = xr.Dataset(
            coords={"time": ("time", np.arange(2, dtype=float))}
        )
        dangling_ds["time"].attrs["bounds"] = "time_bounds"
        dangling = self._bare()
        dangling.ds = dangling_ds
        dangling._normalise_time_bounds()  # must not raise
        assert "time_bnds" not in dangling.ds