ACCESS-NRI · rhaegar325 · May 26, 2026 · May 26, 2026 · May 26, 2026 · May 26, 2026
diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml
@@ -24,9 +24,9 @@ jobs:
       - uses: actions/checkout@v6.0.2
         with:
           submodules: recursive
-      - uses: prefix-dev/setup-pixi@v0.9.5
+      - uses: prefix-dev/setup-pixi@v0.9.6
         with:
-          pixi-version: v0.39.5
+          pixi-version: latest
       - name: Run pre-commit
         run: pixi run -e dev pre-commit run --all-files
 
@@ -41,9 +41,9 @@ jobs:
           submodules: recursive
 
       - name: Setup pixi
-        uses: prefix-dev/setup-pixi@v0.9.5
+        uses: prefix-dev/setup-pixi@v0.9.6
         with:
-          pixi-version: v0.39.5
+          pixi-version: latest
           environments: test
 
       - name: Create default config file

diff --git a/.github/workflows/full-tests.yml b/.github/workflows/full-tests.yml
@@ -27,9 +27,9 @@ jobs:
           submodules: recursive
 
       - name: Setup pixi
-        uses: prefix-dev/setup-pixi@b92f01076d41831104bdb4786cfe1f0a5462a1ce # v0.9.0
+        uses: prefix-dev/setup-pixi@v0.9.6
         with:
-          pixi-version: v0.39.5
+          pixi-version: latest
 
       - name: Create default config file
         run: |

diff --git a/pyproject.toml b/pyproject.toml
@@ -143,6 +143,7 @@ quote-style = "double"  # Use double quotes
 "test_mop.py" = ["I001"]
 
 [tool.pixi.workspace]
+requires-pixi = ">=0.49"
 channels = ["conda-forge", { channel = "accessnri", exclude-newer = "0d" }]
 exclude-newer = "7d"
 platforms = ["linux-64"]

diff --git a/src/access_moppy/base.py b/src/access_moppy/base.py
@@ -16,6 +16,7 @@
     FrequencyMismatchError,
     IncompatibleFrequencyError,
     ResamplingRequiredWarning,
+    normalize_cf_time_units,
     type_mapping,
     validate_and_resample_if_needed,
     validate_cmip6_frequency_compatibility,
@@ -801,6 +802,21 @@ def write(self):
 
         Automatically handles character/string coordinates with proper NetCDF encoding.
         """
+        # ========== Normalize CF Time-Coordinate Units ==========
+        # Source models differ in how they write the reference datetime: the UM
+        # atmosphere and CABLE land models omit the seconds field (e.g.
+        # "days since 0001-01-01 00:00"), which fails the WCRP units pattern
+        # check, whereas the ocean/sea-ice models write the full HH:MM:SS form.
+        # Re-emit any CF time units in canonical form here — the sole write path
+        # for every realm — so all variables are normalized uniformly.  This is
+        # meaning-preserving, so the numeric time values are unaffected.
+        for var in self.ds.variables:
+            units = self.ds[var].attrs.get("units")
+            normalized = normalize_cf_time_units(units)
+            if normalized != units:
+                self.ds[var].attrs["units"] = normalized
+                logger.debug("Normalized '%s' units: %r -> %r", var, units, normalized)
+
         # ========== Prepare String Coordinates ==========
         # Detect and prepare all string/character coordinates before writing
         string_coords_info = self._prepare_string_coordinates()

diff --git a/src/access_moppy/derivations/calc_utils.py b/src/access_moppy/derivations/calc_utils.py
@@ -2,7 +2,9 @@
 
 from importlib.resources import as_file
 
+import cftime
 import numpy as np
+import pandas as pd
 import xarray as xr
 
 from access_moppy.utilities import get_bundled_resource_path
@@ -160,6 +162,36 @@ def rename_coord(var1, var2, ndim, override=False):
     return var2, override
 
 
+def _monthly_midpoint_coord(time_da: xr.DataArray) -> xr.DataArray:
+    """Relabel a resampled monthly time coordinate to each month's midpoint.
+
+    ``resample(..., "ME")`` labels every monthly bin at the month-end
+    timestamp. CF/CMIP6 require the time coordinate of a time-mean quantity
+    to be the midpoint of its cell bounds, which for monthly data is the
+    centre of the ``[month start, next month start)`` interval. This recentres
+    each label so ``time`` matches ``midpoint(time_bnds)``. Handles both cftime
+    and datetime64 coordinates.
+    """
+    values = time_da.values
+    is_cftime = isinstance(values.flat[0], cftime.datetime)
+    midpoints = np.empty(len(values), dtype=object)
+    for i, t in enumerate(values):
+        if is_cftime:
+            start = cftime.datetime(t.year, t.month, 1, calendar=t.calendar)
+            if t.month == 12:
+                nxt = cftime.datetime(t.year + 1, 1, 1, calendar=t.calendar)
+            else:
+                nxt = cftime.datetime(t.year, t.month + 1, 1, calendar=t.calendar)
+        else:
+            ts = pd.Timestamp(t)
+            start = pd.Timestamp(year=ts.year, month=ts.month, day=1)
+            nxt = start + pd.offsets.MonthBegin(1)
+        midpoints[i] = start + (nxt - start) / 2
+    if not is_cftime:
+        midpoints = pd.DatetimeIndex(midpoints).values
+    return time_da.copy(data=midpoints)
+
+
 def calculate_monthly_minimum(
     da: xr.DataArray, time_dim: str = "time", preserve_attrs: bool = True
 ) -> xr.DataArray:
@@ -202,7 +234,7 @@ def calculate_monthly_minimum(
     - Input data should have temporal frequency higher than monthly (daily, 3hr, 6hr, etc.)
     - The function uses xarray's resample method with 'M' frequency (end of month)
     - Cell methods attribute is updated to reflect the temporal aggregation
-    - Time coordinate is preserved with monthly timestamps
+    - Time coordinate is set to each month's midpoint (centre of time_bnds)
     """
     if time_dim not in da.dims:
         raise ValueError(
@@ -225,6 +257,11 @@ def calculate_monthly_minimum(
 
     try:
         monthly_min = da.resample({time_dim: "ME"}).min(keep_attrs=preserve_attrs)
+        # "ME" labels each bin at month-end; recentre to the cell midpoint
+        # so the time coordinate matches midpoint(time_bnds) (CF/CMIP6).
+        monthly_min = monthly_min.assign_coords(
+            {time_dim: _monthly_midpoint_coord(monthly_min[time_dim])}
+        )
 
         if preserve_attrs:
             # Update cell_methods to reflect the temporal aggregation
@@ -284,7 +321,7 @@ def calculate_monthly_maximum(
     - Input data should have temporal frequency higher than monthly (daily, 3hr, 6hr, etc.)
     - The function uses xarray's resample method with 'M' frequency (end of month)
     - Cell methods attribute is updated to reflect the temporal aggregation
-    - Time coordinate is preserved with monthly timestamps
+    - Time coordinate is set to each month's midpoint (centre of time_bnds)
     """
     if time_dim not in da.dims:
         raise ValueError(
@@ -307,6 +344,11 @@ def calculate_monthly_maximum(
 
     try:
         monthly_max = da.resample({time_dim: "ME"}).max(keep_attrs=preserve_attrs)
+        # "ME" labels each bin at month-end; recentre to the cell midpoint
+        # so the time coordinate matches midpoint(time_bnds) (CF/CMIP6).
+        monthly_max = monthly_max.assign_coords(
+            {time_dim: _monthly_midpoint_coord(monthly_max[time_dim])}
+        )
 
         if preserve_attrs:
             # Update cell_methods to reflect the temporal aggregation

diff --git a/src/access_moppy/utilities.py b/src/access_moppy/utilities.py
@@ -1,6 +1,7 @@
 import importlib.metadata as importlib_metadata
 import json
 import logging
+import re
 import warnings
 from datetime import timedelta
 from importlib.resources import files
@@ -2054,6 +2055,47 @@ def validate_and_resample_if_needed(
     return ds_resampled, True
 
 
+# CF time-coordinate units take the form "<interval> since <reference datetime>".
+# The WCRP ATTR004 check requires the reference datetime to be either a bare date
+# or a full "YYYY-MM-DD HH:MM:SS"; partial forms such as
+# "days since 0001-01-01 00:00" (no seconds — written by the UM atmosphere and
+# CABLE land models) fail the check.  This pattern captures the components so the
+# reference instant can be re-emitted in canonical form.
+_CF_TIME_UNITS_RE = re.compile(
+    r"^(?P<interval>\w+)\s+since\s+"
+    r"(?P<year>\d{1,4})-(?P<month>\d{1,2})-(?P<day>\d{1,2})"
+    r"(?:[ T](?P<hour>\d{1,2}):(?P<minute>\d{1,2})(?::(?P<second>\d{1,2}(?:\.\d+)?))?)?"
+    r"\s*$",
+    re.IGNORECASE,
+)
+
+
+def normalize_cf_time_units(units: Optional[str]) -> Optional[str]:
+    """Return *units* with its reference datetime padded to canonical CF form.
+
+    Converts partial or date-only reference datetimes to a full
+    ``"<interval> since YYYY-MM-DD HH:MM:SS"`` string, e.g.
+    ``"days since 0001-01-01 00:00"`` -> ``"days since 0001-01-01 00:00:00"``.
+
+    The reference *instant* is preserved (an absent or partial time-of-day is
+    midnight either way), so any numeric time values measured against these units
+    remain correct.  Strings that are not recognisable CF time units — including
+    those carrying a timezone offset — are returned unchanged.
+    """
+    if not isinstance(units, str):
+        return units
+    match = _CF_TIME_UNITS_RE.match(units.strip())
+    if match is None:
+        return units
+    g = match.groupdict()
+    return (
+        f"{g['interval']} since "
+        f"{int(g['year']):04d}-{int(g['month']):02d}-{int(g['day']):02d} "
+        f"{int(g['hour'] or 0):02d}:{int(g['minute'] or 0):02d}:"
+        f"{int(float(g['second'] or 0)):02d}"
+    )
+
+
 def calculate_time_bounds(
     ds: xr.Dataset, time_coord: str = "time", bnds_name: str = "nv"
 ) -> xr.DataArray:

diff --git a/tests/unit/test_base.py b/tests/unit/test_base.py
@@ -759,6 +759,72 @@ def test_write_preserves_data_values(self, cmoriser_with_dataset, temp_dir):
             finally:
                 ds_out.close()
 
+    @pytest.mark.unit
+    def test_write_normalizes_partial_time_units(
+        self, mock_vocab, mock_mapping, temp_dir
+    ):
+        """write() pads a partial CF time-units string to the full canonical form.
+
+        Regression test for the WCRP ATTR004 failure: UM atmosphere/land source
+        files write "days since YYYY-MM-DD 00:00" (no seconds), which must be
+        re-emitted as "...00:00:00".  Non-time units must be left untouched, and
+        the numeric time values must not change (meaning-preserving).
+        """
+        original_time = np.arange(3).astype(float)
+        ds = xr.Dataset(
+            {
+                "tas": (
+                    ["time", "lat", "lon"],
+                    np.random.rand(3, 4, 5).astype(np.float32),
+                    {"_FillValue": 1e20, "units": "K"},
+                ),
+            },
+            coords={
+                "time": (
+                    "time",
+                    original_time,
+                    {"units": "days since 2000-01-01 00:00", "calendar": "standard"},
+                ),
+                "lat": ("lat", np.arange(4)),
+                "lon": ("lon", np.arange(5)),
+            },
+            attrs={
+                "variable_id": "tas",
+                "table_id": "Amon",
+                "source_id": "ACCESS-ESM1-5",
+                "experiment_id": "historical",
+                "variant_label": "r1i1p1f1",
+                "grid_label": "gn",
+            },
+        )
+        cmoriser = CMORiser(
+            input_paths=["test.nc"],
+            output_path=str(temp_dir),
+            vocab=mock_vocab,
+            variable_mapping=mock_mapping,
+            compound_name="Amon.tas",
+        )
+        cmoriser.ds = ds
+        cmoriser.cmor_name = "tas"
+
+        with patch("access_moppy.base.psutil.virtual_memory") as mock_mem:
+            mock_mem.return_value = MagicMock(
+                total=32 * 1024**3, available=16 * 1024**3
+            )
+            cmoriser.write()
+
+        output_files = list(Path(temp_dir).glob("*.nc"))
+        ds_out = xr.open_dataset(output_files[0], decode_cf=False)
+        try:
+            # time units padded to full HH:MM:SS (the normalized branch)
+            assert ds_out["time"].attrs["units"] == "days since 2000-01-01 00:00:00"
+            # non-time units left untouched (the unchanged branch)
+            assert ds_out["tas"].attrs["units"] == "K"
+            # numeric time values preserved
+            np.testing.assert_array_equal(ds_out["time"].values, original_time)
+        finally:
+            ds_out.close()
+
     @pytest.mark.unit
     def test_write_bounds_attribute_handling(self, cmoriser_with_dataset, temp_dir):
         """

diff --git a/tests/unit/test_derivations_calc_utils.py b/tests/unit/test_derivations_calc_utils.py
@@ -420,6 +420,89 @@ def test_resample_failure_raises_runtime_error(self):
                 calculate_monthly_maximum(da)
 
 
+# ---------------------------------------------------------------------------
+# Monthly time-coordinate midpoint (CF/CMIP6 "time squareness", TIME001)
+# ---------------------------------------------------------------------------
+
+
+class TestMonthlyTimeCoordinateMidpoint:
+    """The monthly time coordinate must sit at the midpoint of its cell bounds.
+
+    Regression for tasmax/tasmin being stamped at month-end instead of the
+    midpoint of [month start, next month start) after resampling daily data.
+    """
+
+    @pytest.mark.unit
+    @pytest.mark.parametrize(
+        "func", [calculate_monthly_minimum, calculate_monthly_maximum]
+    )
+    def test_time_coord_is_month_midpoint(self, func):
+        # Daily data for Jan-Mar 1850, matching the failing ACCESS-ESM files.
+        times = xr.date_range(
+            "1850-01-01", "1850-03-31", freq="D", calendar="proleptic_gregorian"
+        )
+        da = xr.DataArray(
+            np.random.default_rng(0).random(len(times)),
+            dims=["time"],
+            coords={"time": times},
+            name="tas",
+        )
+        result = func(da)
+        got = result["time"].values.astype("datetime64[s]")
+        expected = np.array(
+            ["1850-01-16T12:00:00", "1850-02-15T00:00:00", "1850-03-16T12:00:00"],
+            dtype="datetime64[s]",
+        )
+        assert (got == expected).all()
+
+    @pytest.mark.unit
+    def test_time_equals_midpoint_of_time_bnds(self):
+        from access_moppy.utilities import calculate_time_bounds
+
+        times = xr.date_range(
+            "1850-01-01", "1850-03-31", freq="D", calendar="proleptic_gregorian"
+        )
+        da = xr.DataArray(
+            np.random.default_rng(0).random(len(times)),
+            dims=["time"],
+            coords={"time": times},
+            name="tasmax",
+        )
+        result = calculate_monthly_maximum(da)
+        ds = result.to_dataset(name="tasmax")
+        ds["time"].attrs.update(
+            {"units": "days since 1970-01-01 00:00", "calendar": "proleptic_gregorian"}
+        )
+        tb = calculate_time_bounds(ds, time_coord="time", bnds_name="bnds")
+
+        epoch = np.datetime64("1970-01-01T00:00:00")
+        day = np.timedelta64(1, "D")
+        tnum = (result["time"].values - epoch) / day
+        midpoint = ((tb.values - epoch) / day).mean(axis=1)
+        assert np.allclose(tnum, midpoint)
+        # First stored value is the one flagged by the compliance checker.
+        assert tnum[0] == pytest.approx(-43813.5)
+
+    @pytest.mark.unit
+    def test_cftime_calendar_midpoint(self):
+        import cftime
+
+        times = xr.date_range(
+            "1850-01-01", periods=90, freq="D", calendar="360_day", use_cftime=True
+        )
+        da = xr.DataArray(
+            np.random.default_rng(1).random(len(times)),
+            dims=["time"],
+            coords={"time": times},
+            name="tasmin",
+        )
+        result = calculate_monthly_minimum(da)
+        first = result["time"].values[0]
+        assert isinstance(first, cftime.datetime)
+        # 360_day month is 30 days -> midpoint is day 16 at 00:00.
+        assert (first.month, first.day, first.hour) == (1, 16, 0)
+
+
 # ---------------------------------------------------------------------------
 # Helpers for load_ressource_data
 # ---------------------------------------------------------------------------