Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 42 additions & 1 deletion src/access_moppy/vocabulary_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -883,6 +883,47 @@ def generate_filename(

return rendered_filename

# Canonical CMIP6-CV ``experiment`` labels resolved via esgvoc, keyed by
# experiment_id. esgvoc lookups touch a database, so resolve each
# experiment at most once per process.
#
# A value of ``None`` is also stored: it records that the lookup was
# attempted and produced no label (esgvoc failed or returned nothing), so
# the fallback path does not retry the lookup on every call.
_EXPERIMENT_LABEL_CACHE: Dict[str, Optional[str]] = {}

def _resolve_experiment_label(self) -> str:
    """Return the canonical ``experiment`` global-attribute value.

    The WCRP compliance checker (cc-plugin-wcrp + esgvoc) compares the
    global ``experiment`` attribute against esgvoc's CMIP6 controlled
    vocabulary, whose label (e.g. ``"Historical simulation"``) differs from
    the descriptive phrase carried in the legacy CMIP6_CVs JSON bundled with
    this package (e.g. ``"all-forcing simulation of the recent past"``).

    Resolve the label from esgvoc so the written attribute matches what the
    checker validates. Fall back to the bundled CV value when esgvoc is
    unavailable or carries no label for this experiment -- in the latter
    case the checker skips the comparison, so the legacy value is accepted.

    The outcome of the lookup (including "no label", stored as ``None``) is
    cached per ``experiment_id`` on the class, so esgvoc is queried at most
    once per experiment per process.

    Returns:
        The esgvoc label when a non-empty one was resolved; otherwise the
        bundled CV ``experiment`` value, or ``""`` if that key is absent.
    """
    # Local import: the module-level import block is not touched by this
    # method, and logging is only needed on the failure path below.
    import logging

    legacy_label = self.experiment.get("experiment", "")
    cache = CMIP6Vocabulary._EXPERIMENT_LABEL_CACHE
    eid = self.experiment_id

    if eid not in cache:
        label: Optional[str] = None
        try:
            # Imported lazily so a missing esgvoc only triggers the fallback.
            import esgvoc.api as voc

            term = voc.get_term_in_collection(
                project_id="cmip6",
                collection_id="experiment_id",
                term_id=eid,
            )
            if term is not None:
                label = getattr(term, "experiment", None)
        except Exception:
            # Deliberately best-effort: any esgvoc problem (not installed,
            # database error, ...) must not break attribute generation.
            # Record the reason instead of discarding it, so an unexpected
            # fallback to the legacy label can be diagnosed.
            label = None
            logging.getLogger(__name__).debug(
                "esgvoc lookup failed for experiment_id %r; "
                "falling back to the bundled CV experiment label",
                eid,
                exc_info=True,
            )
        # Cache failures too (as None) so the lookup is not retried per call.
        cache[eid] = label

    # An empty or missing esgvoc label falls through to the legacy value.
    return cache[eid] or legacy_label

def get_required_global_attributes(self) -> Dict[str, Any]:
now = datetime.now(tz=timezone.utc).strftime("%Y-%m-%dT%H:%M:%SZ")
variant = self.get_variant_components()
Expand All @@ -892,7 +933,7 @@ def get_required_global_attributes(self) -> Dict[str, Any]:
"activity_id": self._resolve_activity_id(),
"creation_date": now,
"data_specs_version": self.cmip_table["Header"].get("data_specs_version"),
"experiment": self.experiment["experiment"],
"experiment": self._resolve_experiment_label(),
"experiment_id": self.experiment_id,
"forcing_index": variant["forcing_index"],
"frequency": self.variable["frequency"],
Expand Down
82 changes: 82 additions & 0 deletions tests/unit/test_vocabulary_processors.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
"""Unit tests for vocabulary processor helper methods."""

import sys
from types import SimpleNamespace
from unittest.mock import mock_open, patch

import numpy as np
Expand Down Expand Up @@ -890,3 +892,83 @@ def test_load_table_error_includes_filename_and_directory(
msg = str(exc_info.value)
assert "looked for" in msg
assert str(vocab.table_dir) in msg


# ---------------------------------------------------------------------------
# _resolve_experiment_label: canonical CMIP6 `experiment` global attribute
# (see issue-experiment-attribute-cv-mismatch). The WCRP checker compares the
# file's `experiment` against esgvoc's label, which differs from the legacy
# CMIP6_CVs phrase bundled with this package.
# ---------------------------------------------------------------------------
def _fake_esgvoc_module(term):
"""Build a stand-in ``esgvoc.api`` module returning ``term`` from any lookup."""
api = SimpleNamespace(get_term_in_collection=lambda **kwargs: term)
return {"esgvoc": SimpleNamespace(api=api), "esgvoc.api": api}


@pytest.mark.unit
def test_resolve_experiment_label_uses_esgvoc(vocabulary_instance):
    """esgvoc's canonical label overrides the legacy CV description."""
    vocabulary_instance.experiment_id = "historical"
    vocabulary_instance.experiment = {
        "experiment": "all-forcing simulation of the recent past"
    }
    term = SimpleNamespace(experiment="Historical simulation")
    # patch.dict(..., clear=True) empties the class-level cache for the
    # duration of the block and restores its previous contents on exit, so
    # the fake label cached here cannot leak into other tests.
    with patch.dict(
        CMIP6Vocabulary._EXPERIMENT_LABEL_CACHE, clear=True
    ), patch.dict(sys.modules, _fake_esgvoc_module(term)):
        assert (
            vocabulary_instance._resolve_experiment_label() == "Historical simulation"
        )


@pytest.mark.unit
def test_resolve_experiment_label_falls_back_when_esgvoc_label_empty(
    vocabulary_instance,
):
    """When esgvoc has no label for the experiment, keep the legacy CV value."""
    vocabulary_instance.experiment_id = "piControl"
    vocabulary_instance.experiment = {"experiment": "pre-industrial control"}
    term = SimpleNamespace(experiment=None)
    # Start from an empty class-level cache and restore it afterwards, so the
    # ``None`` entry recorded for "piControl" does not persist process-wide.
    with patch.dict(
        CMIP6Vocabulary._EXPERIMENT_LABEL_CACHE, clear=True
    ), patch.dict(sys.modules, _fake_esgvoc_module(term)):
        assert (
            vocabulary_instance._resolve_experiment_label() == "pre-industrial control"
        )


@pytest.mark.unit
def test_resolve_experiment_label_falls_back_when_esgvoc_missing(vocabulary_instance):
    """Without esgvoc installed, fall back to the legacy CV value."""
    vocabulary_instance.experiment_id = "historical"
    vocabulary_instance.experiment = {
        "experiment": "all-forcing simulation of the recent past"
    }
    # A ``None`` entry in sys.modules makes `import esgvoc.api` raise
    # ImportError. The cache is cleared for the block and restored on exit so
    # the failed lookup recorded for "historical" does not leak to other tests.
    with patch.dict(
        CMIP6Vocabulary._EXPERIMENT_LABEL_CACHE, clear=True
    ), patch.dict(sys.modules, {"esgvoc": None, "esgvoc.api": None}):
        assert (
            vocabulary_instance._resolve_experiment_label()
            == "all-forcing simulation of the recent past"
        )


@pytest.mark.unit
def test_resolve_experiment_label_is_cached(vocabulary_instance):
    """The esgvoc lookup is performed at most once per experiment_id."""
    vocabulary_instance.experiment_id = "historical"
    vocabulary_instance.experiment = {"experiment": "legacy"}
    calls = {"n": 0}

    def _counting_lookup(**kwargs):
        # Count every lookup that actually reaches the fake esgvoc API.
        calls["n"] += 1
        return SimpleNamespace(experiment="Historical simulation")

    api = SimpleNamespace(get_term_in_collection=_counting_lookup)
    fake = {"esgvoc": SimpleNamespace(api=api), "esgvoc.api": api}
    # Both calls share one freshly emptied cache; patch.dict restores the
    # cache's previous contents on exit so the fake label does not leak.
    with patch.dict(
        CMIP6Vocabulary._EXPERIMENT_LABEL_CACHE, clear=True
    ), patch.dict(sys.modules, fake):
        first = vocabulary_instance._resolve_experiment_label()
        second = vocabulary_instance._resolve_experiment_label()

    assert first == second == "Historical simulation"
    assert calls["n"] == 1