Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 40 additions & 30 deletions src/access_moppy/vocabulary_processors.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from pathlib import Path
from typing import Any, Dict, List, Optional

import esgvoc.api as ev
import numpy as np
import xarray as xr

Expand Down Expand Up @@ -81,28 +82,34 @@ def _load_controlled_vocab(self) -> Dict[str, Any]:
return vocab

def _get_experiment(self) -> Dict[str, Any]:
try:
return self.vocab["experiment_id"][self.experiment_id]
except KeyError:
raise ValueError(
f"Experiment '{self.experiment_id}' not found in controlled vocabularies."
if ev.valid_term_in_collection(
project_id="cmip6", collection_id="experiment_id", value=self.experiment_id
):
return dict(
ev.get_term_in_collection(
project_id="cmip6",
collection_id="experiment_id",
term_id=self.experiment_id.lower(),
)
)

def _get_parent_metadata(self) -> Dict[str, Any]:
if not self.parent_experiment_id:
return {}

parent_cv = self.vocab.get("experiment_id", {})
if self.parent_experiment_id not in parent_cv:
else:
raise ValueError(
f"Parent experiment '{self.parent_experiment_id}' not found in controlled vocabularies."
f"Experiment '{self.experiment_id}' not found in controlled vocabularies."
)
return parent_cv[self.parent_experiment_id]

def _get_source(self) -> Dict[str, Any]:
try:
return self.vocab["source_id"][self.source_id]
except KeyError:
if ev.valid_term_in_collection(
project_id="cmip6", collection_id="source_id", value=self.source_id
):
return dict(
ev.get_terms_in_collection_by_key_value(
project_id="cmip6",
collection_id="source_id",
key="id",
value=self.source_id.lower(),
)[0]
)
else:
raise ValueError(
f"Source '{self.source_id}' not found in controlled vocabularies."
)
Expand Down Expand Up @@ -844,8 +851,8 @@ def get_required_global_attributes(self) -> Dict[str, Any]:
"grid": "native atmosphere N96 grid (145x192 latxlon)",
"grid_label": self.grid_label,
"initialization_index": variant["initialization_index"],
"institution": self._get_institution(),
"institution_id": ",".join(self.source["institution_id"]),
"institution": self._get_institution()["description"],
"institution_id": self._get_institution()["drs_name"],
"license": self._get_license(),
"mip_era": self.mip_era,
"nominal_resolution": self._get_nominal_resolution(),
Expand Down Expand Up @@ -882,16 +889,19 @@ def get_required_global_attributes(self) -> Dict[str, Any]:
return attrs

def _get_institution(self) -> str:
institution_ids = self.source.get("institution_id", [])
if not institution_ids:
return ""

institution_map = self.vocab.get("institution_id")
if isinstance(institution_map, dict):
first_id = institution_ids[0]
return institution_map.get(first_id, first_id)

return ",".join(institution_ids)
organisation_id = self.source.get("organisation_id", [])[0]
if organisation_id:
return dict(
ev.get_term_in_collection(
project_id="cmip6",
collection_id="institution_id",
term_id=organisation_id,
)
)
else:
raise ValueError(
f"Institution '{organisation_id}' not found in controlled vocabularies."
)

def _get_nominal_resolution(self) -> Optional[str]:
realm = self.variable.get("modeling_realm")
Expand Down Expand Up @@ -935,7 +945,7 @@ def _format_source_string(self) -> str:

def _get_further_info_url(self) -> str:
mip_era = self.mip_era
institution_id = self.source["institution_id"][0]
institution_id = self.source["organisation_id"][0]
source_id = self.source_id
experiment_id = self.experiment_id
sub_experiment_id = self._get_sub_experiment_id()[0]
Expand Down
Loading