CMIP-Data-Request · JamesAnstey · Feb 23, 2026 · Apr 15, 2026 · Apr 15, 2026 · Apr 15, 2026
diff --git a/requirements.txt b/requirements.txt
@@ -2,6 +2,7 @@ bs4
 coverage
 openpyxl
 pooch
+pydantic
 pytest
 pyyaml
 requests
diff --git a/scripts/unharmonised/DR_Opportunity_template.yaml b/scripts/unharmonised/DR_Opportunity_template.yaml
@@ -0,0 +1,40 @@
+# Minimal template for a data request Opportunity, for use by community MIPs.
+
+Title: Short descriptive title of the Opportunity
+
+MIP: Name of MIP
+
+Description: Statement of the general purpose of this Opportunity's data request.
+
+Expected Impacts: (Optional) Explanation of why this combination of variables and experiments is important.
+
+Justification of Resources: (Optional) Explanation of how the requested variables map onto the impacts, and estimate of the resources required.
+
+Experiment Groups:
+# An Experiment Group specifies a list of experiments for which requested variables should be produced.
+- example_experiment_group # new Experiment Group, defined below
+- deck # existing Experiment Group
+
+Variable Groups:
+# Each Variable Group defines a set of requested variables and its priority.
+- example_variable_group # new Variable Group, defined below
+- baseline_monthly # existing Variable Group
+
+New Experiment Groups:
+
+  example_experiment_group:
+    Title: Short descriptive title of the Experiment Group
+    Experiments:
+      - amip # existing experiment
+      - example_new_experiment # new experiment, must be registered in CVs
+
+New Variable Groups:
+
+  example_variable_group: 
+    Title: Short descriptive title of the Variable Group
+    Priority Level: High # High, Medium, or Low (not case sensitive)
+    Justification: (Optional) Explanation of why these variables are important.
+    Notes: (Optional) Any additional comments about the variable group.
+    Variables: # list of requested variable names
+      - land.gpp.tavg-u-hxy-lnd.mon.glb
+      - atmos.fco2nat.tavg-u-hxy-u.mon.glb
diff --git a/scripts/unharmonised/README_unharmonised_workflow.md b/scripts/unharmonised/README_unharmonised_workflow.md
@@ -0,0 +1,30 @@
+
+## MIP workflow for Unharmonised Data Request
+
+⚠️ *Everything in this document is a proposal, under development, and likely to change*
+
+### Opportunity template
+
+This allows MIPs to create a `json` file representation of a DR "Opportunity" with minimal effort. 
+
+A DR Opportunity lists variables that are requested from a specified set of experiments.
+It includes a description of the scientific purpose of the request.
+This can be very brief, but including detailed information is also possible.
+A template Opportunity is provided in `yaml` format, which a MIP can edit.
+
+First, copy the template:
+```bash
+cp DR_Opportunity_template.yaml new_MIP_data_request.yaml
+```
+Edit the new file, which in this example is named `new_MIP_data_request.yaml`, to specify the requested variables and experiments from which they're requested.
+Variables are grouped into Variable Groups, which have a priority level (High, Medium, Low) attached.
+Experiments are grouped into Experiment Groups.
+If a MIP simply has one list of variables that are all requested from the same list of experiments, then one Variable Group and one Experiment Group are sufficient.
+
+Then validate the new request against existing DR content:
+```bash
+./ingest.py new_MIP_data_request.yaml new_MIP_data_request.json v1.2.2.3
+```
+This should be run in an env where the DR python API is installed ([see here](https://github.com/CMIP-Data-Request/CMIP7_DReq_Software#installation) for installation guidance).
+This performs some sanity checks, including checking that variable and experiment names are valid (i.e., they are defined in existing DR content and CMIP7 CVs).
+If the checks pass, the output file, which here is `new_MIP_data_request.json`, represents the new request's information in a format that can be used in the DR python API.
diff --git a/scripts/unharmonised/example_validated_opportunity.json b/scripts/unharmonised/example_validated_opportunity.json
@@ -0,0 +1,42 @@
+{
+    "Header": {
+        "Provenance": "Validated Opportunity from input file DR_Opportunity_template.yaml",
+        "Data Request version used for validation": "v1.2.2.3"
+    },
+    "Opportunity": {
+        "title": "Short descriptive title of the Opportunity",
+        "mip": "Name of MIP",
+        "description": "Statement of the general purpose of this Opportunity's data request.",
+        "expected_impacts": "(Optional) Explanation of why this combination of variables and experiments is important.",
+        "justification_of_resources": "(Optional) Explanation of how the requested variables map onto the impacts, and estimate of the resources required.",
+        "experiment_groups": [
+            "example_experiment_group",
+            "deck"
+        ],
+        "variable_groups": [
+            "example_variable_group",
+            "baseline_monthly"
+        ]
+    },
+    "New Experiment Groups": {
+        "example_experiment_group": {
+            "title": "Short descriptive title of the Experiment Group",
+            "experiments": [
+                "amip",
+                "example_new_experiment"
+            ]
+        }
+    },
+    "New Variable Groups": {
+        "example_variable_group": {
+            "title": "Short descriptive title of the Variable Group",
+            "priority_level": "High",
+            "justification": "(Optional) Explanation of why these variables are important.",
+            "notes": "(Optional) Any additional comments about the variable group.",
+            "variables": [
+                "land.gpp.tavg-u-hxy-lnd.mon.glb",
+                "atmos.fco2nat.tavg-u-hxy-u.mon.glb"
+            ]
+        }
+    }
+}
diff --git a/scripts/unharmonised/ingest.py b/scripts/unharmonised/ingest.py
@@ -0,0 +1,150 @@
+#!/usr/bin/env python
+'''
+Ingest a yaml file that specifies a data request Opportunity
+'''
+
+import argparse
+import json
+import yaml
+
+from collections import OrderedDict
+from pydantic import BaseModel
+
+import data_request_api.content.dreq_content as dc
+import data_request_api.query.dreq_query as dq
+from data_request_api.query.dreq_classes import (
+    PRIORITY_LEVELS, format_attribute_name)
+
+
+class ExperimentGroup(BaseModel):  # validates one entry of the 'New Experiment Groups' input section
+    title: str  # mandatory (no default)
+    experiments: list[str]  # mandatory list of experiment names
+
+class VariableGroup(BaseModel):  # validates one entry of the 'New Variable Groups' input section
+    title: str  # mandatory (no default)
+    priority_level: str  # mandatory; the script compares its lowercased value with PRIORITY_LEVELS
+    justification: str = ''  # optional, empty string if omitted
+    notes: str = ''  # optional, empty string if omitted
+    variables: list[str]  # mandatory; names are checked against the DR's CMIP7/CMIP6 compound names
+
+class Opportunity(BaseModel):  # validates the top-level Opportunity attributes of the input file
+    title: str  # mandatory (no default)
+    mip: str  # mandatory
+    description: str  # mandatory
+    expected_impacts: str = ''  # optional, empty string if omitted
+    justification_of_resources: str = ''  # optional, empty string if omitted
+    experiment_groups: list[str]  # mandatory; each name must be newly defined or already in the DR
+    variable_groups: list[str]  # mandatory; each name must be newly defined or already in the DR
+
+
+def parse_args():
+    ''' Parse command line arguments'''
+    parser = argparse.ArgumentParser(description="Validate data request Opportunity specified by input yaml file")
+
+    # Mandatory arguments
+    parser.add_argument('input', 
+                        help="Opportunity specifications (yaml file)")
+    parser.add_argument('output', 
+                        help="Validated Opportunity specifications (json file)")
+    parser.add_argument('dreq_version', choices=dc.get_versions(), 
+                        help="Data Request version used to validate input")
+
+    return parser.parse_args()
+
+
+if __name__ == '__main__':
+
+    args = parse_args()
+    input_file = args.input
+    output_file = args.output
+    dreq_version = args.dreq_version
+
+    # Read setup file for new Opportunity
+    with open(input_file, 'r') as f:
+        opp = yaml.safe_load(f)
+
+    # Retrieve specs for any new variable or experiment groups so that they can be validated
+    # against existing DR content, below.
+    # The ExperimentGroup & VariableGroup pydantic models perform validation of the input.
+    sections = ['New Experiment Groups', 'New Variable Groups']
+    for section in sections:
+        for name,info in opp[section].items():
+            opp[section][name] = {format_attribute_name(k):v for k,v in info.items()}
+        match section:
+            case 'New Experiment Groups':
+                new_expt_groups = {name: ExperimentGroup(**info) for name,info in opp[section].items()}
+            case 'New Variable Groups':
+                new_var_groups = {name: VariableGroup(**info) for name,info in opp[section].items()}
+            case _:
+                raise ValueError('Invalid section: ' + section)
+        opp.pop(section)
+
+    # Check priority levels in new Variable Groups are valid
+    for vg_name, vg in new_var_groups.items():
+        if vg.priority_level.lower() not in PRIORITY_LEVELS:
+            raise ValueError(f'Unknown Priority Level for Variable Group {vg_name}: {vg.priority_level}')
+
+    # Get DR content to use in further validating the input
+    dreq_content = dc.load(dreq_version)
+    base = dq._get_base_dreq_tables(dreq_content, dreq_version, purpose='request')
+    dreq_var_info = dq.get_variables_metadata(base, dreq_version)
+    cmip7_compound_names = set([var_info['cmip7_compound_name'] for var_info in dreq_var_info.values()])
+    cmip6_compound_names = set([var_info['cmip6_compound_name'] for var_info in dreq_var_info.values()])
+    dreq_expt_group_names = set(rec.name for rec in base['Experiment Group'].records.values())
+    dreq_var_group_names = set(rec.name for rec in base['Variable Group'].records.values())
+
+    # Check new Variable Group names don't conflict with any already in the DR
+    for vg_name in new_var_groups:
+        if vg_name in dreq_var_group_names:
+            raise ValueError(f'Variable Group already exists in DR {dreq_version}: {vg_name}')
+
+    # Check that the variable names in new Variable Groups are valid
+    for vg_name, vg in new_var_groups.items():
+        invalid_variables = []
+        for var_name in vg.variables:
+            # TODO: should user be forced to say whether using CMIP6 or CMIP7 variable names?
+            # TODO: if new variables are defined (beyond those in AFT DR) then need to add these here as valid names
+            if not (var_name in cmip7_compound_names or var_name in cmip6_compound_names):
+                invalid_variables.append(var_name)
+        if len(invalid_variables) > 0:
+            msg = f'Found {len(invalid_variables)} invalid variables found in Variable Group {vg_name}:\n' \
+                + '\n'.join(invalid_variables)
+            raise ValueError(msg)
+
+    # Check new Experiment Group names don't conflict with any already in the DR
+    for eg_name in new_expt_groups:
+        if eg_name in dreq_expt_group_names:
+            raise ValueError(f'Experiment Group already exists in DR {dreq_version}: {eg_name}')
+
+    # Validate experiments against CVs
+    # TODO: get valid CMIP7 experiments using esgvoc
+    # (cannot rely on AFT DR list since community MIPs will define new experiments)
+
+    # Use Opportunity pydantic model to validate the input
+    opp = {format_attribute_name(k):v for k,v in opp.items()}
+    opp = Opportunity(**opp)
+
+    # Check full Variable Group and Experiment Group lists are either (1) defined as new,
+    # or (2) exist already in the DR.
+    all_expt_group_names = dreq_expt_group_names.union(new_expt_groups.keys())
+    all_var_group_names = dreq_var_group_names.union(new_var_groups.keys())
+    for eg_name in opp.experiment_groups:
+        if eg_name not in all_expt_group_names:
+            raise ValueError(f'Experiment Group {eg_name} has not been newly defined and does not already exist in DR {dreq_version}')
+    for vg_name in opp.variable_groups:
+        if vg_name not in all_var_group_names:
+            raise ValueError(f'Variable Group {vg_name} has not been newly defined and does not already exist in DR {dreq_version}')
+
+    # Write output file
+    out = OrderedDict({
+        'Header': OrderedDict({
+            'Provenance': f'Validated Opportunity from input file {input_file}',
+            'Data Request version used for validation': dreq_version,
+        }),
+        'Opportunity' : OrderedDict(opp),
+        'New Experiment Groups': OrderedDict({name: OrderedDict(info) for name,info in new_expt_groups.items()}),
+        'New Variable Groups': OrderedDict({name: OrderedDict(info) for name,info in new_var_groups.items()})
+    })
+    with open(output_file, 'w') as f:
+        json.dump(out, f, indent=4)
+        print('Wrote ' + output_file)