Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -17,14 +17,15 @@ def parse_args():
'''

parser = argparse.ArgumentParser(
description='Get metadata of CMOR variables (e.g., cell_methods, dimensions, ...) and write it to a json file.'
formatter_class=argparse.RawTextHelpFormatter,
description='Write data request variables metadata (cell_methods, dimensions, ...) to json or csv file.'
)

# Positional (mandatory) input arguments
parser.add_argument('dreq_version', choices=dc.get_versions(),
help='data request version')
parser.add_argument('outfile', type=str,
help='output file containing metadata of requested variables, can be ".json" or ".csv" file')
help='output file (specify ".json" or ".csv" extension)')

sep = ','

Expand All @@ -36,12 +37,26 @@ def parse_input_list(input_str: str, sep=sep) -> list:
return input_args

# Optional input arguments
parser.add_argument('-a', '--attributes', type=parse_input_list,
help=f'include only the specified variable metadata attributes, examples: \
\n -a frequency{sep}modeling_realm{sep}region \
\n -a branded_variable_name{sep}long_name{sep}standard_name \
\n -a dimensions{sep}cell_methods')
parser.add_argument('-cn', '--compound_names', type=parse_input_list,
help=f'include only variables with the specified compound names, example: -cn Amon.tas{sep}Omon.sos')
parser.add_argument('-t', '--cmor_tables', type=parse_input_list,
help=f'include only the specified CMOR tables, example: -t Amon{sep}Omon')
help=f'include only variables with the specified compound names, examples: \
\n -cn Amon.tas{sep}Omon.sos \
\n -cn atmos.tas.tavg-h2m-hxy-u.mon.glb{sep}ocean.sos.tavg-u-hxy-sea.mon.glb \
\nuses CMIP7 or CMIP6 compound name, depending on variable_name config parameter (use CMIP7_data_request_api_config to set)')
parser.add_argument('-v', '--cmor_variables', type=parse_input_list,
help=f'include only the specified CMOR variable short names, example: -v tas{sep}siconc')
help=f'include only the specified CMOR variable out_name, example: \
\n -v tas{sep}siconc')
parser.add_argument('-r', '--realms', type=parse_input_list,
help=f'include only the specified realms, examples: \
\n -r atmos \
\n -r ocean{sep}ocnBgchem{sep}seaIce')
parser.add_argument('-t', '--cmip6_cmor_tables', type=parse_input_list,
help=f'include only the specified CMIP6 CMOR tables, example: \
\n -t Amon{sep}Omon')

return parser.parse_args()

Expand Down Expand Up @@ -69,8 +84,10 @@ def main():
content,
use_dreq_version,
compound_names=args.compound_names,
cmor_tables=args.cmor_tables,
cmor_tables=args.cmip6_cmor_tables,
cmor_variables=args.cmor_variables,
realms=args.realms,
attributes=args.attributes,
)

# Write output file
Expand All @@ -82,6 +99,5 @@ def main():
content_path=dc._dreq_content_loaded['json_path']
)


if __name__ == '__main__':
main()
28 changes: 25 additions & 3 deletions data_request_api/data_request_api/query/dreq_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -709,6 +709,7 @@ def get_requested_variables(content, dreq_version,

def get_variables_metadata(content, dreq_version,
compound_names=None, cmor_tables=None, cmor_variables=None,
realms=None, attributes=None,
verbose=True):
'''
Get metadata for CMOR variables (dimensions, cell_methods, out_name, ...).
Expand All @@ -727,11 +728,20 @@ def get_variables_metadata(content, dreq_version,
Example: ['Amon.tas', 'Omon.sos']
cmor_tables : list[str]
Names of CMOR tables to include. If not given, all are included.
At present this refers to CMIP6 CMOR tables.
Example: ['Amon', 'Omon']
cmor_variables : list[str]
Names of CMOR variables to include. If not given, all are included.
Here the out_name is used as the CMOR variable name.
Example: ['tas', 'siconc']
realms : list[str]
Include only variables whose list of realms contains at least one of the
specified realms (variables can have more than one realm).
Example: ['atmos', 'aerosol']
attributes : list[str]
Include only the specified metadata attributes.
The attributes are presented in the order specified by the user.
Example: ['frequency', 'region', 'modeling_realm']

Returns:
--------
Expand Down Expand Up @@ -817,6 +827,10 @@ def get_variables_metadata(content, dreq_version,
print('Retaining only these CMOR variables: ' + ', '.join(sorted(cmor_variables, key=str.lower)))
if compound_names:
print('Retaining only these compound names: ' + ', '.join(sorted(compound_names, key=str.lower)))
if realms:
print('Retaining only these realms: ' + ', '.join(sorted(realms, key=str.lower)))
if attributes:
print('Retaining only these attributes: ' + ', '.join(sorted(attributes, key=str.lower)))

substitute = {
# replacement character(s) : [characters to replace with the replacement character]
Expand Down Expand Up @@ -969,13 +983,20 @@ def get_variables_metadata(content, dreq_version,
# Get realm(s)
link_realm = getattr(var, attr_realm)
modeling_realm = [dreq_tables['realm'].get_record(link).id for link in link_realm]
# modeling_realm should now be the primary realm, so check the returned list contains only one realm
if not len(modeling_realm) == 1:
raise ValueError(f'There should be one primary realm for {var_name}, found: ' + ', '.join(modeling_realm))
if hasattr(var, attr_realm_additional):
# Add secondary realm(s), if any, to the list
link_realm_additional = getattr(var, attr_realm_additional)
modeling_realm += [dreq_tables['realm'].get_record(link).id for link in link_realm_additional]
# Raise error if any realm is duplicated in the list
if len(modeling_realm) != len(set(modeling_realm)):
raise ValueError(f'Redundant realm(s) found for DR variable {var_name}: {modeling_realm}')
if realms:
# Exclude variables that don't have any of their realms in list
if len(set(modeling_realm).intersection(set(realms))) == 0:
continue

cell_measures = ''
if hasattr(var, 'cell_measures'):
Expand Down Expand Up @@ -1122,6 +1143,9 @@ def get_variables_metadata(content, dreq_version,
v = v.replace(s, replacement)
var_info[k] = v

if attributes:
var_info = OrderedDict({attr:var_info[attr] for attr in attributes})

assert var_name not in all_var_info, 'non-unique variable name: ' + var_name
all_var_info[var_name] = var_info

Expand Down Expand Up @@ -1353,9 +1377,7 @@ def write_variables_metadata(all_var_info, dreq_version, filepath,
# Write variables metadata to csv
var_info = next(iter(all_var_info.values()))
attrs = list(var_info.keys())
columns = ['Compound Name']
columns.append('standard_name')
columns.append('standard_name_proposed')
columns = ['Compound Name'] # compound name is always the first column
columns += [s for s in attrs if s not in columns]
rows = [columns] # column header line
# Add each variable as a row
Expand Down