Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,9 @@

### Adjustments and Enhancements

- Added a new core rule `var-missing-data` that checks the recommended
use of missing data in variables.

- Added a new core rule `access-latency` that can be used to check the
time it takes to open a dataset.

Expand Down
1 change: 0 additions & 1 deletion docs/todo.md
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@
## Desired

- project logo
- add `core` rule checks recommended use of fill value
- add `xcube` rule that helps to identify chunking issues
- apply rule op args/kwargs validation schema
- allow outputting suggestions, if any, that are emitted by some rules
Expand Down
65 changes: 65 additions & 0 deletions tests/plugins/core/rules/test_var_missing_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,65 @@
# Copyright © 2025 Brockmann Consult GmbH.
# This software is distributed under the terms and conditions of the
# MIT license (https://mit-license.org/).
import numpy as np
import xarray as xr

from xrlint.plugins.core.rules.var_missing_data import VarMissingData
from xrlint.testing import RuleTest, RuleTester

# An empty dataset has no variables at all, so there is nothing to report.
valid_dataset_0 = xr.Dataset()

# Minimal conforming dataset: one coordinate "t" and one data variable "v",
# neither of which defines missing data.
valid_dataset_1 = xr.Dataset(
    attrs={"title": "v-data"},
    coords={"t": xr.DataArray([0, 1, 2], dims="t", attrs={"units": "seconds"})},
    data_vars={"v": xr.DataArray([10, 20, 30], dims="t", attrs={"units": "m/s"})},
)


def _mutable_copy() -> xr.Dataset:
    """Return an independent deep copy of ``valid_dataset_1`` for mutation."""
    return valid_dataset_1.copy(deep=True)


# Coordinate "t" declares a _FillValue attribute.
invalid_dataset_0 = _mutable_copy()
invalid_dataset_0.t.attrs["_FillValue"] = -999

# Coordinate "t" declares a _FillValue in its encoding.
invalid_dataset_1 = _mutable_copy()
invalid_dataset_1.t.encoding["_FillValue"] = -999

# Data variable "v" uses data packing but lacks a _FillValue.
invalid_dataset_2 = _mutable_copy()
invalid_dataset_2.v.attrs["scaling_factor"] = 0.01

# Data variable "v" has a floating-point raw dtype but no _FillValue.
invalid_dataset_3 = _mutable_copy()
invalid_dataset_3.v.encoding["dtype"] = np.dtype(np.float64)

# Data variable "v" declares a valid range, which xarray does not interpret.
invalid_dataset_4 = _mutable_copy()
invalid_dataset_4.v.attrs["valid_range"] = [0, 1]

VarMissingDataTest = RuleTester.define_test(
    "var-missing-data",
    VarMissingData,
    valid=[RuleTest(dataset=ds) for ds in (valid_dataset_0, valid_dataset_1)],
    invalid=[
        RuleTest(
            dataset=invalid_dataset_0,
            expected=[
                "Unexpected attribute '_FillValue', coordinates must not have missing data."
            ],
        ),
        RuleTest(
            dataset=invalid_dataset_1,
            expected=[
                "Unexpected encoding '_FillValue', coordinates must not have missing data."
            ],
        ),
        RuleTest(
            dataset=invalid_dataset_2,
            expected=["Missing attribute '_FillValue' since data packing is used."],
        ),
        RuleTest(
            dataset=invalid_dataset_3,
            expected=["Missing attribute '_FillValue', which should be NaN."],
        ),
        RuleTest(
            dataset=invalid_dataset_4,
            expected=["Valid ranges are not recognized by xarray (as of Feb 2025)."],
        ),
    ],
)
1 change: 1 addition & 0 deletions tests/plugins/core/test_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ def test_rules_complete(self):
"time-coordinate",
"var-desc",
"var-flags",
"var-missing-data",
"var-units",
},
set(plugin.rules.keys()),
Expand Down
1 change: 1 addition & 0 deletions xrlint/plugins/core/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ def export_plugin() -> Plugin:
"time-coordinate": "error",
"var-desc": "warn",
"var-flags": "error",
"var-missing-data": "warn",
"var-units": "warn",
},
},
Expand Down
53 changes: 53 additions & 0 deletions xrlint/plugins/core/rules/var_missing_data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
# Copyright © 2025 Brockmann Consult GmbH.
# This software is distributed under the terms and conditions of the
# MIT license (https://mit-license.org/).

import numpy as np

from xrlint.node import VariableNode
from xrlint.plugins.core.plugin import plugin
from xrlint.rule import RuleContext, RuleOp


@plugin.define_rule(
    "var-missing-data",
    version="1.0.0",
    type="suggestion",
    description=(
        "Checks the recommended use of missing data, i.e., coordinate variables"
        " should not define missing data, but packed data should."
        " Notifies about the use of valid ranges to indicate missing data, which"
        " is currently not supported by xarray."
    ),
    # Link to the CF conventions section on missing data
    # (the previous "#units" anchor pointed at an unrelated section).
    docs_url="https://cfconventions.org/cf-conventions/cf-conventions.html#missing-data",
)
class VarMissingData(RuleOp):
    """Rule op verifying the recommended use of missing data in variables."""

    def validate_variable(self, ctx: RuleContext, node: VariableNode):
        """Validate missing-data usage for a single variable.

        Reports when
        - a coordinate variable defines ``_FillValue`` (as attribute or
          in the encoding),
        - a packed data variable lacks ``_FillValue``,
        - a data variable with floating-point raw dtype lacks
          ``_FillValue`` (which should be NaN), or
        - ``valid_min``/``valid_max``/``valid_range`` attributes are used,
          because xarray does not interpret them.
        """
        array = node.array
        encoding = array.encoding
        attrs = array.attrs

        # Where, if anywhere, is _FillValue defined? Encoding wins over attrs.
        fill_value_source = None
        if "_FillValue" in encoding:
            fill_value_source = "encoding"
        elif "_FillValue" in attrs:
            fill_value_source = "attribute"

        if fill_value_source is not None and node.name in ctx.dataset.coords:
            ctx.report(
                f"Unexpected {fill_value_source} '_FillValue',"
                f" coordinates must not have missing data."
            )
        elif fill_value_source is None and node.name in ctx.dataset.data_vars:
            # CF data packing uses "scale_factor"/"add_offset". The former
            # was previously looked up as "scaling_factor", which is not a
            # CF attribute; both spellings are accepted now for robustness.
            scale_factor = next(
                (
                    source[name]
                    for name in ("scale_factor", "scaling_factor")
                    for source in (encoding, attrs)
                    if name in source
                ),
                None,
            )
            add_offset = encoding.get("add_offset", attrs.get("add_offset"))
            raw_dtype = encoding.get("dtype")
            if add_offset is not None or scale_factor is not None:
                ctx.report("Missing attribute '_FillValue' since data packing is used.")
            elif isinstance(raw_dtype, np.dtype) and np.issubdtype(
                raw_dtype, np.floating
            ):
                ctx.report("Missing attribute '_FillValue', which should be NaN.")

        # Valid-range attributes are reported independently of the checks above.
        if any((name in attrs) for name in ("valid_min", "valid_max", "valid_range")):
            ctx.report("Valid ranges are not recognized by xarray (as of Feb 2025).")