ACCESS-Community-Hub · tennlee · Dec 17, 2025 · Dec 3, 2025 · Dec 4, 2025 · Dec 4, 2025
diff --git a/.github/workflows/python-app.yml b/.github/workflows/python-app.yml
@@ -27,19 +27,18 @@ jobs:
     - name: Install dependencies
       run: |
         python -m pip install --upgrade pip
-        pip install -r requirements.txt
+        pip install -r requirements_cicd.txt
     - name: Test with pytest
       run: |
         # editable is necessary as pytest will run against the installed
         # package rather than the local files creating a coverage report of 0%
         pip install -e packages/utils
-        pip install -e packages/data[all]
-        pip install -e packages/training[all]
-        pip install -e packages/pipeline[all]
+        pip install -e packages/data
+        pip install -e packages/training
+        pip install -e packages/pipeline
         pip install -e packages/zoo
         pip install -e packages/bundled_models/fourcastnext
-        pip install -e packages/tutorial
-        pip install -e .[test,docs]
+        pip install -e .[test]
 
         pytest -m="not noci" --cov=packages/data --cov=packages/utils --cov=packages/pipeline --cov=packages/training --cov=packages/zoo --cov=packages/bundled_models/fourcastnext --ignore=packages/nci_site_archive
     - name: Coveralls GitHub Action

diff --git a/packages/data/tests/transform/test_derive.py b/packages/data/tests/transform/test_derive.py
@@ -16,7 +16,7 @@
 import pytest
 import math
 
-from numpy import nan, isnan
+from numpy import isnan
 from pyearthtools.data.transforms.derive import evaluate, EquationException
 
 

diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/dask/filters.py b/packages/pipeline/src/pyearthtools/pipeline/operations/dask/filters.py
@@ -59,7 +59,7 @@ def filter(self, sample: da.Array):
             (bool):
                 If sample contains nan's
         """
-        if not bool(da.array(list(da.isnan(sample))).any()):
+        if da.array(list(da.isnan(sample))).any():
             raise PipelineFilterException(sample, "Data contained nan's.")
 
 
@@ -85,7 +85,7 @@ def filter(self, sample: da.Array):
             (bool):
                 If sample contains nan's
         """
-        if not bool(da.array(list(da.isnan(sample))).all()):
+        if da.array(list(da.isnan(sample))).all():
             raise PipelineFilterException(sample, "Data contained all nan's.")
 
 
@@ -164,9 +164,9 @@ def _find_shape(self, data: Union[tuple[da.Array, ...], da.Array]) -> tuple[Unio
             return tuple(map(self._find_shape, data))
         return data.shape
 
-    def check_shape(self, sample: Union[tuple[da.Array, ...], da.Array]):
+    def filter(self, sample: Union[tuple[da.Array, ...], da.Array]):
         if isinstance(sample, (list, tuple)):
-            if not isinstance(self._shape, (list, tuple)) and len(self._shape) == len(sample):
+            if not (isinstance(self._shape, (list, tuple)) and len(self._shape) == len(sample)):
                 raise RuntimeError(
                     f"If sample is tuple, shape must also be, and of the same length. {self._shape} != {tuple(self._find_shape(i) for i in sample)}"
                 )

diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/numpy/filters.py b/packages/pipeline/src/pyearthtools/pipeline/operations/numpy/filters.py
@@ -168,9 +168,9 @@ def _find_shape(self, data: Union[tuple[np.ndarray, ...], np.ndarray]) -> tuple[
             return tuple(map(self._find_shape, data))
         return data.shape
 
-    def check_shape(self, sample: Union[tuple[np.ndarray, ...], np.ndarray]):
+    def filter(self, sample: Union[tuple[np.ndarray, ...], np.ndarray]):
         if isinstance(sample, (list, tuple)):
-            if not isinstance(self._shape, (list, tuple)) and len(self._shape) == len(sample):
+            if not (isinstance(self._shape, (list, tuple)) and len(self._shape) == len(sample)):
                 raise RuntimeError(
                     f"If sample is tuple, shape must also be, and of the same length. {self._shape} != {tuple(self._find_shape(i) for i in sample)}"
                 )

diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/filters.py b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/filters.py
@@ -17,7 +17,7 @@
 
 import numpy as np
 import xarray as xr
-
+import warnings
 import math
 
 from pyearthtools.pipeline.filters import Filter, PipelineFilterException
@@ -58,7 +58,7 @@ def __init__(self, variables: Optional[list] = None) -> None:
 
         self.variables = variables
 
-    def _check(self, sample: xr.Dataset):
+    def filter(self, sample: xr.Dataset):
         """Check if any of the sample is nan
 
         Args:
@@ -68,10 +68,21 @@ def _check(self, sample: xr.Dataset):
             (bool):
                 If sample contains nan's
         """
+
         if self.variables:
-            sample = sample[self.variables]
+            if isinstance(sample, xr.DataArray):
+                warnings.warn("input sample is xr.DataArray - ignoring filter variables.")
+            else:
+                sample = sample[self.variables]
+
+        if isinstance(sample, xr.DataArray):
+            has_nan = np.isnan(sample).any()
+        elif isinstance(sample, xr.Dataset):
+            has_nan = np.array(list(np.isnan(sample).values())).any()
+        else:
+            raise TypeError("This filter only accepts xr.DataArray or xr.Dataset")
 
-        if not bool(np.array(list(np.isnan(sample).values())).any()):
+        if has_nan:
             raise PipelineFilterException(sample, "Data contained nan's.")
 
 
@@ -95,7 +106,7 @@ def __init__(self, variables: Optional[list] = None) -> None:
 
         self.variables = variables
 
-    def _check(self, sample: xr.Dataset):
+    def filter(self, sample: xr.Dataset):
         """Check if all of the sample is nan
 
         Args:
@@ -106,9 +117,19 @@ def _check(self, sample: xr.Dataset):
                 If sample contains nan's
         """
         if self.variables:
-            sample = sample[self.variables]
+            if isinstance(sample, xr.DataArray):
+                warnings.warn("input sample is xr.DataArray - ignoring filter variables.")
+            else:
+                sample = sample[self.variables]
+
+        if isinstance(sample, xr.DataArray):
+            all_nan = np.isnan(sample).all()
+        elif isinstance(sample, xr.Dataset):
+            all_nan = np.array(list(np.isnan(sample).values())).all()
+        else:
+            raise TypeError("This filter only accepts xr.DataArray or xr.Dataset")
 
-        if not bool(np.array(list(np.isnan(sample).values())).all()):
+        if all_nan:
             raise PipelineFilterException(sample, "Data contained all nan's.")
 
 
@@ -147,16 +168,24 @@ def filter(self, sample: T):
             (bool):
                 If sample contains nan's
         """
-        if np.isnan(self._value):
-            function = (  # noqa
-                lambda x: ((np.count_nonzero(np.isnan(x)) / math.prod(x.shape)) * 100) >= self._percentage
-            )  # noqa
+        if isinstance(sample, xr.DataArray):
+            if np.isnan(self._value):
+                drop = ((np.count_nonzero(np.isnan(sample)) / math.prod(sample.shape)) * 100) >= self._percentage
+            else:
+                drop = ((np.count_nonzero(sample == self._value) / math.prod(sample.shape)) * 100) >= self._percentage
+        elif isinstance(sample, xr.Dataset):
+            # Pool per-variable match counts so the percentage is dataset-wide.
+            if np.isnan(self._value):
+                matches = np.isnan(sample)
+            else:
+                matches = sample == self._value  # configured value, as in the DataArray branch
+            nmatches = np.sum(list(matches.sum().values()))
+            nvalues = np.sum([math.prod(v.shape) for v in sample.values()])
+            drop = nmatches / nvalues * 100 >= self._percentage
         else:
-            function = (  # noqa
-                lambda x: ((np.count_nonzero(x == self._value) / math.prod(x.shape)) * 100) >= self._percentage
-            )  # noqa
+            raise TypeError("This filter only accepts xr.DataArray or xr.Dataset")
 
-        if not function(sample):
+        if not drop:
             raise PipelineFilterException(sample, f"Data contained more than {self._percentage}% of {self._value}.")
 
 
@@ -198,7 +227,7 @@ def _find_shape(self, data: T) -> tuple[int, ...]:
 
     def filter(self, sample: Union[tuple[T, ...], T]):
         if isinstance(sample, (list, tuple)):
-            if not isinstance(self._shape, (list, tuple)) and len(self._shape) == len(sample):
+            if not (isinstance(self._shape, (list, tuple)) and len(self._shape) == len(sample)):
                 raise RuntimeError(
                     f"If sample is tuple, shape must also be, and of the same length. {self._shape} != {tuple(self._find_shape(i) for i in sample)}"
                 )

diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/remapping/__init__.py b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/remapping/__init__.py
@@ -27,7 +27,7 @@
 except ImportError:
 
     class HEALPix:
-        def __init__(self):
+        def __init__(self, *args, **kwargs):
             warnings.warn(
                 "Could not import the healpix projection, please install the 'healpy' and 'reproject' optional dependencies"
             )
diff --git a/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/remapping/base.py b/packages/pipeline/src/pyearthtools/pipeline/operations/xarray/remapping/base.py
@@ -21,7 +21,7 @@
 from typing import Type, TypeVar
 import xarray as xr
 
-from pyearthtools.pipeline import Operation
+from pyearthtools.pipeline.operation import Operation
 
 XR_TYPE = TypeVar("XR_TYPE", xr.Dataset, xr.DataArray)
 

diff --git a/packages/pipeline/tests/operations/dask/test_dask_filter.py b/packages/pipeline/tests/operations/dask/test_dask_filter.py
@@ -0,0 +1,111 @@
+# Copyright Commonwealth of Australia, Bureau of Meteorology 2025.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from pyearthtools.pipeline.operations.dask import filters
+from pyearthtools.pipeline.exceptions import PipelineFilterException
+
+import numpy as np
+import dask.array as da
+import pytest
+
+
+def test_DropAnyNan():
+    """DropAnyNan passes a NaN-free dask array and raises once any element is NaN."""
+
+    original = da.ones((2, 2))
+
+    # No NaNs present: filter() must return without raising.
+    drop = filters.DropAnyNan()
+    drop.filter(original)
+
+    # A single NaN element is enough to raise PipelineFilterException.
+    original[0, 0] = np.nan
+    drop = filters.DropAnyNan()
+    with pytest.raises(PipelineFilterException):
+        drop.filter(original)
+
+
+# Marked xfail: the filter's result appears to be inverted relative to its documented behaviour.
+@pytest.mark.xfail
+def test_DropAllNan():
+    """DropAllNan should raise only when every element of the dask array is NaN."""
+
+    original = da.empty((2, 2))
+
+    # no NaNs expected - should succeed quietly (NOTE(review): da.empty does not initialise values - confirm)
+    drop = filters.DropAllNan()
+    drop.filter(original)
+
+    # a single NaN is not *all* NaN - should succeed quietly
+    original[0, 0] = np.nan
+    drop.filter(original)
+
+    # every element NaN - should raise PipelineFilterException
+    original[:, :] = np.nan
+    with pytest.raises(PipelineFilterException):
+        drop.filter(original)
+
+
+def test_DropValue():
+    """DropValue raises when the matching fraction is below the threshold, for 0 and for NaN."""
+
+    original = da.from_array([[0, 0], [1, 2]])
+
+    # raises: zeros are 2 of 4 elements (50%), below the threshold of 75
+    drop = filters.DropValue(0, 75)
+    with pytest.raises(PipelineFilterException):
+        drop.filter(original)
+
+    # passes: 50% zeros meets the threshold of 50
+    drop = filters.DropValue(0, 50)
+    drop.filter(original)
+
+    # raises: NaNs are 2 of 4 elements (50%), below the threshold of 75
+    original = da.from_array([[np.nan, np.nan], [1, 2]])
+    drop = filters.DropValue("nan", 75)
+    with pytest.raises(PipelineFilterException):
+        drop.filter(original)
+
+    # passes: 50% NaNs meets the threshold of 50
+    drop = filters.DropValue("nan", 50)
+    drop.filter(original)
+
+
+def test_Shape():
+    """Shape accepts matching shapes and raises for single-array and tuple mismatches."""
+
+    originals = (da.empty((2, 2)), da.empty((2, 3)))
+
+    # a (2, 2) array against an expected (2, 3) shape - must raise
+    drop = filters.Shape((2, 3))
+    with pytest.raises(PipelineFilterException):
+        drop.filter(originals[0])
+
+    # a (2, 2) array against an expected (2, 2) shape - passes quietly
+    drop = filters.Shape((2, 2))
+    drop.filter(originals[0])
+
+    # tuple input: first element is (2, 2) but (2, 3) is expected - must raise
+    drop = filters.Shape(((2, 3), (2, 3)))
+    with pytest.raises(PipelineFilterException):
+        drop.filter(originals)
+
+    # tuple input: every element matches its expected shape - passes quietly
+    drop = filters.Shape(((2, 2), (2, 3)))
+    drop.filter(originals)
+
+    # one expected shape for two inputs - the length mismatch raises RuntimeError
+    drop = filters.Shape(((2, 2),))
+    with pytest.raises(RuntimeError):
+        drop.filter(originals)
diff --git a/packages/pipeline/tests/operations/numpy/test_numpy_filter.py b/packages/pipeline/tests/operations/numpy/test_numpy_filter.py
@@ -35,7 +35,7 @@ def test_DropAnyNan_true():
     drop = filters.DropAnyNan()
 
     with pytest.raises(PipelineFilterException):
-        result = drop.filter(original)
+        drop.filter(original)
 
 
 def test_DropAllNan_false():
@@ -54,4 +54,59 @@ def test_DropAllNan_true():
     drop = filters.DropAllNan()
 
     with pytest.raises(PipelineFilterException):
-        result = drop.filter(original)
+        drop.filter(original)
+
+
+def test_DropValue():
+    """DropValue raises when the matching fraction is below `percentage`, for 1 and for NaN."""
+    # raises: 1s are 2 of 4 elements (50%), below percentage=75
+    original = np.array([[1, 1], [np.nan, np.nan]])
+
+    drop = filters.DropValue(value=1, percentage=75)
+
+    with pytest.raises(PipelineFilterException):
+        drop.filter(original)
+
+    # passes: 50% of the elements equal 1, meeting percentage=50
+    drop = filters.DropValue(value=1, percentage=50)
+    drop.filter(original)
+
+    # raises: NaNs are 2 of 4 elements (50%), below percentage=75
+    drop = filters.DropValue(value="nan", percentage=75)
+
+    with pytest.raises(PipelineFilterException):
+        drop.filter(original)
+
+    # passes: 50% NaNs meets percentage=50
+    drop = filters.DropValue(value="nan", percentage=50)
+    drop.filter(original)
+
+
+def test_Shape():
+    """Shape accepts matching shapes and raises for single-array and tuple mismatches."""
+    # a (2, 3) array against an expected (2, 2) shape - must raise
+    original = np.empty((2, 3))
+    drop = filters.Shape((2, 2))
+
+    with pytest.raises(PipelineFilterException):
+        drop.filter(original)
+
+    # a (2, 2) array matches the expected shape - passes quietly
+    original = np.empty((2, 2))
+    drop.filter(original)
+
+    # tuple input with the expected shapes in the opposite order - must raise
+    originals = (np.empty((2, 3)), np.empty((2, 2)))
+    drop = filters.Shape(((2, 2), (2, 3)))
+
+    with pytest.raises(PipelineFilterException):
+        drop.filter(originals)
+
+    # tuple input where each element matches its expected shape - passes quietly
+    drop = filters.Shape(((2, 3), (2, 2)))
+    drop.filter(originals)
+
+    # three expected shapes for two inputs - the length mismatch raises RuntimeError
+    drop = filters.Shape(((1, 2), (3, 4), (5, 6)))
+    with pytest.raises(RuntimeError):
+        drop.filter(originals)