xarray-contrib · brendancol · Feb 25, 2026 · Feb 25, 2026 · Feb 25, 2026
diff --git a/README.md b/README.md
@@ -155,10 +155,10 @@ In the GIS world, rasters are used for representing continuous phenomena (e.g. e
 
 | Name | Description | NumPy xr.DataArray | Dask xr.DataArray | CuPy GPU xr.DataArray | Dask GPU xr.DataArray |
 |:----------:|:------------|:----------------------:|:--------------------:|:-------------------:|:------:|
-| [Apply](xrspatial/focal.py) | Applies a custom function over a sliding neighborhood window | ✅️ | ✅️ |  |  |
+| [Apply](xrspatial/focal.py) | Applies a custom function over a sliding neighborhood window | ✅️ | ✅️ | ✅️ | ✅️ |
 | [Hotspots](xrspatial/focal.py) | Identifies statistically significant spatial clusters using Getis-Ord Gi* | ✅️ | ✅️ | ✅️ |  |
-| [Mean](xrspatial/focal.py) | Computes the mean value within a sliding neighborhood window | ✅️ | ✅️ | ✅️ | |
-| [Focal Statistics](xrspatial/focal.py) | Computes summary statistics over a sliding neighborhood window | ✅️ | ✅️ | ✅️ | |
+| [Mean](xrspatial/focal.py) | Computes the mean value within a sliding neighborhood window | ✅️ | ✅️ | ✅️ | ✅️ |
+| [Focal Statistics](xrspatial/focal.py) | Computes summary statistics over a sliding neighborhood window | ✅️ | ✅️ | ✅️ | ✅️ |
 
 -------
 

diff --git a/xrspatial/focal.py b/xrspatial/focal.py
@@ -77,6 +77,16 @@ def _mean_dask_numpy(data, excludes, boundary='nan'):
     return out
 
 
+def _mean_dask_cupy(data, excludes, boundary='nan'):
+    data = data.astype(cupy.float32)
+    _func = partial(_mean_cupy, excludes=excludes)
+    out = data.map_overlap(_func,
+                           depth=(1, 1),
+                           boundary=_boundary_to_dask(boundary, is_cupy=True),
+                           meta=cupy.array(()))
+    return out
+
+
 @cuda.jit
 def _mean_gpu(data, excludes, out):
     # 1. Get coordinates: x is Column, y is Row
@@ -161,8 +171,7 @@ def _mean(data, excludes, boundary='nan'):
         numpy_func=partial(_mean_numpy_boundary, boundary=boundary),
         cupy_func=_mean_cupy,
         dask_func=partial(_mean_dask_numpy, boundary=boundary),
-        dask_cupy_func=lambda *args: not_implemented_func(
-            *args, messages='mean() does not support dask with cupy backed DataArray.'),  # noqa
+        dask_cupy_func=partial(_mean_dask_cupy, boundary=boundary),
     )
     out = mapper(agg)(agg.data, excludes)
     return out
@@ -370,6 +379,22 @@ def _apply_dask_numpy(data, kernel, func, boundary='nan'):
     return out
 
 
+def _apply_cupy(data, kernel, func):
+    return _focal_stats_func_cupy(data.astype(cupy.float32), kernel, func)
+
+
+def _apply_dask_cupy(data, kernel, func, boundary='nan'):
+    data = data.astype(cupy.float32)
+    pad_h = kernel.shape[0] // 2
+    pad_w = kernel.shape[1] // 2
+    _func = partial(_focal_stats_func_cupy, kernel=kernel, func=func)
+    out = data.map_overlap(_func,
+                           depth=(pad_h, pad_w),
+                           boundary=_boundary_to_dask(boundary, is_cupy=True),
+                           meta=cupy.array(()))
+    return out
+
+
 def apply(raster, kernel, func=_calc_mean, name='focal_apply', boundary='nan'):
     """
     Returns custom function applied array using a user-created window.
@@ -378,11 +403,14 @@ def apply(raster, kernel, func=_calc_mean, name='focal_apply', boundary='nan'):
     ----------
     raster : xarray.DataArray
         2D array of input values to be filtered. Can be a NumPy backed,
-        or Dask with NumPy backed DataArray.
+        CuPy backed, Dask with NumPy backed, or Dask with CuPy backed
+        DataArray.
     kernel : numpy.ndarray
         2D array where values of 1 indicate the kernel.
     func : callable, default=xrspatial.focal._calc_mean
         Function which takes an input array and returns an array.
+        For cupy and dask+cupy backends the function must be a
+        ``@cuda.jit`` global kernel with signature ``(data, kernel, out)``.
     boundary : str, default='nan'
         How to handle edges where the kernel extends beyond the raster.
         ``'nan'``     -- fill missing neighbours with NaN (default).
@@ -496,11 +524,9 @@ def apply(raster, kernel, func=_calc_mean, name='focal_apply', boundary='nan'):
     # the function func must be a @ngjit
     mapper = ArrayTypeFunctionMapping(
         numpy_func=partial(_apply_numpy_boundary, boundary=boundary),
-        cupy_func=lambda *args: not_implemented_func(
-            *args, messages='apply() does not support cupy backed DataArray.'),
+        cupy_func=_apply_cupy,
         dask_func=partial(_apply_dask_numpy, boundary=boundary),
-        dask_cupy_func=lambda *args: not_implemented_func(
-            *args, messages='apply() does not support dask with cupy backed DataArray.'),
+        dask_cupy_func=partial(_apply_dask_cupy, boundary=boundary),
     )
     out = mapper(raster)(raster.data, kernel, func)
     result = DataArray(out,
@@ -818,6 +844,32 @@ def _focal_stats_cupy(agg, kernel, stats_funcs):
     return stats
 
 
+def _focal_stats_dask_cupy(agg, kernel, stats_funcs, boundary='nan'):
+    _stats_cuda_mapper = dict(
+        mean=_focal_mean_cuda, sum=_focal_sum_cuda,
+        range=_focal_range_cuda, max=_focal_max_cuda,
+        min=_focal_min_cuda, std=_focal_std_cuda, var=_focal_var_cuda,
+    )
+    pad_h = kernel.shape[0] // 2
+    pad_w = kernel.shape[1] // 2
+    dask_bnd = _boundary_to_dask(boundary, is_cupy=True)
+
+    stats_aggs = []
+    for stat_name in stats_funcs:
+        cuda_kernel = _stats_cuda_mapper[stat_name]
+        _func = partial(_focal_stats_func_cupy, kernel=kernel, func=cuda_kernel)
+        data = agg.data.astype(cupy.float32)
+        stats_data = data.map_overlap(
+            _func, depth=(pad_h, pad_w),
+            boundary=dask_bnd, meta=cupy.array(()))
+        stats_agg = xr.DataArray(
+            stats_data, dims=agg.dims, coords=agg.coords, attrs=agg.attrs)
+        stats_aggs.append(stats_agg)
+    stats = xr.concat(stats_aggs,
+                      pd.Index(stats_funcs, name='stats', dtype=object))
+    return stats
+
+
 def _focal_stats_cpu(agg, kernel, stats_funcs, boundary='nan'):
     _function_mapping = {
         'mean': _calc_mean,
@@ -852,7 +904,8 @@ def focal_stats(agg,
     ----------
     agg : xarray.DataArray
         2D array of input values to be analysed. Can be a NumPy backed,
-        Cupy backed, or Dask with NumPy backed DataArray.
+        CuPy backed, Dask with NumPy backed, or Dask with CuPy backed
+        DataArray.
     kernel : numpy.array
         2D array where values of 1 indicate the kernel.
     stats_funcs: list of string
@@ -920,8 +973,7 @@ def focal_stats(agg,
         numpy_func=partial(_focal_stats_cpu, boundary=boundary),
         cupy_func=_focal_stats_cupy,
         dask_func=partial(_focal_stats_cpu, boundary=boundary),
-        dask_cupy_func=lambda *args: not_implemented_func(
-            *args, messages='focal_stats() does not support dask with cupy backed DataArray.'),
+        dask_cupy_func=partial(_focal_stats_dask_cupy, boundary=boundary),
     )
     result = mapper(agg)(agg, kernel, stats_funcs)
     return result

diff --git a/xrspatial/tests/test_focal.py b/xrspatial/tests/test_focal.py
@@ -93,22 +93,24 @@ def test_mean_transfer_function_gpu_equals_cpu():
 
 @dask_array_available
 @cuda_and_cupy_available
-def test_mean_transfer_dask_gpu_raise_not_implemented():
+def test_mean_transfer_function_dask_gpu():
 
     import cupy
 
-    # cupy case
-    cupy_agg = xr.DataArray(cupy.asarray(data_random))
-    cupy_mean = mean(cupy_agg)
-    general_output_checks(cupy_agg, cupy_mean)
+    # numpy reference
+    numpy_agg = xr.DataArray(data_random)
+    numpy_mean = mean(numpy_agg)
 
-    # dask + cupy case not implemented
+    # dask + cupy case
     dask_cupy_agg = xr.DataArray(
         da.from_array(cupy.asarray(data_random), chunks=(3, 3))
     )
-    with pytest.raises(NotImplementedError) as e_info:
-        mean(dask_cupy_agg)
-        assert e_info
+    dask_cupy_mean = mean(dask_cupy_agg)
+    general_output_checks(dask_cupy_agg, dask_cupy_mean)
+
+    np.testing.assert_allclose(
+        numpy_mean.data, dask_cupy_mean.data.compute().get(),
+        equal_nan=True, rtol=1e-4)
 
 
 @pytest.fixture
@@ -351,6 +353,53 @@ def test_apply_dask_numpy(data_apply):
     general_output_checks(dask_numpy_agg, dask_numpy_apply, expected_result)
 
 
+@cuda_and_cupy_available
+def test_apply_cupy(data_apply):
+    from xrspatial.focal import _focal_mean_cuda
+
+    data, kernel, expected_result_zero = data_apply
+    # numpy reference using _calc_mean
+    numpy_agg = create_test_raster(data)
+    numpy_apply = apply(numpy_agg, kernel)
+
+    # cupy case with equivalent CUDA kernel
+    cupy_agg = create_test_raster(data, backend='cupy')
+    cupy_apply = apply(cupy_agg, kernel, _focal_mean_cuda)
+    general_output_checks(cupy_agg, cupy_apply)
+
+    np.testing.assert_allclose(
+        numpy_apply.data, cupy_apply.data.get(),
+        equal_nan=True, rtol=1e-4)
+
+
+@dask_array_available
+@cuda_and_cupy_available
+def test_apply_dask_cupy():
+    from xrspatial.focal import _focal_mean_cuda
+
+    # Use a larger array so chunk interiors are meaningful
+    rng = np.random.default_rng(42)
+    data = rng.random((20, 24)).astype(np.float64)
+    kernel = np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]])
+
+    # cupy reference (same CUDA kernel)
+    cupy_agg = create_test_raster(data, backend='cupy')
+    cupy_apply = apply(cupy_agg, kernel, _focal_mean_cuda)
+
+    # dask + cupy case
+    dask_cupy_agg = create_test_raster(data, backend='dask+cupy', chunks=(10, 12))
+    dask_cupy_apply = apply(dask_cupy_agg, kernel, _focal_mean_cuda)
+    general_output_checks(dask_cupy_agg, dask_cupy_apply, verify_attrs=False)
+
+    # Compare interior (boundary='nan' causes edge differences between
+    # cupy single-GPU bounds-clamping and dask map_overlap NaN-padding)
+    pad = kernel.shape[0] // 2
+    np.testing.assert_allclose(
+        cupy_apply.data[pad:-pad, pad:-pad].get(),
+        dask_cupy_apply.data[pad:-pad, pad:-pad].compute().get(),
+        equal_nan=True, rtol=1e-4)
+
+
 @pytest.fixture
 def data_focal_stats():
     data = np.arange(16).reshape(4, 4)
@@ -424,6 +473,32 @@ def test_focal_stats_gpu(data_focal_stats):
     )
 
 
+@dask_array_available
+@cuda_and_cupy_available
+def test_focal_stats_dask_cupy():
+    # Use larger data so chunk interiors are meaningful
+    rng = np.random.default_rng(42)
+    data = rng.random((20, 24)).astype(np.float64)
+    kernel = custom_kernel(np.array([[0, 1, 0], [1, 1, 1], [0, 1, 0]]))
+
+    # cupy reference
+    cupy_agg = create_test_raster(data, backend='cupy')
+    cupy_focalstats = focal_stats(cupy_agg, kernel)
+
+    # dask + cupy case
+    dask_cupy_agg = create_test_raster(data, backend='dask+cupy', chunks=(10, 12))
+    dask_cupy_focalstats = focal_stats(dask_cupy_agg, kernel)
+    assert dask_cupy_focalstats.ndim == 3
+
+    # Compare interior (boundary='nan' causes edge differences between
+    # cupy single-GPU bounds-clamping and dask map_overlap NaN-padding)
+    pad = kernel.shape[0] // 2
+    np.testing.assert_allclose(
+        cupy_focalstats.data[:, pad:-pad, pad:-pad].get(),
+        dask_cupy_focalstats.data[:, pad:-pad, pad:-pad].compute().get(),
+        equal_nan=True, rtol=1e-4)
+
+
 @pytest.fixture
 def data_hotspots():
     data = np.asarray([

diff --git a/xrspatial/utils.py b/xrspatial/utils.py
@@ -704,9 +704,9 @@ def _extract_latlon_coords(agg: xr.DataArray):
     if lat_vals.ndim == 1 and lon_vals.ndim == 1:
         # Regular grid: broadcast to 2-D
         lat_2d = np.broadcast_to(lat_vals[:, np.newaxis],
-                                 (agg.sizes[dim_y], agg.sizes[dim_x])).copy()
+                                 (agg.sizes[dim_y], agg.sizes[dim_x]))
         lon_2d = np.broadcast_to(lon_vals[np.newaxis, :],
-                                 (agg.sizes[dim_y], agg.sizes[dim_x])).copy()
+                                 (agg.sizes[dim_y], agg.sizes[dim_x]))
     elif lat_vals.ndim == 2 and lon_vals.ndim == 2:
         lat_2d = lat_vals
         lon_2d = lon_vals