Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions docs/source/api/functions.rst
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,7 @@ Masking & Cleanup Utilities
functions/mask_in_range
functions/mask_out_range
functions/mask_scl
functions/mask_with_scl
functions/replace_nans

Distance Metrics
Expand Down
149 changes: 149 additions & 0 deletions docs/source/api/functions/mask_with_scl.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
mask_with_scl
=============

.. currentmodule:: eo_processor
.. autofunction:: mask_with_scl

Overview
--------
``mask_with_scl`` applies SCL-based masking directly to a data array (e.g., spectral bands).
Unlike ``mask_scl`` which only filters the SCL array itself, this function uses the SCL
as a classification layer to mask corresponding pixels in the actual data.

This is particularly useful when:

- Processing multi-band imagery where clouds/shadows need to be removed
- Working with time-series data where the SCL mask should be applied consistently
- Building composites that require pre-masking of invalid pixels

Supported Array Shapes
----------------------
The function supports the following combinations:

+----------------------------+------------------------+
| Data Shape | SCL Shape |
+============================+========================+
| 2D: (y, x) | 2D: (y, x) |
+----------------------------+------------------------+
| 3D: (time, y, x) | 3D: (time, y, x) |
+----------------------------+------------------------+
| 4D: (time, band, y, x) | 3D: (time, y, x) |
+----------------------------+------------------------+

For 4D data with 3D SCL, the mask is broadcast across all bands, ensuring that if a pixel
is cloudy at time *t*, it is masked in all spectral bands at that time step.

Default Mask Codes
------------------
If ``mask_codes`` is not provided, the function masks the following SCL classes:

- 0: No Data
- 1: Saturated / Defective
- 2: Dark Area Pixels
- 3: Cloud Shadows
- 8: Cloud (Medium Probability)
- 9: Cloud (High Probability)
- 10: Thin Cirrus

This leaves valid surface pixels (vegetation, bare soil, water, snow/ice) intact.

Typical Sentinel-2 SCL Codes
----------------------------
For reference:

+------+-------------------------------------------+--------+
| Code | Meaning | Action |
+------+-------------------------------------------+--------+
| 0 | No Data | Mask |
| 1 | Saturated / Defective | Mask |
| 2 | Dark Area Pixels | Mask |
| 3 | Cloud Shadows | Mask |
| 4 | Vegetation | Keep |
| 5 | Not Vegetated | Keep |
| 6 | Water | Keep |
| 7 | Unclassified | Keep |
| 8 | Cloud (Medium Probability) | Mask |
| 9 | Cloud (High Probability) | Mask |
| 10 | Thin Cirrus | Mask |
| 11 | Snow / Ice | Keep |
+------+-------------------------------------------+--------+

Parameters
----------
- ``data`` (``numpy.ndarray``): The data array to mask (2D, 3D, or 4D).
- ``scl`` (``numpy.ndarray``): The SCL classification layer (2D or 3D).
- ``mask_codes`` (``Sequence[float] | None``): SCL codes to mask. Defaults to
``[0, 1, 2, 3, 8, 9, 10]``.
- ``fill_value`` (``float | None``): Value for masked pixels. Defaults to ``NaN``.

Returns
-------
``numpy.ndarray`` (``float64``): Data array with masked pixels replaced.

Examples
--------

Basic 3D Usage
~~~~~~~~~~~~~~
.. code-block:: python

import numpy as np
from eo_processor import mask_with_scl

# Sentinel-2 bands: shape (time=10, y=100, x=100)
red_band = np.random.rand(10, 100, 100)
scl = np.random.choice([4, 5, 6, 8, 9], size=(10, 100, 100))

# Mask cloud pixels (8, 9) in the red band
masked_red = mask_with_scl(red_band, scl)

4D Multi-Band Usage
~~~~~~~~~~~~~~~~~~~
.. code-block:: python

import numpy as np
from eo_processor import mask_with_scl

# Multi-band data: shape (time=10, band=4, y=100, x=100)
# Bands: B02 (Blue), B03 (Green), B04 (Red), B08 (NIR)
data = np.random.rand(10, 4, 100, 100)
scl = np.random.choice([4, 5, 6, 8, 9], size=(10, 100, 100))

# Mask clouds across all bands
masked_data = mask_with_scl(data, scl)

Custom Mask Codes
~~~~~~~~~~~~~~~~~
.. code-block:: python

# Only mask high-probability clouds and shadows
masked = mask_with_scl(data, scl, mask_codes=[3, 9])

Custom Fill Value
~~~~~~~~~~~~~~~~~
.. code-block:: python

# Use -9999 as nodata instead of NaN
masked = mask_with_scl(data, scl, fill_value=-9999.0)

Integration with XArray/Dask
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
.. code-block:: python

import xarray as xr
from eo_processor import mask_with_scl

# Apply via xr.apply_ufunc for chunked processing
masked_data = xr.apply_ufunc(
mask_with_scl,
data_array, # 4D: (time, band, y, x)
scl_array, # 3D: (time, y, x)
dask="parallelized",
output_dtypes=[float],
)

See Also
--------
- :func:`mask_scl` - Filter the SCL array itself
- :func:`mask_vals` - Mask specific values in any array
- :func:`replace_nans` - Replace NaN values after masking
48 changes: 48 additions & 0 deletions python/eo_processor/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
mask_invalid as _mask_invalid,
mask_out_range as _mask_out_range,
mask_scl as _mask_scl,
mask_with_scl as _mask_with_scl,
mask_vals as _mask_vals,
median as _median,
minkowski_distance as _minkowski_distance,
Expand Down Expand Up @@ -97,6 +98,7 @@
"mask_invalid",
"mask_out_range",
"mask_scl",
"mask_with_scl",
"mask_vals",
"median",
"minkowski_distance",
Expand Down Expand Up @@ -717,6 +719,52 @@ def mask_scl(scl, keep_codes=None, fill_value=None):
return _mask_scl(scl, keep_codes=keep_codes, fill_value=fill_value)


def mask_with_scl(data, scl, mask_codes=None, fill_value=None):
"""
Apply SCL-based masking to a data array.

This function masks pixels in the data array based on the Scene Classification
Layer (SCL) values. Unlike ``mask_scl`` which only returns a masked SCL array,
this function applies the mask to actual data (e.g., spectral bands).

Supported array shapes:
- 2D data (y, x) with 2D SCL (y, x)
- 3D data (time, y, x) with 3D SCL (time, y, x)
- 4D data (time, band, y, x) with 3D SCL (time, y, x) - SCL broadcast across bands

Parameters
----------
data : numpy.ndarray
The data array to mask (2D, 3D, or 4D).
scl : numpy.ndarray
The SCL array (2D or 3D). For 4D data, SCL should be 3D (time, y, x).
mask_codes : sequence of float, optional
SCL codes to mask (set to fill_value). Defaults to clouds/shadows/etc:
[0, 1, 2, 3, 8, 9, 10] (no data, saturated, dark, shadow, cloud med/high, cirrus).
fill_value : float, optional
Value to assign to masked pixels. Defaults to NaN.

Returns
-------
numpy.ndarray
Data array with masked pixels replaced by fill_value.

Examples
--------
>>> import numpy as np
>>> from eo_processor import mask_with_scl
>>> # 3D example: (time=2, y=3, x=3)
>>> data = np.ones((2, 3, 3), dtype=np.float64)
>>> scl = np.array([[[4, 4, 9], [4, 8, 4], [4, 4, 4]],
... [[4, 4, 4], [3, 4, 4], [4, 4, 10]]], dtype=np.float64)
>>> result = mask_with_scl(data, scl)
>>> # Pixels with SCL codes 9, 8, 3, 10 are now NaN
>>> np.isnan(result[0, 0, 2]) # SCL=9 (cloud high)
True
"""
return _mask_with_scl(data, scl, mask_codes=mask_codes, fill_value=fill_value)


def moving_average_temporal(arr, window, skip_na=True, mode="same"):
"""
Sliding window mean along leading time axis of a 1D–4D time-first array.
Expand Down
6 changes: 6 additions & 0 deletions python/eo_processor/__init__.pyi
Original file line number Diff line number Diff line change
Expand Up @@ -124,5 +124,11 @@ def mask_scl(
keep_codes: Optional[Sequence[float]] = ...,
fill_value: Optional[float] = ...,
) -> NDArray[np.float64]: ...
def mask_with_scl(
data: NumericArray,
scl: NumericArray,
mask_codes: Optional[Sequence[float]] = ...,
fill_value: Optional[float] = ...,
) -> NDArray[np.float64]: ...

# Raises ValueError if p < 1.0
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ fn _core(_py: Python, m: &PyModule) -> PyResult<()> {
m.add_function(wrap_pyfunction!(masking::mask_invalid, m)?)?;
m.add_function(wrap_pyfunction!(masking::mask_in_range, m)?)?;
m.add_function(wrap_pyfunction!(masking::mask_scl, m)?)?;
m.add_function(wrap_pyfunction!(masking::mask_with_scl, m)?)?;
// --- Advanced Processes ---
m.add_function(wrap_pyfunction!(processes::moving_average_temporal, m)?)?;
m.add_function(wrap_pyfunction!(
Expand Down
144 changes: 144 additions & 0 deletions src/masking.rs
Original file line number Diff line number Diff line change
Expand Up @@ -443,6 +443,150 @@ pub fn mask_scl(
Ok(result_array.into_pyarray(py).into_py(py))
}

/// Apply SCL-based masking to a data array.
///
/// This function masks pixels in the data array based on the Scene Classification
/// Layer (SCL) values. Unlike `mask_scl` which only returns a masked SCL array,
/// this function applies the mask to actual data (e.g., spectral bands).
///
/// Supported array shapes:
/// - 2D data (y, x) with 2D SCL (y, x)
/// - 3D data (time, y, x) with 3D SCL (time, y, x)
/// - 4D data (time, band, y, x) with 3D SCL (time, y, x) - SCL broadcast across bands
///
/// Parameters
/// ----------
/// data : numpy.ndarray
/// The data array to mask (2D, 3D, or 4D).
/// scl : numpy.ndarray
/// The SCL array (2D or 3D). For 4D data, SCL should be 3D (time, y, x).
/// mask_codes : sequence of float, optional
/// SCL codes to mask (set to fill_value). Defaults to clouds/shadows/etc:
/// [0, 1, 2, 3, 8, 9, 10] (no data, saturated, dark, shadow, cloud med/high, cirrus).
/// fill_value : float, optional
/// Value to assign to masked pixels. Defaults to NaN.
///
/// Returns
/// -------
/// numpy.ndarray
/// Data array with masked pixels replaced by fill_value.
#[pyfunction]
#[pyo3(signature = (data, scl, mask_codes=None, fill_value=None))]
pub fn mask_with_scl(
py: Python<'_>,
data: &PyAny,
scl: &PyAny,
mask_codes: Option<Vec<f64>>,
fill_value: Option<f64>,
) -> PyResult<PyObject> {
// Default SCL codes to MASK (remove): no data, saturated, dark, shadow, cloud med/high, cirrus
let default_mask = vec![0.0, 1.0, 2.0, 3.0, 8.0, 9.0, 10.0];
let codes_to_mask = mask_codes.unwrap_or(default_mask);
let fill = fill_value.unwrap_or(f64::NAN);

// Try 2D data with 2D SCL
if let (Ok(data_2d), Ok(scl_2d)) = (
coerce_2d(data),
coerce_2d(scl),
) {
let data_arr = data_2d.as_array();
let scl_arr = scl_2d.as_array();

if data_arr.shape() != scl_arr.shape() {
return Err(CoreError::InvalidArgument(format!(
"Data shape {:?} does not match SCL shape {:?}",
data_arr.shape(),
scl_arr.shape()
))
.into());
}

let mut out = data_arr.to_owned();
ndarray::Zip::from(&mut out)
.and(&scl_arr)
.for_each(|d, &s| {
if codes_to_mask.contains(&s) {
*d = fill;
}
});

return Ok(out.into_pyarray(py).into_py(py));
}

// Try 3D data with 3D SCL (time, y, x)
if let (Ok(data_3d), Ok(scl_3d)) = (
coerce_3d(data),
coerce_3d(scl),
) {
let data_arr = data_3d.as_array();
let scl_arr = scl_3d.as_array();

if data_arr.shape() != scl_arr.shape() {
return Err(CoreError::InvalidArgument(format!(
"Data shape {:?} does not match SCL shape {:?}",
data_arr.shape(),
scl_arr.shape()
))
.into());
}

let mut out = data_arr.to_owned();
ndarray::Zip::from(&mut out)
.and(&scl_arr)
.for_each(|d, &s| {
if codes_to_mask.contains(&s) {
*d = fill;
}
});

return Ok(out.into_pyarray(py).into_py(py));
}

// Try 4D data (time, band, y, x) with 3D SCL (time, y, x)
// SCL is broadcast across all bands
if let (Ok(data_4d), Ok(scl_3d)) = (
coerce_4d(data),
coerce_3d(scl),
) {
let data_arr = data_4d.as_array();
let scl_arr = scl_3d.as_array();
let (t, b, h, w) = data_arr.dim();
let (scl_t, scl_h, scl_w) = scl_arr.dim();

if t != scl_t || h != scl_h || w != scl_w {
return Err(CoreError::InvalidArgument(format!(
"Data shape ({}, {}, {}, {}) does not align with SCL shape ({}, {}, {})",
t, b, h, w, scl_t, scl_h, scl_w
))
.into());
}

let mut out = data_arr.to_owned();

// Iterate over time and spatial dimensions, applying mask across all bands
for ti in 0..t {
for yi in 0..h {
for xi in 0..w {
let scl_val = scl_arr[[ti, yi, xi]];
if codes_to_mask.contains(&scl_val) {
for bi in 0..b {
out[[ti, bi, yi, xi]] = fill;
}
}
}
}
}

return Ok(out.into_pyarray(py).into_py(py));
}

Err(CoreError::InvalidArgument(
"mask_with_scl requires: 2D data + 2D SCL, 3D data + 3D SCL, or 4D data + 3D SCL."
.to_string(),
)
.into())
}

#[cfg(test)]
mod tests {
use super::*;
Expand Down
Loading