Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
62 commits
Select commit Hold shift + click to select a range
f3b155c
dev
davidhassell Jul 13, 2025
bae834a
dev
davidhassell Jul 14, 2025
b1f13b7
dev
davidhassell Jul 15, 2025
7336fba
dev
davidhassell Jul 15, 2025
d9bf322
dev
davidhassell Jul 15, 2025
03aad42
dev
davidhassell Jul 16, 2025
63eba70
dev
davidhassell Jul 16, 2025
412cd52
dev
davidhassell Jul 17, 2025
2b1dd83
dev
davidhassell Jul 17, 2025
f7b8316
dev
davidhassell Jul 17, 2025
c75ee35
dev
davidhassell Jul 17, 2025
04bbe3d
dev
davidhassell Jul 17, 2025
2c127f8
dev
davidhassell Jul 17, 2025
b7cac3f
dev
davidhassell Jul 18, 2025
a13bdca
dev
davidhassell Jul 18, 2025
140e57d
dev
davidhassell Jul 18, 2025
81bd4c0
dev
davidhassell Jul 18, 2025
3a6fae6
dev
davidhassell Jul 18, 2025
14e28a7
dev
davidhassell Jul 18, 2025
75d38b3
dev
davidhassell Jul 18, 2025
adff1d7
dev
davidhassell Jul 21, 2025
05a1936
dev
davidhassell Jul 21, 2025
7262686
dev
davidhassell Jul 21, 2025
04edbca
dev
davidhassell Jul 21, 2025
150b1c6
dev
davidhassell Jul 21, 2025
56d4a6b
dev
davidhassell Jul 21, 2025
12aa457
dev
davidhassell Jul 21, 2025
df3a466
dev
davidhassell Jul 21, 2025
54ea811
dev
davidhassell Jul 21, 2025
1566d62
dev
davidhassell Jul 21, 2025
68c88eb
dev
davidhassell Jul 21, 2025
f569a3c
dev
davidhassell Jul 21, 2025
eca9fb8
dev
davidhassell Jul 21, 2025
d408049
dev
davidhassell Jul 21, 2025
4af2ba4
dev
davidhassell Jul 21, 2025
89b0483
dev
davidhassell Jul 23, 2025
027c4d3
dev
davidhassell Jul 24, 2025
c327818
Merge branch 'regrid-weights-chunks' of github.com:davidhassell/cf-py…
davidhassell Jul 24, 2025
7bacca4
dev
davidhassell Jul 25, 2025
ba7dc31
dev
davidhassell Jul 25, 2025
292ee38
dev
davidhassell Jul 29, 2025
829e970
dev
davidhassell Aug 4, 2025
60ed128
dev
davidhassell Aug 4, 2025
0779108
dev
davidhassell Aug 4, 2025
ffe2761
dev
davidhassell Aug 4, 2025
ca1f808
dev
davidhassell Aug 4, 2025
77eeecd
dev
davidhassell Aug 4, 2025
3758f5f
dev
davidhassell Aug 4, 2025
7244430
dev
davidhassell Aug 4, 2025
339b6c3
dev
davidhassell Aug 4, 2025
8249823
dev
davidhassell Aug 5, 2025
31f131e
dev
davidhassell Aug 8, 2025
e8eb40f
upstream merge
davidhassell Sep 22, 2025
f2ca2d3
dev
davidhassell Sep 22, 2025
e519433
dev
davidhassell Sep 22, 2025
3f23886
grammar
davidhassell Oct 16, 2025
58d058b
Typos
davidhassell Oct 16, 2025
3a97a1f
remove old development comment
davidhassell Oct 16, 2025
6a7f15a
Merge branch 'main' into regrid-weights-chunks
davidhassell Oct 16, 2025
5decf3f
report requested dst_grid_partitions
davidhassell Oct 16, 2025
730c47a
tidy
davidhassell Oct 16, 2025
3acbdf0
dummy_size_1_dimension
davidhassell Oct 16, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions Changelog.rst
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,13 @@ Version NEXTVERSION

* Python 3.9 support removed
(https://github.com/NCAS-CMS/cf-python/issues/896)
* Allow regridding for very large grids. New keyword parameter to
`cf.Field.regrids` and `cf.Field.regridc`: ``dst_grid_partitions``
(https://github.com/NCAS-CMS/cf-python/issues/878)
* Changed dependency: ``Python>=3.10.0``

----

Version 3.18.1
--------------

Expand Down
10 changes: 3 additions & 7 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -111,7 +111,7 @@ of its array manipulation and can:
* regrid structured grid, mesh and DSG field constructs with
(multi-)linear, nearest neighbour, first- and second-order
conservative and higher order patch recovery methods, including 3-d
regridding,
regridding, and large-grid support,
* apply convolution filters to field constructs,
* create running means from field constructs,
* apply differential operators to field constructs,
Expand All @@ -125,12 +125,8 @@ Visualization
Powerful and flexible visualizations of `cf` field constructs,
designed to be produced and configured in as few lines of code as
possible, are available with the [cf-plot
package](https://ncas-cms.github.io/cf-plot/build/index.html), which
needs to be installed separately to the `cf` package.

See the [cf-plot
gallery](https://ncas-cms.github.io/cf-plot/build/gallery.html) for a
range of plotting possibilities with example code.
package](https://ncas-cms.github.io/cf-plot), which needs to be
installed separately to the `cf` package.

![Example outputs of cf-plot displaying selected aspects of `cf` field constructs](https://raw.githubusercontent.com/NCAS-CMS/cf-plot/master/docs/source/images/cf_gallery_image.png)

Expand Down
28 changes: 11 additions & 17 deletions cf/data/dask_regrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -507,10 +507,10 @@ def _regrid(
# 'weights.indptr', 'weights.indices', and
# 'weights.data' directly, rather than iterating
# over rows of 'weights' and using
# 'weights.getrow'. Also, 'np.count_nonzero' is much
# faster than 'np.any' and 'np.all'.
# 'weights.getrow'. Also, `np.count_nonzero` is much
# faster than `np.any` and `np.all`.
count_nonzero = np.count_nonzero
indptr = weights.indptr.tolist()
indptr = weights.indptr
indices = weights.indices
data = weights.data
for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])):
Expand All @@ -529,8 +529,6 @@ def _regrid(
w[mask] = 0
data[i0:i1] = w

del indptr

elif method in ("linear", "bilinear"):
# 2) Linear methods:
#
Expand All @@ -549,23 +547,21 @@ def _regrid(
# 'weights.indptr', 'weights.indices', and
# 'weights.data' directly, rather than iterating
# over rows of 'weights' and using
# 'weights.getrow'. Also, 'np.count_nonzero' is much
# faster than 'np.any' and 'np.all'.
# 'weights.getrow'. Also, `np.count_nonzero` is much
# faster than `np.any` and `np.all`.
count_nonzero = np.count_nonzero
where = np.where
indptr = weights.indptr.tolist()
indptr = weights.indptr
indices = weights.indices
pos_data = weights.data >= min_weight
data = weights.data
for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])):
mask = src_mask[indices[i0:i1]]
if not count_nonzero(mask):
continue

if where((mask) & (pos_data[i0:i1]))[0].size:
if where(data[i0:i1][mask] >= min_weight)[0].size:
dst_mask[j] = True

del indptr, pos_data

elif method == "nearest_dtos":
# 3) Nearest neighbour dtos method:
#
Expand All @@ -584,10 +580,10 @@ def _regrid(
# 'weights.indptr', 'weights.indices', and
# 'weights.data' directly, rather than iterating
# over rows of 'weights' and using
# 'weights.getrow'. Also, 'np.count_nonzero' is much
# faster than 'np.any' and 'np.all'.
# 'weights.getrow'. Also, `np.count_nonzero` is much
# faster than `np.any` and `np.all`.
count_nonzero = np.count_nonzero
indptr = weights.indptr.tolist()
indptr = weights.indptr
indices = weights.indices
for j, (i0, i1) in enumerate(zip(indptr[:-1], indptr[1:])):
mask = src_mask[indices[i0:i1]]
Expand All @@ -597,8 +593,6 @@ def _regrid(
elif n_masked:
weights.data[np.arange(i0, i1)[mask]] = 0

del indptr

elif method in (
"patch",
"conservative_2nd",
Expand Down
50 changes: 46 additions & 4 deletions cf/docstring/docstring.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,11 @@
weights with the source data. (Note that whilst the `esmpy`
package is also able to create the regridded data from its
weights, this feature can't be integrated with the `dask`
framework that underpins the field's data.)""",
framework that underpins the field's data.)

The calculation of weights for large grids can have a very
high memory requirement, but this can be reduced by setting
the *dst_grid_partitions* parameter.""",
# regrid Logging
"{{regrid Logging}}": """**Logging**

Expand Down Expand Up @@ -436,9 +440,10 @@

**Performance**

The computation of the weights can be much more costly
than the regridding itself, in which case reading
pre-calculated weights can improve performance.
The computation of the weights can take much longer,
and take much more memory, than the regridding itself,
in which case reading pre-calculated weights can
improve performance.

Ignored if *dst* is a `RegridOperator`.""",
# aggregated_units
Expand Down Expand Up @@ -564,6 +569,43 @@
If True then do not perform the regridding, rather
return the `esmpy.Regrid` instance that defines the
regridding operation.""",
# dst_grid_partitions
"{{dst_grid_partitions: `int` or `str`, optional}}": """dst_grid_partitions: `int` or `str`, optional
Calculating the weights matrix for grids with a very large
number of source and/or destination grid points can
potentially require more memory than is
available. However, the memory requirement can be greatly
reduced by calculating weights separately for
non-overlapping partitions of the destination grid, and
then combining the weights from each partition to create
the final weights matrix. The more partitions there are,
the smaller the memory requirement for the weights
calculations, at the expense of the weights calculations
taking longer.

The *dst_grid_partitions* parameter sets the number of
destination grid partitions for the weights
calculations. The default value is ``1``, i.e. one
partition for the entire destination grid, maximising
memory usage and minimising the calculation time. If the
string ``'maximum'`` is given then the largest possible
number of partitions of the destination grid will be used,
minimising memory usage and maximising the calculation
time. A positive integer specifies the exact number of
partitions (capped at the maximum allowed), so that the
balance between memory usage and calculation time can be
adjusted.

The actual number of destination grid partitions, each
partition's shape, and the time and memory required for
the weights calculation are displayed when ``'DEBUG'``
logging is activated. See *verbose* for details.

.. note:: If setting *dst_grid_partitions* is required for
the regridding to work, then it is worth
considering storing the weights in a file for
fast future access, via the *weights_file*
parameter.""",
# ----------------------------------------------------------------
# Method description substitutions (4 levels of indentation)
# ----------------------------------------------------------------
Expand Down
49 changes: 30 additions & 19 deletions cf/field.py
Original file line number Diff line number Diff line change
Expand Up @@ -382,14 +382,6 @@ def __getitem__(self, indices):
(6, 4, 3)

"""
debug = is_log_level_debug(logger)

if debug:
logger.debug(
self.__class__.__name__ + ".__getitem__"
) # pragma: no cover
logger.debug(f" input indices = {indices}") # pragma: no cover

if indices is Ellipsis:
return self.copy()

Expand Down Expand Up @@ -437,12 +429,6 @@ def __getitem__(self, indices):
else:
findices = indices

if debug:
logger.debug(f" shape = {shape}") # pragma: no cover
logger.debug(f" indices = {indices}") # pragma: no cover
logger.debug(f" indices2 = {indices2}") # pragma: no cover
logger.debug(f" findices = {findices}") # pragma: no cover

new_data = data[tuple(findices)]

if 0 in new_data.shape:
Expand Down Expand Up @@ -496,11 +482,6 @@ def __getitem__(self, indices):
else:
dice.append(slice(None))

if debug:
logger.debug(
f" dice = {tuple(dice)}"
) # pragma: no cover

# Generally we do not apply an ancillary mask to the
# metadata items, but for DSGs we do.
if ancillary_mask and new.DSG:
Expand Down Expand Up @@ -12985,6 +12966,7 @@ def regrids(
ln_z=None,
verbose=None,
return_esmpy_regrid_operator=False,
dst_grid_partitions=1,
inplace=False,
i=False,
_compute_field_mass=None,
Expand Down Expand Up @@ -13229,6 +13211,17 @@ def regrids(

.. versionadded:: 3.16.2

{{dst_grid_partitions: `int` or `str`, optional}}

The maximum number of partitions, Nmax, depends on the
nature of the destination grid: If the Z axis is being
regridded, Nmax = the size of the Z axis. For a 2-d
structured grid, Nmax = the size of the Y axis. For a
UGRID, HEALPix, or DSG grid, Nmax = the size of the
horizontal discrete axis.

.. versionadded:: NEXTVERSION

axis_order: sequence, optional
Deprecated at version 3.14.0.

Expand Down Expand Up @@ -13322,11 +13315,13 @@ def regrids(
z=z,
ln_z=ln_z,
return_esmpy_regrid_operator=return_esmpy_regrid_operator,
dst_grid_partitions=dst_grid_partitions,
inplace=inplace,
)

@_deprecated_kwarg_check("i", version="3.0.0", removed_at="4.0.0")
@_inplace_enabled(default=False)
@_manage_log_level_via_verbosity
def regridc(
self,
dst,
Expand All @@ -13346,6 +13341,8 @@ def regridc(
z=None,
ln_z=None,
return_esmpy_regrid_operator=False,
dst_grid_partitions=1,
verbose=None,
inplace=False,
i=False,
_compute_field_mass=None,
Expand Down Expand Up @@ -13525,6 +13522,19 @@ def regridc(

.. versionadded:: 3.16.2

{{dst_grid_partitions: `int` or `str`, optional}}

Partitioning is only available for 2-d or 3-d
regridding. The maximum number of partitions is the
size of the first of the destination grid axes
specified by the *axes* parameter.

.. versionadded:: NEXTVERSION

{{verbose: `int` or `str` or `None`, optional}}

.. versionadded:: NEXTVERSION

axis_order: sequence, optional
Deprecated at version 3.14.0.

Expand Down Expand Up @@ -13617,6 +13627,7 @@ def regridc(
z=z,
ln_z=ln_z,
return_esmpy_regrid_operator=return_esmpy_regrid_operator,
dst_grid_partitions=dst_grid_partitions,
inplace=inplace,
)

Expand Down
17 changes: 1 addition & 16 deletions cf/mixin/propertiesdatabounds.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
import logging

import numpy as np
from cfdm import is_log_level_debug, is_log_level_info
from cfdm import is_log_level_info

from ..data import Data
from ..decorators import (
Expand Down Expand Up @@ -81,15 +81,6 @@ def __getitem__(self, indices):
else:
findices = tuple(indices)

cname = self.__class__.__name__
if is_log_level_debug(logger):
logger.debug(
f"{cname}.__getitem__: shape = {self.shape}\n"
f"{cname}.__getitem__: indices2 = {indices2}\n"
f"{cname}.__getitem__: indices = {indices}\n"
f"{cname}.__getitem__: findices = {findices}"
) # pragma: no cover

data = self.get_data(None, _fill_value=False)
if data is not None:
new_data = data[findices]
Expand Down Expand Up @@ -133,12 +124,6 @@ def __getitem__(self, indices):
mask.insert_dimension(-1) for mask in findices[1]
]

if is_log_level_debug(logger):
logger.debug(
f"{self.__class__.__name__}.__getitem__: findices for "
f"bounds = {tuple(findices)}"
) # pragma: no cover

new.bounds.set_data(bounds_data[tuple(findices)], copy=False)

# Remove the direction, as it may now be wrong
Expand Down
Loading