Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .gitattributes
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@
# GitHub syntax highlighting
pixi.lock linguist-language=YAML linguist-generated=true
pixi.lock linguist-language=YAML linguist-generated=true -diff
11 changes: 11 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# Changelog

## [v0.5.10]

### Fix
- Replace `da.to_zarr` with `da.store(..., lock=False)` in pyramid writes (`_on_disk_dask_zoom`, `_on_disk_coarsen`) and region slice writes (`_ops_slices`). Dask >=2025.11's `to_zarr` re-derives chunks via `normalize_chunks(chunks="auto", ...)` and emits a `PerformanceWarning` (treated as error by ngio's filterwarnings) when the result is not a multiple of the target's chunks; `da.store` writes blocks 1:1.
- Copy object/string-dtype zarr arrays directly when consolidating groups: dask >=2025.11 raises `NotImplementedError` from auto-chunking for these dtypes, so they bypass dask and are copied via numpy.
- Set `auto_shard_zarr_v3` together with `zarr_write_format` on `anndata`'s global settings via a new `_update_anndata_global_settings` helper, so reading/writing tables works correctly when mixing zarr v2 and v3 in the same session on anndata 0.12.

### Chores
- Pin `anndata` to `>=0.12.0,<0.13.0`.
- Unpin `dask` (remove the `<2025.11.0` upper bound introduced in v0.4.5).

## [v0.5.9]

### Fix
Expand Down
26,011 changes: 13,003 additions & 13,008 deletions pixi.lock

Large diffs are not rendered by default.

6 changes: 3 additions & 3 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -39,13 +39,13 @@ dependencies = [
"zarr>3",
"scipy",
"fsspec",
"anndata",
"anndata>=0.12.0,<0.13.0",
"pydantic",
"pandas>=1.2.0,<3.0.0",
"requests",
"aiohttp",
"dask[array]<2025.11.0",
"dask[distributed]<2025.11.0",
"dask[array]",
"dask[distributed]",
# "xarray", still not used
"ome-zarr-models",
"pooch",
Expand Down
19 changes: 7 additions & 12 deletions src/ngio/common/_pyramid.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,19 +38,13 @@ def _on_disk_dask_zoom(
source_array = da.from_zarr(source)
target_array = dask_zoom(source_array, target_shape=target.shape, order=order)

# This is a potential fix for Dask 2025.11
# import dask.config
# chunk_size_bytes = np.prod(target.chunks) * target_array.dtype.itemsize
# current_chunk_size = dask.config.get("array.chunk-size")
# Increase the chunk size to avoid dask potentially creating
# corrupted chunks when writing chunks that are not multiple of the
# target chunk size
# dask.config.set({"array.chunk-size": f"{chunk_size_bytes}B"})
target_array = target_array.rechunk(target.chunks)
target_array = target_array.compute_chunk_sizes()
target_array.to_zarr(target)
# Restore previous chunk size
# dask.config.set({"array.chunk-size": current_chunk_size})
# da.store rather than to_zarr: dask >=2025.11's to_zarr internally
# re-derives chunks via normalize_chunks(chunks="auto", ...) and warns
# (treated as error by our filterwarnings) when the result isn't a
# multiple of the zarr target's chunks. da.store writes blocks 1:1.
da.store(target_array, target, lock=False)


def _on_disk_coarsen(
Expand Down Expand Up @@ -100,7 +94,8 @@ def _on_disk_coarsen(
aggregation_function, source_array, coarsening_setup, trim_excess=True
)
out_target = out_target.rechunk(target.chunks)
out_target.to_zarr(target)
# See _on_disk_dask_zoom for rationale.
da.store(out_target, target, lock=False)


def on_disk_zoom(
Expand Down
8 changes: 5 additions & 3 deletions src/ngio/io_pipes/_ops_slices.py
Original file line number Diff line number Diff line change
Expand Up @@ -250,8 +250,10 @@ def set_slice_as_dask(
patch, slice_tuple = handle_int_set_as_dask(patch, slice_tuple)
if ax is None:
# Base case, no tuple in the slicing tuple
# assert False
da.to_zarr(arr=patch, url=zarr_array, region=slice_tuple)
# da.store instead of da.to_zarr: see ngio.common._pyramid for the
# dask>=2025.11 PerformanceWarning regression that to_zarr triggers
# when the input chunks aren't a multiple of the target's chunks.
da.store(patch, zarr_array, regions=slice_tuple, lock=False)
return

# Complex case, we have exactly one tuple in the slicing tuple
Expand All @@ -260,7 +262,7 @@ def set_slice_as_dask(
_sub_slice = (*slice_tuple[:ax], slice(idx, idx + 1), *slice_tuple[ax + 1 :])
sub_patch = da.take(patch, indices=i, axis=ax)
sub_patch = da.expand_dims(sub_patch, axis=ax)
da.to_zarr(arr=sub_patch, url=zarr_array, region=_sub_slice)
da.store(sub_patch, zarr_array, regions=_sub_slice, lock=False)


##############################################################
Expand Down
6 changes: 3 additions & 3 deletions src/ngio/tables/backends/_anndata.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
import zarr
from anndata import AnnData
from anndata._settings import settings
from pandas import DataFrame
from polars import DataFrame as PolarsDataFrame
from polars import LazyFrame
from zarr.storage import FsspecStore, LocalStore, MemoryStore

from ngio.tables.backends._abstract_backend import AbstractTableBackend
from ngio.tables.backends._anndata_utils import (
_update_anndata_global_settings,
custom_anndata_read_zarr,
)
from ngio.tables.backends._utils import (
Expand Down Expand Up @@ -43,7 +43,7 @@ def implements_polars() -> bool:

def load_as_anndata(self) -> AnnData:
"""Load the table as an AnnData object."""
settings.zarr_write_format = self._group_handler.zarr_format
_update_anndata_global_settings(self._group_handler.zarr_format)
anndata = custom_anndata_read_zarr(self._group_handler._group)
anndata = normalize_anndata(anndata, index_key=self.index_key)
return anndata
Expand Down Expand Up @@ -95,7 +95,7 @@ def _cleanup_after_write(self) -> None:
def write_from_anndata(self, table: AnnData) -> None:
"""Serialize the table from an AnnData object."""
# Make sure to use the correct zarr format
settings.zarr_write_format = self._group_handler.zarr_format
_update_anndata_global_settings(self._group_handler.zarr_format)
store = self._group_handler.store
path = self._group_handler.group.path
if isinstance(store, LocalStore):
Expand Down
26 changes: 25 additions & 1 deletion src/ngio/tables/backends/_anndata_utils.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
from __future__ import annotations

from typing import TYPE_CHECKING, Any
from typing import TYPE_CHECKING, Any, Literal

import zarr
from anndata import AnnData
from anndata._io.specs import read_elem
from anndata._io.utils import _read_legacy_raw
from anndata._io.zarr import read_dataframe
from anndata._settings import settings
from anndata.compat import _clean_uns
from anndata.experimental import read_dispatched

Expand All @@ -21,6 +22,29 @@
from collections.abc import Callable, Sequence


def _update_anndata_global_settings(zarr_format: Literal[2, 3]) -> None:
    """Align anndata's global zarr settings with the requested zarr format.

    Both ``zarr_write_format`` and ``auto_shard_zarr_v3`` are set on
    anndata's global ``settings`` object so that tables are read/written
    with the correct zarr format.

    Args:
        zarr_format (Literal[2, 3]): The zarr format version to use.
            ``2`` selects the v2 configuration; any other value is
            handled as v3.
    """
    if zarr_format != 2:
        settings.zarr_write_format = 3
        # Enabled to avoid a user warning in anndata 0.12.14.
        settings.auto_shard_zarr_v3 = True
        return

    # Protects users who write v2 and v3 tables in the same session.
    # Order matters: auto_shard_zarr_v3 has to be switched off before
    # zarr_write_format is set to 2.
    settings.auto_shard_zarr_v3 = False
    settings.zarr_write_format = 2


def custom_anndata_read_zarr(
store: StoreOrGroup, elem_to_read: Sequence[str] | None = None
) -> AnnData:
Expand Down
10 changes: 8 additions & 2 deletions src/ngio/utils/_zarr_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,8 +482,14 @@ def _zarr_python_copy(src_group: zarr.Group, dest_group: zarr.Group):
overwrite=True,
)
if array.ndim > 0:
dask_array = da.from_zarr(array)
da.to_zarr(dask_array, dst, overwrite=False)
if array.dtype.hasobject or array.dtype.kind in ("U", "S"):
# dask >=2025.11 refuses auto-chunking for object/string dtypes
# (NotImplementedError in dask.array.core.auto_chunks). These
# arrays come from table backends and are small; copy directly.
dst[:] = array[:]
else:
dask_array = da.from_zarr(array)
da.to_zarr(dask_array, dst, overwrite=False)
# Copy subgroups
for name, subgroup in src_group.groups():
dest_subgroup = dest_group.create_group(name, overwrite=True)
Expand Down
Loading