Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions doc/whats-new.rst
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,8 @@ New Features
By `Matthew Willson <https://github.com/mjwillson>`_.
- Better ordering of coordinates when displaying Xarray objects (:pull:`11098`).
By `Ian Hunt-Isaak <https://github.com/ianhi>`_, `Julia Signell <https://github.com/jsignell>`_.
- Use ``np.dtypes.StringDType`` when reading Zarr string variables (:pull:`11097`).
By `Julia Signell <https://github.com/jsignell>`_.

Breaking Changes
~~~~~~~~~~~~~~~~
Expand Down
4 changes: 4 additions & 0 deletions xarray/backends/zarr.py
Original file line number Diff line number Diff line change
Expand Up @@ -206,6 +206,10 @@ def __init__(self, zarr_array):
not _zarr_v3()
and self._array.filters is not None
and any(filt.codec_id == "vlen-utf8" for filt in self._array.filters)
) or (
_zarr_v3()
and self._array.serializer
and self._array.serializer.to_dict()["name"] == "vlen-utf8"
):
dtype = coding.strings.create_vlen_dtype(str)
else:
Expand Down
4 changes: 3 additions & 1 deletion xarray/core/duck_array_ops.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@

from xarray.compat import dask_array_compat, dask_array_ops
from xarray.compat.array_api_compat import get_array_namespace
from xarray.compat.npcompat import HAS_STRING_DTYPE
from xarray.core import dtypes, nputils
from xarray.core.extension_array import (
PandasExtensionArray,
Expand Down Expand Up @@ -175,9 +176,10 @@ def isnull(data):
# note: must check timedelta64 before integers, because currently
# timedelta64 inherits from np.integer
return isnat(data)
elif HAS_STRING_DTYPE and isinstance(scalar_type, np.dtypes.StringDType):
return xp.isnan(data)
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

StringDType has a custom na_object property. Is that relevant here?

elif dtypes.isdtype(scalar_type, ("real floating", "complex floating"), xp=xp):
# float types use NaN for null
xp = get_array_namespace(data)
return xp.isnan(data)
elif dtypes.isdtype(scalar_type, ("bool", "integral"), xp=xp) or (
isinstance(scalar_type, np.dtype)
Expand Down
4 changes: 3 additions & 1 deletion xarray/tests/test_backends.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
from xarray.coding.cftime_offsets import date_range
from xarray.coding.strings import check_vlen_dtype, create_vlen_dtype
from xarray.coding.variables import SerializationWarning
from xarray.compat.npcompat import HAS_STRING_DTYPE
from xarray.conventions import encode_dataset_coordinates
from xarray.core import indexing
from xarray.core.common import _contains_cftime_datetimes
Expand Down Expand Up @@ -1088,8 +1089,9 @@ def test_roundtrip_empty_vlen_string_array(self) -> None:
# eg. NETCDF3 based backends do not roundtrip metadata
if actual["a"].dtype.metadata is not None:
assert check_vlen_dtype(actual["a"].dtype) is str
elif HAS_STRING_DTYPE:
assert np.issubdtype(actual["a"].dtype, np.dtypes.StringDType())
else:
# zarr v3 sends back "<U1"
assert np.issubdtype(actual["a"].dtype, np.dtype("=U1"))

@pytest.mark.parametrize(
Expand Down
Loading