Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 13 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,19 @@
# Changelog

All notable changes to this project will be documented in this file.
## [v2605.0.0]
## Fixed
- Missing local files now raise `FileNotFoundError` instead of a misleading "cannot detect format" error
- `storage_options` leaking into posix backends (cfgrib, scipy, netcdf4, rasterio), causing a `TypeError`
- cfgrib .idx files now written to prism cache dir instead of next to the original file
## Removed
- line-clearing ANSI escape machinery, which was unreliable across terminals and notebooks
- `logging.basicConfig` call from library code
## Changed
- detection log messages from INFO to DEBUG
## Added
- `XARRAY_PRISM_LOG_LEVEL` env var to control log verbosity

## [v2603.0.0]
## Fixed
- an issue regarding passing the storage_options to aiohttp
Expand Down
14 changes: 14 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -176,6 +176,20 @@ xp.clear_cache(max_age_days=0, max_size_gb=0)
> | TTL (last-access) | 7 days | `XARRAY_PRISM_MAX_AGE_DAYS=N` |
> | Size cap (LRU) | 10 GB | `XARRAY_PRISM_MAX_SIZE_GB=N` |

### Logging

By default xarray-prism only reports warnings and errors (`WARNING` level); detection and open steps are not shown. Set `XARRAY_PRISM_LOG_LEVEL` to change verbosity:

```bash
# Show detection and open steps
XARRAY_PRISM_LOG_LEVEL=DEBUG python my_script.py

# Suppress everything except errors
XARRAY_PRISM_LOG_LEVEL=ERROR python my_script.py
```

Accepted values: `DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL`.


## Customization

Expand Down
15 changes: 15 additions & 0 deletions src/xarray_prism/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
# cfgrib and scipy typically must read the entire file (and build
# its index) even when only a small subset is requested.

import logging
import os

from ._cache import cache_info, clear_cache
from ._detection import (
detect_engine,
Expand All @@ -23,6 +26,18 @@
from ._registry import registry
from ._version import __version__ # noqa

def _resolve_log_level(raw, default=logging.WARNING):
    """Translate a level name into a numeric ``logging`` level.

    Parameters
    ----------
    raw:
        Level name such as ``"DEBUG"`` or ``"info"``. Matching is
        case-insensitive and surrounding whitespace is ignored.
    default:
        Level returned when ``raw`` does not name a logging level.

    Returns
    -------
    int
        The numeric level, or ``default`` for unknown names.
    """
    candidate = getattr(logging, str(raw).strip().upper(), None)
    # ``logging`` also exposes upper-case attributes that are not levels
    # (for example the ``BASIC_FORMAT`` string); passing one of those to
    # ``setLevel`` raises at import time, so accept integer constants only.
    if isinstance(candidate, int) and not isinstance(candidate, bool):
        return candidate
    return default


# Package logger verbosity is controlled by XARRAY_PRISM_LOG_LEVEL and
# falls back to WARNING when the variable is unset or not a level name.
_level = _resolve_log_level(os.environ.get("XARRAY_PRISM_LOG_LEVEL", "WARNING"))
_logger = logging.getLogger("xarray_prism")
_logger.setLevel(_level)
# Attach a stream handler only when the user asked for more detail than
# WARNING and the logger has no handler yet, so repeated imports or an
# application-provided handler do not produce duplicate handlers here.
if _level < logging.WARNING and not _logger.handlers:
    _handler = logging.StreamHandler()
    _handler.setFormatter(logging.Formatter("%(name)s %(levelname)s: %(message)s"))
    _logger.addHandler(_handler)

__all__ = [
"PrismBackendEntrypoint",
"detect_engine",
Expand Down
11 changes: 1 addition & 10 deletions src/xarray_prism/_cache.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,6 @@

import logging
import os
import sys
import tempfile
import time
from hashlib import md5
Expand Down Expand Up @@ -46,7 +45,6 @@ def cache_remote_file(
engine: str,
storage_options: Optional[Dict] = None,
show_progress: bool = True,
lines_above: int = 0,
) -> str:
"""Cache remote file to local."""
import fsspec
Expand All @@ -60,18 +58,11 @@ def cache_remote_file(
local_path = cache_root / cache_name

if local_path.exists():
if show_progress and lines_above > 0:
for _ in range(lines_above):
sys.stdout.write("\033[A")
sys.stdout.write("\033[K")
sys.stdout.flush()
return _decompress_if_needed(str(local_path))

extra_lines = 0
if show_progress:
fmt = "GRIB" if engine == "cfgrib" else "NetCDF3"
logger.warning(f"Remote {fmt} requires full file download")
extra_lines = 2

fs, path = fsspec.core.url_to_fs(
uri, **_strip_chaining_options(storage_options or {})
Expand All @@ -89,7 +80,7 @@ def cache_remote_file(
display_name = display_name[:32] + "..."
desc = f" Downloading {display_name}"

with ProgressBar(desc=desc, lines_above=lines_above + extra_lines) as progress:
with ProgressBar(desc=desc) as progress:
progress.set_size(size)
with fs.open(path, "rb") as src, open(local_path, "wb") as dst:
while True:
Expand Down
2 changes: 1 addition & 1 deletion src/xarray_prism/_version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,3 @@
"""Version information for xarray-prism."""

__version__ = "2603.0.0"
__version__ = "2605.0.0"
24 changes: 3 additions & 21 deletions src/xarray_prism/backends/cloud.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,11 @@
from __future__ import annotations

import logging
import sys
from typing import Any, Dict, Optional

from .._cache import cache_remote_file

logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)


def open_cloud(
Expand All @@ -18,7 +16,6 @@ def open_cloud(
drop_variables: Optional[Any] = None,
backend_kwargs: Optional[Dict[str, Any]] = None,
show_progress: bool = True,
lines_above: int = 0,
**kwargs,
) -> Any:
"""Open remote file with detected engine."""
Expand All @@ -28,21 +25,11 @@ def open_cloud(

storage_options = kwargs.pop("storage_options", None)

def _clear_lines():
"""Clear the detection message lines."""
if lines_above > 0:
for _ in range(lines_above):
sys.stdout.write("\033[A")
sys.stdout.write("\033[K")
sys.stdout.flush()

bk = backend_kwargs or None

# GRIB / NetCDF3: must download the full file first
if engine in ("cfgrib", "scipy"):
local_path = cache_remote_file(
uri, engine, storage_options, show_progress, lines_above
)
local_path = cache_remote_file(uri, engine, storage_options, show_progress)
return xr.open_dataset(
local_path,
engine=engine,
Expand All @@ -53,30 +40,26 @@ def _clear_lines():

# NetCDF4 (OPeNDAP)
if engine == "netcdf4":
ds = xr.open_dataset(
return xr.open_dataset(
uri,
engine=engine,
drop_variables=drop_variables,
backend_kwargs=bk,
**kwargs,
)
_clear_lines()
return ds

# Rasterio: translate storage_options -> GDAL env vars
if engine == "rasterio":
from ..utils import sanitize_rasterio_kwargs

with gdal_env(storage_options):
ds = xr.open_dataset(
return xr.open_dataset(
uri,
engine=engine,
drop_variables=drop_variables,
backend_kwargs=bk,
**sanitize_rasterio_kwargs(kwargs),
)
_clear_lines()
return ds

# Zarr, h5netcdf
ds = xr.open_dataset(
Expand All @@ -91,5 +74,4 @@ def _clear_lines():
from ..utils import sanitize_dataset_attrs

return sanitize_dataset_attrs(ds)
_clear_lines()
return ds
15 changes: 15 additions & 0 deletions src/xarray_prism/backends/posix.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@

from __future__ import annotations

import os
from typing import Any, Dict, Optional


Expand All @@ -16,6 +17,20 @@ def open_posix(
"""Open local file with detected engine."""
import xarray as xr

# the following posix backends don't accept storage_options
_NO_STORAGE_OPTIONS = frozenset({"cfgrib", "scipy", "netcdf4", "rasterio"})
if engine in _NO_STORAGE_OPTIONS:
kwargs.pop("storage_options", None)

if engine == "cfgrib":
from .._cache import get_cache_dir

bk = dict(backend_kwargs or {})
if "indexpath" not in bk:
basename = os.path.basename(uri)
bk["indexpath"] = str(get_cache_dir() / f"{basename}.{{short_hash}}.idx")
backend_kwargs = bk

if engine == "rasterio":
from ..utils import sanitize_rasterio_kwargs

Expand Down
19 changes: 7 additions & 12 deletions src/xarray_prism/entrypoint.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,21 +102,17 @@ def open_dataset(
)

uri = str(filename_or_obj)

is_remote = "://" in uri and not uri.startswith("file://")
lines_printed = 0

if is_remote:
logger.info("Detecting format...")
sys.stdout.flush()
uri_type = detect_uri_type(uri)
if uri_type == "posix":
posix_path = uri[7:] if uri.startswith("file://") else uri
if not os.path.exists(posix_path):
raise FileNotFoundError(f"Xarray Prism: file not found: {uri!r}")

logger.debug("Detecting format for %s", uri)
engine, uri_type = self._detect(uri, **kwargs)

if is_remote and engine:
logger.info(f"Detected: {engine}")
lines_printed = 1
sys.stdout.write("\r" + " " * 25 + "\r")
sys.stdout.flush()
logger.debug("Detected engine=%s uri_type=%s", engine, uri_type)

if engine is None:
if is_remote:
Expand Down Expand Up @@ -187,7 +183,6 @@ def open_dataset(
engine=engine,
drop_variables=drop_variables,
backend_kwargs=backend_kwargs,
lines_above=lines_printed,
**kwargs,
)
else:
Expand Down
11 changes: 1 addition & 10 deletions src/xarray_prism/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,17 +30,14 @@
class ProgressBar:
"""Progress bar to display cache download progress."""

def __init__(
self, desc: str = "Downloading", width: int = 40, lines_above: int = 0
):
def __init__(self, desc: str = "Downloading", width: int = 40):
self.desc = desc
self.width = width
self._total = 0
self._current = 0
self._spinner = 0
self._spinner_chars = "⠋⠙⠹⠸⠼⠴⠦⠧⠇⠏"
self._last_line_len = 0
self._lines_above = lines_above

def set_size(self, size: int) -> None:
self._total = size if size else 0
Expand Down Expand Up @@ -74,12 +71,6 @@ def __enter__(self):
def __exit__(self, *args):
# Clear the progress line
sys.stdout.write("\r" + " " * self._last_line_len + "\r")

# Clear detection + warning messages
for _ in range(self._lines_above):
sys.stdout.write("\033[A")
sys.stdout.write("\033[K")

sys.stdout.flush()


Expand Down
Loading