Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
28 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 9 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,15 @@ files = ["src"]
target-version = "py310"
extend-include = ["*.ipynb"]

# Keep ruff away from the shapefile fixtures. `extend-exclude` is used instead
# of `exclude` so that ruff's built-in default exclusions (.git, .venv, build
# directories, ...) stay in effect; `exclude` would replace that default list.
extend-exclude = [
"shapefiles",
"*.shp",
"*.shx",
"*.dbf",
"*.prj",
]

[tool.ruff.lint]
select = [
"A",
Expand Down
28 changes: 25 additions & 3 deletions src/ewatercycle/_forcings/caravan.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import os
import shutil
import zipfile
from pathlib import Path
Expand All @@ -12,7 +13,7 @@
from ewatercycle.util import get_time

COMMON_URL = "ca13056c-c347-4a27-b320-930c2a4dd207"
OPENDAP_URL = f"https://opendap.4tu.nl/thredds/dodsC/data2/djht/{COMMON_URL}/1/"
OPENDAP_URL = f"https://opendap.4tu.nl/thredds/dodsC/data2/djht/{COMMON_URL}/2/"
SHAPEFILE_URL = (
f"https://data.4tu.nl/file/{COMMON_URL}/bbe94526-cf1a-4b96-8155-244f20094719"
)
Expand Down Expand Up @@ -106,7 +107,12 @@ class CaravanForcing(DefaultForcing):

@classmethod
def get_dataset(cls: type["CaravanForcing"], dataset: str) -> xr.Dataset:
"""Opens specified dataset from data.4tu.nl OPeNDAP server.
"""Opens dataset from data.4tu.nl OPeNDAP server, or cache if available.

By default, the dataset is opened from the data.4tu.nl OPeNDAP server.
This can be overridden by setting the environment variable CARAVAN_CACHE
to the directory containing the netCDF files.


Args:
dataset (str): name of dataset, choose from:
Expand All @@ -118,6 +124,11 @@ def get_dataset(cls: type["CaravanForcing"], dataset: str) -> xr.Dataset:
'hysets',
'lamah'
"""
cache_dir = os.environ.get("CARAVAN_CACHE")
# Check if we want to load from 4TU or dCache
if cache_dir:
cache_dir = cache_dir.rstrip("/") # ensure no trailing slash issues
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or do

xr.open_dataset(Path(cache_dir) / f"{dataset}.nc")

return xr.open_dataset(Path(cache_dir) / f"{dataset}.nc")
return xr.open_dataset(f"{OPENDAP_URL}{dataset}.nc")

@classmethod
Expand Down Expand Up @@ -246,7 +257,18 @@ def generate( # type: ignore[override]


def get_shapefiles(directory: Path, basin_id: str) -> Path:
"""Retrieve shapefiles from data 4TU.nl ."""
"""Retrieve shapefiles from data 4TU.nl or cache."""
cache_dir = os.environ.get("CARAVAN_CACHE")
# Check if we want to load from 4TU or dCache
if cache_dir:
shape_path = directory / f"{basin_id}.shp"
combined_shapefile_path = Path(cache_dir) / "shapefiles" / "combined.shp"

if not shape_path.is_file():
extract_basin_shapefile(basin_id, combined_shapefile_path, shape_path)

return shape_path

zip_path = directory / "shapefiles.zip"
output_path = directory / "shapefiles"
shape_path = directory / f"{basin_id}.shp"
Expand Down
2 changes: 1 addition & 1 deletion tests/src/base/forcing_files/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,6 @@ The data only includes a year of forcing for one catchment.

For your own use, please download the data from the original source and cite it correctly. The Caravan dataset itself is also a combination of data from separate sources.

The Carvan dataset is originanly obtained from https://zenodo.org/records/7944025 and is explained in a paper by Kratzert, F. :'Caravan - A global community dataset for large-sample hydrology' found here: https://doi-org.tudelft.idm.oclc.org/10.1038/s41597-023-01975-w
The Caravan dataset was originally obtained from https://zenodo.org/records/7944025 and is explained in a paper by Kratzert, F.: 'Caravan - A global community dataset for large-sample hydrology', found here: https://doi.org/10.1038/s41597-023-01975-w

Distributed under Creative Commons Attribution 4.0 International.
2 changes: 1 addition & 1 deletion tests/src/base/forcing_files/camels_03439000.cpg
Original file line number Diff line number Diff line change
@@ -1 +1 @@
ISO-8859-1
ISO-8859-1
2 changes: 1 addition & 1 deletion tests/src/base/forcing_files/camels_03439000.prj
Original file line number Diff line number Diff line change
@@ -1 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
Binary file modified tests/src/base/forcing_files/camels_03439000.shx
Binary file not shown.
1 change: 1 addition & 0 deletions tests/src/base/forcing_files/shapefiles/combined.cpg
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
UTF-8
Binary file added tests/src/base/forcing_files/shapefiles/combined.dbf
Binary file not shown.
1 change: 1 addition & 0 deletions tests/src/base/forcing_files/shapefiles/combined.prj
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
GEOGCS["GCS_WGS_1984",DATUM["D_WGS_1984",SPHEROID["WGS_1984",6378137.0,298.257223563]],PRIMEM["Greenwich",0.0],UNIT["Degree",0.0174532925199433]]
Binary file not shown.
Binary file added tests/src/base/forcing_files/shapefiles/combined.shx
Binary file not shown.
38 changes: 38 additions & 0 deletions tests/src/base/test_forcing.py
Original file line number Diff line number Diff line change
Expand Up @@ -427,3 +427,41 @@ def test_extract_basin_shapefile(tmp_path: Path):

assert len(records) == 1
assert records[0].attributes["gauge_id"] == basin_id


def test_get_dataset_using_cache(tmp_path, monkeypatch):
    """Generate Caravan forcing while CARAVAN_CACHE points at a local directory.

    A cache directory is assembled under ``tmp_path`` from the bundled test
    fixtures (the fake Caravan netCDF file stored as ``camels.nc`` plus the
    ``shapefiles`` folder), CARAVAN_CACHE is set to it, and
    ``CaravanForcing.generate`` is run for one basin. The check is on the
    names of the data variables in the resulting dataset.
    """
    fixtures = Path(__file__).parent / "forcing_files"

    # Assemble the cache: the dataset file under the name the loader looks up,
    # and the shapefiles folder next to it.
    cache_root = tmp_path / "cache"
    cache_root.mkdir()
    (cache_root / "camels.nc").write_bytes(
        (fixtures / "test_caravan_file.nc").read_bytes()
    )
    copytree(fixtures / "shapefiles", cache_root / "shapefiles")

    # Redirect the forcing code to the cache for the duration of this test.
    monkeypatch.setenv("CARAVAN_CACHE", str(cache_root))

    # Working directory for the generated forcing, pre-filled with the fixtures.
    work_dir = tmp_path / "camels"
    copytree(fixtures, work_dir)

    forcing = CaravanForcing.generate(
        start_time="1981-01-01T00:00:00Z",
        end_time="1981-03-01T00:00:00Z",
        directory=str(work_dir),
        basin_id="camels_01022500",
    )
    ds = forcing.to_xarray()

    # The generated dataset must expose exactly these variables, in this order.
    assert list(ds.data_vars.keys()) == [
        "Q",
        "evspsblpot",
        "pr",
        "tas",
        "tasmax",
        "tasmin",
    ]
Loading