Skip to content

Commit 798243b

Browse files
authored
get volume and metadata by name, add iterator for datasets (#237)
* get volume and metadata by name, add iterator for datasets * update tests * update test * adapt tests and global exported methods * update examples * linter reorder * add dataset_metadata_by_name to test
1 parent 08bbbde commit 798243b

File tree

5 files changed

+231
-92
lines changed

5 files changed

+231
-92
lines changed

examples/add_public_raster_dataset.ipynb

Lines changed: 51 additions & 25 deletions
Large diffs are not rendered by default.

examples/add_public_vector_dataset.ipynb

Lines changed: 43 additions & 17 deletions
Large diffs are not rendered by default.

geoengine/__init__.py

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,9 +33,11 @@
3333
add_dataset,
3434
add_or_replace_dataset_with_permissions,
3535
dataset_info_by_name,
36+
dataset_metadata_by_name,
3637
delete_dataset,
3738
list_datasets,
3839
upload_dataframe,
40+
volume_by_name,
3941
volumes,
4042
)
4143
from .error import (

geoengine/datasets.py

Lines changed: 72 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@
66

77
import tempfile
88
from abc import abstractmethod
9+
from collections.abc import Iterator
910
from enum import Enum
1011
from pathlib import Path
1112
from typing import Literal, NamedTuple
@@ -286,7 +287,8 @@ class AddDatasetProperties:
286287
name: str | None
287288
display_name: str
288289
description: str
289-
source_operator: Literal["GdalSource", "OgrSource"] # TODO: add more operators
290+
# TODO: add more operators
291+
source_operator: Literal["GdalSource", "OgrSource"]
290292
symbology: RasterSymbology | None # TODO: add vector symbology if needed
291293
provenance: list[Provenance] | None
292294

@@ -537,6 +539,20 @@ def volumes(timeout: int = 60) -> list[Volume]:
537539
return [Volume.from_response(v) for v in response]
538540

539541

542+
def volume_by_name(volume_name: str, timeout: int = 60) -> Volume | None:
    """Look up a single volume by its name.

    All volumes are fetched via `volumes` and filtered on their `name`
    attribute.

    Parameters
    ----------
    volume_name:
        The name the volume must match exactly.
    timeout:
        Forwarded to `volumes`.

    Returns
    -------
    The matching volume, or `None` if no volume carries that name.

    Raises
    ------
    KeyError
        If more than one volume carries the requested name.
    """
    matches = [candidate for candidate in volumes(timeout) if candidate.name == volume_name]

    # An ambiguous name is an error rather than a silent "first one wins".
    if len(matches) > 1:
        raise KeyError(f"Volume name {volume_name} is not unique")

    return matches[0] if matches else None
554+
555+
540556
def add_dataset(
541557
data_store: Volume | UploadId,
542558
properties: AddDatasetProperties,
@@ -619,7 +635,7 @@ class DatasetListOrder(Enum):
619635
NAME_DESC = "NameDesc"
620636

621637

622-
def list_datasets(
638+
def list_datasets_page(
623639
offset: int = 0,
624640
limit: int = 20,
625641
order: DatasetListOrder = DatasetListOrder.NAME_ASC,
@@ -643,6 +659,38 @@ def list_datasets(
643659
return response
644660

645661

662+
def list_datasets(
    offset: int = 0,
    limit: int = 200,
    order: DatasetListOrder = DatasetListOrder.NAME_ASC,
    name_filter: str | None = None,
    timeout: int = 60,
) -> Iterator[geoengine_openapi_client.DatasetListing]:
    """Lazily iterate over dataset listings, fetching them page by page.

    This is a generator: no request is sent until the result is iterated.
    Pages are requested from `list_datasets_page` in chunks of 20 entries,
    and iteration stops as soon as `limit` entries were yielded or the
    server returns an empty page.

    Parameters
    ----------
    offset:
        Index of the first dataset to return.
    limit:
        Maximum total number of datasets to yield (not the page size).
    order:
        Sort order, forwarded to `list_datasets_page`.
    name_filter:
        Optional name filter, forwarded to `list_datasets_page`.
    timeout:
        Request timeout, forwarded to `list_datasets_page` for every page.

    Yields
    ------
    At most `limit` dataset listings, in the order returned by the server.
    """

    page_size = 20
    # Number of positions covered by the pages requested so far.
    covered = 0

    while covered < limit:
        page = list_datasets_page(
            covered + offset, page_size, order=order, name_filter=name_filter, timeout=timeout
        )

        if len(page) == 0:
            break

        for index, listing in enumerate(page):
            # `>=` (not `>`): position `limit` would already be the
            # (limit + 1)-th element, which must not be yielded.
            if covered + index >= limit:
                break
            yield listing

        covered += page_size
692+
693+
646694
def dataset_info_by_name(
647695
dataset_name: DatasetName | str, timeout: int = 60
648696
) -> geoengine_openapi_client.models.Dataset | None:
@@ -663,3 +711,25 @@ def dataset_info_by_name(
663711
if isinstance(e_body, str) and "CannotLoadDataset" not in e_body:
664712
raise e
665713
return res
714+
715+
716+
def dataset_metadata_by_name(
    dataset_name: DatasetName | str, timeout: int = 60
) -> geoengine_openapi_client.models.MetaDataDefinition | None:
    """Fetch the metadata definition (loading info) of a dataset by its name.

    Parameters
    ----------
    dataset_name:
        The dataset name; a plain string is wrapped in a `DatasetName` first.
    timeout:
        Request timeout passed to the API call.

    Returns
    -------
    The result of the `get_loading_info_handler` API call, or `None` if the
    server answers with a bad request that is not re-raised (see below).

    Raises
    ------
    geoengine_openapi_client.exceptions.BadRequestException
        If the error body is a string that does not contain
        "CannotLoadDataset". Any other bad-request response is swallowed
        and reported as `None`.
    """

    name = dataset_name if isinstance(dataset_name, DatasetName) else DatasetName(dataset_name)

    session = get_session()

    with geoengine_openapi_client.ApiClient(session.configuration) as api_client:
        api = geoengine_openapi_client.DatasetsApi(api_client)
        try:
            return api.get_loading_info_handler(str(name), _request_timeout=timeout)
        except geoengine_openapi_client.exceptions.BadRequestException as error:
            body = error.body
            # Only "CannotLoadDataset" (or a non-string body) is treated as
            # "no metadata available"; every other bad request propagates.
            if isinstance(body, str) and "CannotLoadDataset" not in body:
                raise error

    return None

tests/test_datasets.py

Lines changed: 63 additions & 48 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,8 @@ def test_list_datasets(self):
3131
offset=0, limit=10, order=ge.DatasetListOrder.NAME_ASC, name_filter="Natural Earth II"
3232
)
3333

34+
datasets = list(datasets)
35+
3436
self.assertEqual(len(datasets), 3)
3537

3638
dataset = datasets[0]
@@ -48,7 +50,7 @@ def test_add_dataset(self):
4850

4951
ge.initialize(ge_instance.address(), credentials=("admin@localhost", "adminadmin"))
5052

51-
volumes = ge.volumes()
53+
volume = ge.volume_by_name("test_data")
5254

5355
geo_transform = ge.GeoTransform(x_min=180.0, y_max=90.0, x_pixel_size=0.1, y_pixel_size=-0.1)
5456

@@ -71,23 +73,23 @@ def test_add_dataset(self):
7173
result_descriptor_measurement = ge.ClassificationMeasurement(
7274
measurement="Land Cover",
7375
classes={
74-
"0": "Water Bodies",
75-
"1": "Evergreen Needleleaf Forests",
76-
"2": "Evergreen Broadleaf Forests",
77-
"3": "Deciduous Needleleaf Forests",
78-
"4": "Deciduous Broadleleaf Forests",
79-
"5": "Mixed Forests",
80-
"6": "Closed Shrublands",
81-
"7": "Open Shrublands",
82-
"8": "Woody Savannas",
83-
"9": "Savannas",
84-
"10": "Grasslands",
85-
"11": "Permanent Wtlands",
86-
"12": "Croplands",
87-
"13": "Urban and Built-Up",
88-
"14": "Cropland-Natural Vegetation Mosaics",
89-
"15": "Snow and Ice",
90-
"16": "Barren or Sparsely Vegetated",
76+
0: "Water Bodies",
77+
1: "Evergreen Needleleaf Forests",
78+
2: "Evergreen Broadleaf Forests",
79+
3: "Deciduous Needleleaf Forests",
80+
4: "Deciduous Broadleleaf Forests",
81+
5: "Mixed Forests",
82+
6: "Closed Shrublands",
83+
7: "Open Shrublands",
84+
8: "Woody Savannas",
85+
9: "Savannas",
86+
10: "Grasslands",
87+
11: "Permanent Wtlands",
88+
12: "Croplands",
89+
13: "Urban and Built-Up",
90+
14: "Cropland-Natural Vegetation Mosaics",
91+
15: "Snow and Ice",
92+
16: "Barren or Sparsely Vegetated",
9193
},
9294
)
9395

@@ -105,6 +107,7 @@ def test_add_dataset(self):
105107
"time": None,
106108
"params": gdal_params,
107109
"resultDescriptor": result_descriptor.to_api_dict().to_dict(),
110+
"cacheTtl": 0,
108111
}
109112
)
110113

@@ -138,16 +141,28 @@ def test_add_dataset(self):
138141
],
139142
)
140143

144+
metadata_for_api = geoengine_openapi_client.MetaDataDefinition(
145+
meta_data,
146+
)
147+
141148
dataset_name = ge.add_dataset(
142-
volumes[0],
149+
volume,
143150
add_dataset_properties,
144-
geoengine_openapi_client.MetaDataDefinition(
145-
meta_data,
146-
),
151+
metadata_for_api,
147152
)
148153

149154
self.assertEqual(dataset_name, ge.DatasetName("MCD12C1_test"))
150-
self.assertEqual(len(ge.list_datasets(name_filter="Land Cover TEST")), 1)
155+
self.assertEqual(len(list(ge.list_datasets(name_filter="Land Cover TEST"))), 1)
156+
157+
metadata_from_api = ge.dataset_metadata_by_name(dataset_name)
158+
self.assertEqual(
159+
metadata_from_api.actual_instance.result_descriptor, metadata_for_api.actual_instance.result_descriptor
160+
)
161+
self.assertTrue(
162+
metadata_from_api.actual_instance.params.file_path.endswith(
163+
metadata_for_api.actual_instance.params.file_path
164+
)
165+
)
151166

152167
def test_add_dataset_with_permissions(self):
153168
"""Test `add_datset`."""
@@ -158,7 +173,7 @@ def test_add_dataset_with_permissions(self):
158173

159174
ge.initialize(ge_instance.address(), credentials=("admin@localhost", "adminadmin"))
160175

161-
volumes = ge.volumes()
176+
volume = ge.volume_by_name("test_data")
162177

163178
geo_transform = ge.GeoTransform(x_min=180.0, y_max=90.0, x_pixel_size=0.1, y_pixel_size=-0.1)
164179

@@ -181,23 +196,23 @@ def test_add_dataset_with_permissions(self):
181196
result_descriptor_measurement = ge.ClassificationMeasurement(
182197
measurement="Land Cover",
183198
classes={
184-
"0": "Water Bodies",
185-
"1": "Evergreen Needleleaf Forests",
186-
"2": "Evergreen Broadleaf Forests",
187-
"3": "Deciduous Needleleaf Forests",
188-
"4": "Deciduous Broadleleaf Forests",
189-
"5": "Mixed Forests",
190-
"6": "Closed Shrublands",
191-
"7": "Open Shrublands",
192-
"8": "Woody Savannas",
193-
"9": "Savannas",
194-
"10": "Grasslands",
195-
"11": "Permanent Wtlands",
196-
"12": "Croplands",
197-
"13": "Urban and Built-Up",
198-
"14": "Cropland-Natural Vegetation Mosaics",
199-
"15": "Snow and Ice",
200-
"16": "Barren or Sparsely Vegetated",
199+
0: "Water Bodies",
200+
1: "Evergreen Needleleaf Forests",
201+
2: "Evergreen Broadleaf Forests",
202+
3: "Deciduous Needleleaf Forests",
203+
4: "Deciduous Broadleleaf Forests",
204+
5: "Mixed Forests",
205+
6: "Closed Shrublands",
206+
7: "Open Shrublands",
207+
8: "Woody Savannas",
208+
9: "Savannas",
209+
10: "Grasslands",
210+
11: "Permanent Wtlands",
211+
12: "Croplands",
212+
13: "Urban and Built-Up",
213+
14: "Cropland-Natural Vegetation Mosaics",
214+
15: "Snow and Ice",
215+
16: "Barren or Sparsely Vegetated",
201216
},
202217
)
203218

@@ -244,7 +259,7 @@ def test_add_dataset_with_permissions(self):
244259
permisions = [(REGISTERED_USER_ROLE_ID, Permission.READ)]
245260

246261
dataset_name = ge.add_or_replace_dataset_with_permissions(
247-
volumes[0],
262+
volume,
248263
add_dataset_properties,
249264
geoengine_openapi_client.MetaDataDefinition(
250265
meta_data,
@@ -253,7 +268,7 @@ def test_add_dataset_with_permissions(self):
253268
)
254269

255270
self.assertEqual(dataset_name, ge.DatasetName("MCD12C1_test"))
256-
self.assertEqual(len(ge.list_datasets(name_filter="Land Cover TEST")), 1)
271+
self.assertEqual(len(list(ge.list_datasets(name_filter="Land Cover TEST"))), 1)
257272
dataset_info = ge.dataset_info_by_name(ge.DatasetName("MCD12C1_test"))
258273
self.assertEqual(dataset_info.name, "MCD12C1_test")
259274
self.assertEqual(dataset_info.description, "Land Cover")
@@ -292,7 +307,7 @@ def test_add_dataset_with_permissions(self):
292307
)
293308

294309
dataset_name = ge.add_or_replace_dataset_with_permissions(
295-
volumes[0],
310+
volume,
296311
add_dataset_properties,
297312
geoengine_openapi_client.MetaDataDefinition(
298313
meta_data,
@@ -301,7 +316,7 @@ def test_add_dataset_with_permissions(self):
301316
)
302317

303318
self.assertEqual(dataset_name, ge.DatasetName("MCD12C1_test"))
304-
self.assertEqual(len(ge.list_datasets(name_filter="Land Cover TEST")), 1)
319+
self.assertEqual(len(list(ge.list_datasets(name_filter="Land Cover TEST"))), 1)
305320
dataset_info = ge.dataset_info_by_name(ge.DatasetName("MCD12C1_test"))
306321
self.assertEqual(dataset_info.name, "MCD12C1_test")
307322
self.assertEqual(
@@ -334,7 +349,7 @@ def test_add_dataset_with_permissions(self):
334349
)
335350

336351
dataset_name = ge.add_or_replace_dataset_with_permissions(
337-
volumes[0],
352+
volume,
338353
add_dataset_properties,
339354
geoengine_openapi_client.MetaDataDefinition(
340355
meta_data,
@@ -344,8 +359,8 @@ def test_add_dataset_with_permissions(self):
344359
)
345360

346361
self.assertEqual(dataset_name, ge.DatasetName("MCD12C1_test"))
347-
self.assertEqual(len(ge.list_datasets(name_filter="Land Cover TEST")), 1)
348-
dataset_info = ge.dataset_info_by_name(ge.DatasetName("MCD12C1_test"))
362+
self.assertEqual(len(list(ge.list_datasets(name_filter="Land Cover TEST"))), 1)
363+
dataset_info = ge.dataset_info_by_name(dataset_name)
349364
self.assertEqual(dataset_info.name, "MCD12C1_test")
350365
self.assertEqual(
351366
dataset_info.description,

0 commit comments

Comments
 (0)