Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## [Unreleased]

### Added

- Converter for Bavaria, Germany

### Changed

- Replace the implicit `blocksize=0` URL query parameter with an explicit `avoid_range_request` attribute on the converter class
- Improve Converter template.py usability

## [v0.10.0] - 2025-03-11
Expand Down
16 changes: 10 additions & 6 deletions fiboa_cli/convert_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -125,7 +125,7 @@ def add_asset_to_collection(collection, output_file, rows=None, columns=None):


def stream_file(fs, src_uri, dst_file, chunk_size=10 * 1024 * 1024):
with fs.open(src_uri, mode="rb") as f:
with fs.open(src_uri, mode="rb", block_size=0) as f:
while True:
chunk = f.read(chunk_size)
if not chunk:
Expand Down Expand Up @@ -172,6 +172,7 @@ class BaseConverter:
source_variants: Optional[dict[dict[str, str] | str]] = None
variant: str = None
open_options = {}
avoid_range_request = False
years: Optional[dict[dict[int, str] | str]] = None
year: str = None

Expand Down Expand Up @@ -218,7 +219,7 @@ def layer_filter(self, layer: str, uri: str) -> bool:
def post_migrate(self, gdf: gpd.GeoDataFrame) -> gpd.GeoDataFrame:
return gdf

def get_cache(self, cache_folder=None, force=False):
def get_cache(self, cache_folder=None, force=False, **kwargs):
if cache_folder is None:
if not force:
return None, None
Expand All @@ -228,12 +229,12 @@ def get_cache(self, cache_folder=None, force=False):
with TemporaryDirectory(**_kwargs) as tmp_folder:
cache_folder = tmp_folder

cache_fs = get_fs(cache_folder)
cache_fs = get_fs(cache_folder, **kwargs)
if not cache_fs.exists(cache_folder):
cache_fs.makedirs(cache_folder)
return cache_fs, cache_folder

def download_files(self, uris, cache_folder=None):
def download_files(self, uris, cache_folder=None, **kwargs):
"""Download (and cache) files from various sources"""
if isinstance(uris, str):
uris = {uris: name_from_uri(uris)}
Expand All @@ -249,7 +250,7 @@ def download_files(self, uris, cache_folder=None):
else:
name = target

source_fs = get_fs(uri)
source_fs = get_fs(uri, **kwargs)
cache_fs, cache_folder = self.get_cache(cache_folder, force=True)

if isinstance(source_fs, LocalFileSystem):
Expand Down Expand Up @@ -501,7 +502,10 @@ def convert(
raise ValueError("No input files provided")

log("Getting file(s) if not cached yet")
paths = self.download_files(urls, cache)
request_args = {}
if self.avoid_range_request:
request_args["block_size"] = 0
paths = self.download_files(urls, cache, **request_args)

kwargs.update(self.open_options)
gdf = self.read_data(paths, **kwargs)
Expand Down
2 changes: 1 addition & 1 deletion fiboa_cli/converter_rest.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@ def get_data(self, paths, **kwargs):
return super().get_data(paths, **kwargs)

base_url = paths[0] # loop over paths to support more than 1 source
source_fs = get_fs(base_url)
source_fs = get_fs(base_url, **kwargs)
cache_fs, cache_folder = self.get_cache(self.cache_folder)

service_metadata = requests.get(base_url, {"f": "pjson"}).json()
Expand Down
3 changes: 2 additions & 1 deletion fiboa_cli/datasets/be_wal.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@

class Converter(AdminConverterMixin, BaseConverter):
sources = {
"https://geoservices.wallonie.be/geotraitement/spwdatadownload/get/2a0d9be0-ac3d-443e-9db0-a7cfb0f128e2/LU_ExistingLandUse_SIGEC2022.gml.zip?blocksize=0": [
"https://geoservices.wallonie.be/geotraitement/spwdatadownload/get/2a0d9be0-ac3d-443e-9db0-a7cfb0f128e2/LU_ExistingLandUse_SIGEC2022.gml.zip": [
"LU_ExistingLandUse_SIGEC2022.gml"
]
}
avoid_range_request = True
id = "be_wal"
admin_region_code = "WAL"
short_name = "Belgium, Wallonia"
Expand Down
57 changes: 57 additions & 0 deletions fiboa_cli/datasets/de_by.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
from ..convert_utils import BaseConverter
from .commons.admin import AdminConverterMixin


class Converter(AdminConverterMixin, BaseConverter):
    """Converter for the Bavaria (Germany) land-use dataset.

    Reads the ``landnutzung.gpkg`` download, keeps only the
    ``ln_landwirtschaft`` layer and only the rows whose ``bewirtschaftung``
    code is in the list given in ``column_filters``.
    """

    # --- Source -----------------------------------------------------------
    sources = "https://geodaten.bayern.de/odd/m/3/daten/ln/landnutzung.gpkg"
    # Ask the download step to fetch this source without HTTP range requests.
    avoid_range_request = True

    # --- Collection metadata ----------------------------------------------
    id = "de_by"
    admin_subdivision_code = "BY"
    short_name = "Germany, Bavaria"
    title = "Field boundaries for Bavaria, Germany"
    description = (
        'A field block (German: "Feldblock") is a contiguous agricultural area '
        "surrounded by permanent boundaries, which is cultivated by one or more "
        "farmers with one or more crops, is fully or partially set aside or is "
        "fully or partially taken out of production."
    )
    license = "CC-BY-4.0"
    attribution = "Datenquelle: Bayerische Vermessungsverwaltung – www.geodaten.bayern.de"
    providers = [
        {
            "name": "Bayerische Vermessungsverwaltung",
            "url": "https://www.ldbv.bayern.de",
            "roles": ["producer", "licensor"],
        }
    ]
    extensions = {"https://fiboa.github.io/flik-extension/v0.1.0/schema.yaml"}

    # --- Column mapping (source column -> output column(s)) ---------------
    # Source columns currently left unmapped: artderbetriebsflaeche, name,
    # istweiterenutzung, mappingannahme.
    columns = {
        "geometry": "geometry",
        "objid": ["id", "flik"],
        "datumderletztenueberpruefung": "determination_datetime",
        "beginnt": "datetime:first_determination",
        "bewirtschaftung": "cultivation",
        "quellobjektid": "source_id",
    }

    # Schemas for the output columns declared here rather than taken from
    # the listed extension.
    missing_schemas = {
        "properties": {
            "datetime:first_determination": {"type": "date-time"},
            "cultivation": {"type": "string"},
            "source_id": {"type": "string"},
        }
    }

    # --- Row filtering ----------------------------------------------------
    column_filters = {
        # Code list reference:
        # see https://www.adv-online.de/GeoInfoDok/Aktuelle-Anwendungsschemata/Landnutzung-1.0.2/binarywriterservlet?imgUid=be12989a-7b60-5819-393b-216067bef8a0&uBasVariant=11111111-1111-1111-1111-111111111111#_C10573-_A10573_44376
        "bewirtschaftung": lambda col: col.isin(
            ["1010", "1011", "1012", "1013", "1014", "1030", "1040", "1050"]
        )
    }

    def layer_filter(self, layer: str, uri: str) -> bool:
        """Return True only for the ``ln_landwirtschaft`` layer; ``uri`` is not used."""
        return layer == "ln_landwirtschaft"
7 changes: 4 additions & 3 deletions fiboa_cli/datasets/es_ar.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,11 @@ class ARConverter(ESBaseConverter):
# These files can be annoying to download (web server failure, no http-range support for continuation)
# Alternative is to download the files by municipality, check the atom.xml
sources = {
"https://icearagon.aragon.es/datosdescarga/descarga.php?file=/CartoTema/sigpac/rec22_sigpac.shp.zip&blocksize=0": "rec22_sigpac.shp.zip",
"https://icearagon.aragon.es/datosdescarga/descarga.php?file=/CartoTema/sigpac/rec44_sigpac.shp.zip&blocksize=0": "rec44_sigpac.shp.zip",
"https://icearagon.aragon.es/datosdescarga/descarga.php?file=/CartoTema/sigpac/rec50_sigpac.shp.zip&blocksize=0": "rec50_sigpac.shp.zip",
"https://icearagon.aragon.es/datosdescarga/descarga.php?file=/CartoTema/sigpac/rec22_sigpac.shp.zip": "rec22_sigpac.shp.zip",
"https://icearagon.aragon.es/datosdescarga/descarga.php?file=/CartoTema/sigpac/rec44_sigpac.shp.zip": "rec44_sigpac.shp.zip",
"https://icearagon.aragon.es/datosdescarga/descarga.php?file=/CartoTema/sigpac/rec50_sigpac.shp.zip": "rec50_sigpac.shp.zip",
}
avoid_range_request = True

id = "es_ar"
short_name = "Spain Aragon"
Expand Down
2 changes: 1 addition & 1 deletion fiboa_cli/datasets/es_nc.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,4 +72,4 @@ def download_files(self, uris, cache_folder=None):
# Hostname has invalid SSL, prefill cache and avoid ssl-errors
self.prefill_cache(uris, cache_folder)

return super().download_files(uris, cache_folder=cache_folder)
return super().download_files(uris, cache_folder)
2 changes: 1 addition & 1 deletion fiboa_cli/datasets/sk.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@

class Converter(AdminConverterMixin, BaseConverter):
sources = {
"https://data.slovensko.sk/download?id=e39ad227-1899-4cff-b7c8-734f90aa0b59&blocksize=0": [
"https://data.slovensko.sk/download?id=e39ad227-1899-4cff-b7c8-734f90aa0b59": [
"HU2024_20240917shp/HU2024_20240917.shp"
]
}
Expand Down
14 changes: 5 additions & 9 deletions fiboa_cli/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -103,28 +103,24 @@ def load_datatypes(version):
return response["$defs"]


def get_fs(url_or_path: str, **kwargs) -> AbstractFileSystem:
    """Choose an fsspec filesystem by sniffing the scheme of the input URL or path.

    Args:
        url_or_path: URL or local path; its scheme selects the filesystem.
        **kwargs: Forwarded unchanged to the constructor of the selected
            filesystem. Callers pass ``block_size=0`` here for HTTP origin
            servers that do not support range requests; this replaces the
            former ``blocksize=0`` URL query parameter workaround.

    Returns:
        ``HTTPFileSystem`` for ``http``/``https``, ``S3FileSystem`` for ``s3``,
        ``GCSFileSystem`` for ``gs``, and ``LocalFileSystem`` for anything else.
    """
    parsed = urlparse(url_or_path)

    if parsed.scheme in ("http", "https"):
        return HTTPFileSystem(**kwargs)

    if parsed.scheme == "s3":
        # Imported inside the branch, so s3fs is only loaded for s3:// URIs.
        from s3fs import S3FileSystem

        return S3FileSystem(**kwargs)

    if parsed.scheme == "gs":
        # Imported inside the branch, so gcsfs is only loaded for gs:// URIs.
        from gcsfs import GCSFileSystem

        return GCSFileSystem(**kwargs)

    # No recognised remote scheme: treat the input as a local path.
    return LocalFileSystem(**kwargs)


def is_valid_file_uri(uri, extensions=[]):
Expand Down
Loading