Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 6 additions & 1 deletion exposure/__init__.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,16 @@
"""Exposure module: data sources answering 'what assets are at risk where'."""

from .ghsl import get_ghsl_built
from .models import ExposureGrid
from .models import BuildingCollection, BuildingFootprint, ExposureGrid
from .osm import get_osm_buildings, parse_overpass_response
from .worldpop import get_worldpop_population

__all__ = [
"BuildingCollection",
"BuildingFootprint",
"ExposureGrid",
"get_ghsl_built",
"get_osm_buildings",
"get_worldpop_population",
"parse_overpass_response",
]
7 changes: 7 additions & 0 deletions exposure/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,3 +52,10 @@
Path.home() / ".cache" / "flood_inundation" / "worldpop"
)


# --- OpenStreetMap building footprints via Overpass API --------------------

OVERPASS_URL: str = "https://overpass-api.de/api/interpreter"
OSM_BUFFER_DEG: float = 0.1
OSM_DEFAULT_TIMEOUT_S: int = 60
OSM_USER_AGENT: str = "flood_inundation/0.1 (research; geospatial pipeline)"
36 changes: 35 additions & 1 deletion exposure/models.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""Dataclasses for the exposure module."""

from dataclasses import dataclass
from dataclasses import dataclass, field
from types import MappingProxyType

import numpy as np
from rasterio.transform import Affine
Expand Down Expand Up @@ -29,3 +30,36 @@ class ExposureGrid:
def shape(self) -> tuple[int, int]:
"""Return the (rows, cols) shape of the underlying raster."""
return self.values.shape # type: ignore[return-value]


@dataclass(frozen=True)
class BuildingFootprint:
"""One OSM building polygon with metadata.

`polygon` is an immutable tuple of (lat, lon) vertices in input order
(the closing duplicate may or may not be present). `area_m2` is the
polygon's planar area projected with a cos(lat) factor at the centroid
latitude — accurate to within a fraction of a percent for typical
building-sized polygons. `tags` is a read-only view of the OSM tag dict
so the dataclass stays hashable.
"""

osm_id: int
polygon: tuple[tuple[float, float], ...]
area_m2: float
tags: MappingProxyType # type: ignore[type-arg]


@dataclass
class BuildingCollection:
"""A bbox-bounded collection of OSM buildings."""

buildings: tuple[BuildingFootprint, ...] = field(default_factory=tuple)
bbox: BoundingBox | None = None

def __len__(self) -> int:
return len(self.buildings)

def total_area_m2(self) -> float:
"""Sum of footprint area across all buildings."""
return float(sum(b.area_m2 for b in self.buildings))
149 changes: 149 additions & 0 deletions exposure/osm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,149 @@
"""Fetch OSM building footprints via the Overpass API."""

import json
import math
import urllib.error
import urllib.request
from collections.abc import Iterable
from types import MappingProxyType

from dem.models import BoundingBox
from dem.utils import bbox_from_point

from .constants import (
OSM_BUFFER_DEG,
OSM_DEFAULT_TIMEOUT_S,
OSM_USER_AGENT,
OVERPASS_URL,
)
from .models import BuildingCollection, BuildingFootprint

# 1 degree of latitude is ~111,320 m at the WGS84 surface; longitude
# converges with cos(lat). Both are exact enough for building-scale
# polygons (the geodesic correction is well below 0.1% at these sizes).
_METRES_PER_DEGREE_LAT: float = 111_320.0


def _build_overpass_query(
bbox: BoundingBox,
timeout_seconds: int,
) -> str:
"""Build an Overpass QL query for building ways inside a bbox.

Returns the geometry of each way so we don't need a second roundtrip
to fetch node coordinates.
"""
south = bbox.min_lat
west = bbox.min_lon
north = bbox.max_lat
east = bbox.max_lon
return (
f"[out:json][timeout:{timeout_seconds}];"
f'(way["building"]({south},{west},{north},{east}););'
f"out body geom;"
)


def _polygon_area_m2(polygon: Iterable[tuple[float, float]]) -> float:
"""Approximate polygon area in m^2 from (lat, lon) vertices.

Uses the shoelace formula in a local equirectangular projection
centred on the polygon's mean latitude. Building-sized polygons see
sub-0.1% error from this approximation — far below the noise floor of
the OSM source data itself.
"""
pts = list(polygon)
if len(pts) < 3:
return 0.0
avg_lat = sum(p[0] for p in pts) / len(pts)
metres_per_deg_lon = _METRES_PER_DEGREE_LAT * math.cos(math.radians(avg_lat))

area = 0.0
n = len(pts)
for i in range(n):
lat_i, lon_i = pts[i]
lat_j, lon_j = pts[(i + 1) % n]
x_i = lon_i * metres_per_deg_lon
y_i = lat_i * _METRES_PER_DEGREE_LAT
x_j = lon_j * metres_per_deg_lon
y_j = lat_j * _METRES_PER_DEGREE_LAT
area += x_i * y_j - x_j * y_i
return abs(area) / 2.0


def _element_to_building(element: dict) -> BuildingFootprint | None:
"""Convert one Overpass element to a BuildingFootprint, or None if invalid."""
geometry = element.get("geometry")
if not geometry or len(geometry) < 3:
return None
polygon = tuple((float(g["lat"]), float(g["lon"])) for g in geometry)
return BuildingFootprint(
osm_id=int(element["id"]),
polygon=polygon,
area_m2=_polygon_area_m2(polygon),
tags=MappingProxyType(dict(element.get("tags", {}))),
)


def parse_overpass_response(response_json: dict) -> BuildingCollection:
"""Parse a raw Overpass JSON response into a BuildingCollection.

Exposed as a public helper so test fixtures (saved Overpass responses)
can be loaded without touching the network.
"""
buildings: list[BuildingFootprint] = []
for element in response_json.get("elements", []):
if element.get("type") != "way":
continue
building = _element_to_building(element)
if building is not None:
buildings.append(building)
return BuildingCollection(buildings=tuple(buildings))


def get_osm_buildings(
lat: float,
lon: float,
buffer_deg: float = OSM_BUFFER_DEG,
timeout_seconds: int = OSM_DEFAULT_TIMEOUT_S,
overpass_url: str = OVERPASS_URL,
) -> BuildingCollection:
"""Query Overpass for OSM building polygons in the bbox around a point.

Note: Overpass-API.de has rate limits; do not call this in tight loops.
For repeated runs over the same area, save the JSON response once and
reload via `parse_overpass_response`.

Args:
lat: Latitude of the region center, decimal degrees.
lon: Longitude of the region center, decimal degrees.
buffer_deg: Half-width of the square bbox around the point, degrees.
timeout_seconds: Overpass query timeout (server-side).
overpass_url: Override the Overpass endpoint (e.g. for a private
instance).

Returns:
A BuildingCollection holding every building way the server returned
plus the requested bbox.

Raises:
urllib.error.HTTPError: Overpass server returned a non-2xx response
(e.g. rate limited, malformed query).
"""
bbox = bbox_from_point(lat, lon, buffer_deg)
query = _build_overpass_query(bbox=bbox, timeout_seconds=timeout_seconds)

request = urllib.request.Request(
overpass_url,
data=query.encode("utf-8"),
method="POST",
headers={
"Content-Type": "text/plain; charset=utf-8",
"User-Agent": OSM_USER_AGENT,
},
)
with urllib.request.urlopen(request, timeout=timeout_seconds + 10) as response:
payload = json.loads(response.read())

collection = parse_overpass_response(payload)
return BuildingCollection(buildings=collection.buildings, bbox=bbox)
21 changes: 20 additions & 1 deletion tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,12 @@
import pytest
import rasterio

import json

from dem import get_dem
from dem.models import DEM, BoundingBox
from exposure.models import ExposureGrid
from exposure import parse_overpass_response
from exposure.models import BuildingCollection, ExposureGrid
from hydrology import build_flow_grid, compute_hand, extract_streams
from hydrology.models import FlowGrid, Hand, StreamNetwork

Expand All @@ -19,6 +22,9 @@
ROBIT_BATA_WORLDPOP_FIXTURE: Path = (
Path(__file__).resolve().parent / "fixtures" / "robit_bata_worldpop.tif"
)
ROBIT_BATA_OSM_FIXTURE: Path = (
Path(__file__).resolve().parent / "fixtures" / "robit_bata_osm.json"
)

ROBIT_BATA_FIXTURE: Path = Path(__file__).resolve().parent / "fixtures" / "robit_bata.tif"

Expand Down Expand Up @@ -138,6 +144,19 @@ def robit_bata_worldpop() -> ExposureGrid:
)


@pytest.fixture(scope="session")
def robit_bata_osm() -> BuildingCollection:
"""Parse the committed Robit Bata OSM Overpass response into buildings.

Saved as raw JSON so the fixture exercises `parse_overpass_response`
end to end. Regenerate via
`python tests/fixtures/_generate_robit_bata_osm.py`.
"""
with open(ROBIT_BATA_OSM_FIXTURE) as f:
payload = json.load(f)
return parse_overpass_response(payload)


@pytest.fixture(scope="session")
def ne_brazil_dem() -> DEM:
"""Live DEM at (-5.0, -39.0) with the default buffer.
Expand Down
120 changes: 120 additions & 0 deletions tests/exposure/test_osm.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,120 @@
"""Tests for exposure.osm (offline using committed Overpass JSON fixture)."""

import math

import pytest

from exposure import get_osm_buildings, parse_overpass_response
from exposure.models import BuildingCollection, BuildingFootprint
from exposure.osm import _polygon_area_m2


class TestPolygonAreaApproximation:
def test_unit_square_at_equator(self) -> None:
# 0.001 deg square at the equator -> ~111.32 m on a side, ~12_392 m^2.
polygon = [(0.0, 0.0), (0.0, 0.001), (0.001, 0.001), (0.001, 0.0)]
area = _polygon_area_m2(polygon)
# Allow 0.5% tolerance for the cos(lat) approximation at the centroid.
assert area == pytest.approx(12392.5, rel=0.005)

def test_polygon_at_robit_latitude(self) -> None:
# 0.001 deg square at lat=11.8: lon shrinks by cos(11.8 deg) ~ 0.9788.
# Side lengths ~ 111.32 x 108.96; area ~ 12,131 m^2.
polygon = [
(11.8, 37.5),
(11.8, 37.501),
(11.801, 37.501),
(11.801, 37.5),
]
area = _polygon_area_m2(polygon)
expected = 111.32 * (111.32 * math.cos(math.radians(11.8005)))
assert area == pytest.approx(expected, rel=0.005)

def test_degenerate_polygon_returns_zero(self) -> None:
assert _polygon_area_m2([(0.0, 0.0), (0.0, 1.0)]) == 0.0


class TestParseOverpassResponse:
def test_empty_response(self) -> None:
collection = parse_overpass_response({"elements": []})
assert len(collection) == 0

def test_skips_non_way_elements(self) -> None:
payload = {
"elements": [
{"type": "node", "id": 1, "lat": 0, "lon": 0},
{"type": "relation", "id": 2, "tags": {"building": "yes"}},
]
}
collection = parse_overpass_response(payload)
assert len(collection) == 0

def test_skips_ways_with_too_few_vertices(self) -> None:
payload = {
"elements": [
{
"type": "way",
"id": 99,
"tags": {"building": "house"},
"geometry": [{"lat": 0, "lon": 0}, {"lat": 0, "lon": 1}],
}
]
}
collection = parse_overpass_response(payload)
assert len(collection) == 0


class TestRobitBataOsmFixture:
"""Use the committed Overpass JSON fixture to assert the parser."""

def test_total_building_count(
self,
robit_bata_osm: BuildingCollection,
) -> None:
# Pinned: 35 buildings in the Robit Bata bbox at fixture-generation time.
assert len(robit_bata_osm) == 35

def test_total_area_m2(
self,
robit_bata_osm: BuildingCollection,
) -> None:
# Pinned to the fixture: ~4,129 m^2 of OSM-mapped building footprints.
assert robit_bata_osm.total_area_m2() == pytest.approx(4129.2, abs=1.0)

def test_largest_building_is_place_of_worship(
self,
robit_bata_osm: BuildingCollection,
) -> None:
# Pinned: the largest mapped building in the patch is a place of
# worship at ~599 m^2 (church).
largest = max(robit_bata_osm.buildings, key=lambda b: b.area_m2)
assert largest.area_m2 == pytest.approx(598.9, abs=1.0)
assert largest.tags.get("amenity") == "place_of_worship"

def test_each_building_has_polygon_and_tags(
self,
robit_bata_osm: BuildingCollection,
) -> None:
for b in robit_bata_osm.buildings:
assert isinstance(b, BuildingFootprint)
assert len(b.polygon) >= 3
assert b.area_m2 > 0
assert "building" in b.tags


@pytest.mark.integration
class TestGetOsmBuildingsIntegration:
def test_live_query_returns_buildings(self) -> None:
# Live Overpass query for a tiny well-mapped urban area: central
# Liechtenstein (Vaduz). Uses a small bbox to keep response size low.
collection = get_osm_buildings(
lat=47.141,
lon=9.521,
buffer_deg=0.005,
timeout_seconds=30,
)
# Vaduz core has at least a few dozen mapped buildings.
assert len(collection) > 10
for b in collection.buildings:
assert b.area_m2 > 0
assert "building" in b.tags
Loading
Loading