Skip to content

Commit 346f029

Browse files
committed
fix(bootstrap): two-step encoding for procedures and deployments
ensure_procedure and ensure_deployment now mirror ensure_system: POST a geo+json stub (uid/name/description/geometry/featureType/validTime), then optionally PUT a SensorML body with Content-Type: application/sml+json against /resource/{id}. Also adds _warn_if_sml_fields_in_stub: a closed-set guardrail that warns (or raises, when OS4CSAPI_STRICT_BOOTSTRAP=1) if a stub body still carries SensorML-only fields under properties. force_sml=True now applies to procedures and deployments as well as systems, allowing in-place recovery for records that were created with the old single-POST shape. Background: pre-strict CSAPI servers returned HTTP 201 and silently dropped SensorML metadata on procedures/deployments. Strict upstream (connected-systems-go after a467aba) returns HTTP 400. Either way, the bug was on the client. Refs: #5
1 parent 04b7354 commit 346f029

1 file changed

Lines changed: 202 additions & 19 deletions

File tree

publishers/bootstrap_helpers.py

Lines changed: 202 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,32 @@
88
Functions:
99
find_by_uid() — Lookup resource by UID in a collection
1010
find_datastream() — Lookup datastream by outputName under a system
11-
ensure_procedure() — Create procedure if not exists
12-
ensure_system() — Create system (geo+json stub POST → SensorML PUT)
11+
ensure_procedure() — Create procedure (geo+json stub POST → optional SensorML PUT)
12+
ensure_system() — Create system (geo+json stub POST → optional SensorML PUT)
1313
ensure_datastream() — Create datastream with SWE DataRecord schema
14-
ensure_deployment() — Create deployment node (with optional parent)
14+
ensure_deployment() — Create deployment node (geo+json stub POST → optional SensorML PUT)
1515
clean_resource() — Delete resource by UID if it exists
1616
api_get/post/put/delete() — Low-level HTTP helpers with retry
17+
18+
Content-type contract (CSAPI Part 1, OGC 23-001):
19+
- application/geo+json → spatial-discovery view; carries uid/name/description
20+
(+ geometry) only. SensorML metadata is INTENTIONALLY
21+
stripped server-side.
22+
- application/sml+json → full SensorML metadata view; carries keywords,
23+
identifiers, classifiers, characteristics, capabilities,
24+
contacts, documentation/documents, history,
25+
securityConstraints, legalConstraints, etc.
26+
27+
Bootstrap pattern for procedures, systems, and deployments:
28+
1. POST a small geo+json stub (Content-Type: application/json — server
29+
interprets as application/geo+json on these endpoints).
30+
2. PUT the full SensorML body (Content-Type: application/sml+json) against
31+
the just-created /resource/{id} path.
32+
33+
This module enforces the contract via _warn_if_sml_fields_in_stub(): if a
34+
caller passes a "stub" with SensorML-only fields under properties, a loud
35+
warning is emitted. Set OS4CSAPI_STRICT_BOOTSTRAP=1 to elevate the warning
36+
to an exception (recommended for tests and CI).
1737
"""
1838

1939
import argparse
@@ -239,27 +259,142 @@ def find_datastream(base_url: str, auth: str, system_id: str,
239259
return None
240260

241261

262+
# ═══════════════════════════════════════════════════════════════════════════
263+
# Encoding-contract guardrail
264+
# ═══════════════════════════════════════════════════════════════════════════
265+
266+
# SensorML-only fields that the CSAPI server silently strips when it sees
267+
# them under `properties` of a geo+json POST. Any of these fields appearing
268+
# in a "stub" body indicates the caller has not split GeoJSON encoding from
269+
# SensorML encoding properly — pre-strict servers accept the stub (HTTP 201) but
270+
# the listed fields will be DROPPED on the server side.
271+
#
272+
# Background: pre-strict CSAPI servers returned 201 + silent drop. Strict
273+
# servers (post-`a467aba` upstream) return HTTP 400. Either way, the bug
274+
# is on the client. See docs/engineering/2026-05-silent-sensorml-field-loss.md
275+
SML_ONLY_FIELDS = frozenset({
    "keywords",
    "identifiers",
    "classifiers",
    "characteristics",
    "capabilities",
    "contacts",
    "documentation",  # OGC links-array form (not the SensorML `documents` form)
    "documents",      # SensorML form
    "history",
    "securityConstraints",
    "legalConstraints",
    "lineage",
    "usageConstraints",
    "typeOf",
    "configuration",
    "modes",
    "parameters",
    "inputs",
    "outputs",
    "components",
    "connections",
    "localReferenceFrames",
    "localTimeFrames",
    "method",
})

# Name of the environment variable that escalates the guardrail warning to
# a RuntimeError.
_STRICT_BOOTSTRAP_ENV = "OS4CSAPI_STRICT_BOOTSTRAP"


def _strict_bootstrap_enabled() -> bool:
    """Return True if the strict-bootstrap env var is currently truthy.

    Accepted truthy spellings (case-insensitive, surrounding whitespace
    ignored): ``1``, ``true``, ``yes``.
    """
    value = os.environ.get(_STRICT_BOOTSTRAP_ENV, "")
    return value.strip().lower() in ("1", "true", "yes")


# Import-time snapshot of the strict flag. Kept so existing code that reads
# or overrides this module attribute keeps working; the guardrail below also
# re-reads the environment on every call.
_STRICT_BOOTSTRAP = _strict_bootstrap_enabled()


def _warn_if_sml_fields_in_stub(stub: dict, label: str) -> None:
    """Loud warning (or exception in strict mode) if a caller passes a 'stub'
    body whose `properties` contain SensorML-only fields.

    These fields will be silently dropped server-side on a geo+json POST.
    Callers must split SensorML metadata out into a separate ``sml_body``
    and let the helper PUT it with ``Content-Type: application/sml+json``.

    Set OS4CSAPI_STRICT_BOOTSTRAP=1 to convert the warning to RuntimeError —
    recommended for tests and CI. The variable is consulted on every call,
    so it may be set after this module has been imported (e.g. from a test
    fixture); the import-time ``_STRICT_BOOTSTRAP`` snapshot is honoured too.

    Args:
        stub: The body about to be POSTed. If it has a ``properties`` key,
            that mapping is inspected; otherwise the top-level keys are.
            Non-dict values are ignored.
        label: Caller identification included in the message.

    Raises:
        RuntimeError: In strict mode, when any key in ``SML_ONLY_FIELDS``
            is present.
    """
    if not isinstance(stub, dict):
        return
    # Flat bodies (no `properties` wrapper) are checked at the top level.
    props = stub.get("properties", stub)
    if not isinstance(props, dict):
        return
    leaked = sorted(SML_ONLY_FIELDS & set(props))
    if not leaked:
        return
    msg = (
        f"[ENCODING-CONTRACT] {label}: stub body carries SensorML-only "
        f"field(s) under `properties`: {leaked}. These will be silently "
        f"dropped (or 400-rejected by strict servers) on the geo+json POST. "
        f"Move them into a separate sml_body argument."
    )
    # Strict if either the import-time snapshot or the live environment says
    # so; reading the environment here lets tests toggle it after import.
    if _STRICT_BOOTSTRAP or _strict_bootstrap_enabled():
        raise RuntimeError(msg)
    print(f" [WARN] {msg}")
333+
334+
242335
# ═══════════════════════════════════════════════════════════════════════════
243336
# Idempotent resource creation
244337
# ═══════════════════════════════════════════════════════════════════════════
245338

246-
def ensure_procedure(base_url: str, auth: str, uid: str, body: dict,
247-
*, dry_run: bool = False, stats: dict = None) -> str | None:
248-
"""Create a procedure if it doesn't already exist. Returns server ID."""
339+
def ensure_procedure(base_url: str, auth: str, uid: str, stub_body: dict,
340+
sml_body: dict | None = None,
341+
*, dry_run: bool = False, stats: dict = None,
342+
force_sml: bool = False) -> str | None:
343+
"""Create a procedure if it doesn't already exist. Returns server ID.
344+
345+
Two-step encoding-correct pattern (mirrors ``ensure_system``):
346+
347+
1. POST ``stub_body`` (geo+json Feature: uid/name/description + optional
348+
geometry) with ``Content-Type: application/json``. The server
349+
interprets this as ``application/geo+json`` on the procedures
350+
endpoint.
351+
2. If ``sml_body`` is provided, PUT it against the new resource path
352+
with ``Content-Type: application/sml+json`` to populate full
353+
SensorML metadata (keywords, identifiers, classifiers,
354+
characteristics, capabilities, contacts, documents, history,
355+
securityConstraints, legalConstraints, …).
356+
357+
When ``force_sml`` is True and the procedure already exists, the
358+
SensorML body is PUT again — useful for correcting previously-broken
359+
payloads after this fix lands.
360+
361+
Callers MUST keep SensorML metadata out of the stub. The
362+
``_warn_if_sml_fields_in_stub`` guardrail catches accidental leakage.
363+
"""
364+
_warn_if_sml_fields_in_stub(stub_body, f"ensure_procedure({uid})")
365+
249366
existing = find_by_uid(base_url, auth, "procedures", uid)
250367
if existing:
251-
print(f" [SKIP] Procedure {uid} already exists (id={existing})")
252-
if stats:
253-
stats.setdefault("skipped", 0)
254-
stats["skipped"] += 1
368+
if force_sml and sml_body:
369+
if dry_run:
370+
print(f" [DRY] Would force-PUT SML for procedure {uid} (id={existing})")
371+
else:
372+
api_put(base_url, f"procedures/{existing}", sml_body, auth,
373+
content_type="application/sml+json")
374+
print(f" [SML] Force-PUT SensorML for procedure {uid} (id={existing})")
375+
if stats:
376+
stats.setdefault("sml_updated", 0)
377+
stats["sml_updated"] += 1
378+
else:
379+
print(f" [SKIP] Procedure {uid} already exists (id={existing})")
380+
if stats:
381+
stats.setdefault("skipped", 0)
382+
stats["skipped"] += 1
255383
return existing
256384

257385
if dry_run:
258386
print(f" [DRY] Would create procedure: {uid}")
259387
return None
260388

261-
result = api_post(base_url, "procedures", body, auth)
389+
# Step 1: POST geo+json stub
390+
result = api_post(base_url, "procedures", stub_body, auth)
262391
new_id = result.get("id") if result else None
392+
393+
# Step 2: PUT SensorML if provided
394+
if new_id and sml_body:
395+
api_put(base_url, f"procedures/{new_id}", sml_body, auth,
396+
content_type="application/sml+json")
397+
263398
print(f" [OK] Created procedure {uid} → id={new_id}")
264399
if stats:
265400
stats.setdefault("created", 0)
@@ -278,6 +413,8 @@ def ensure_system(base_url: str, auth: str, uid: str, stub_body: dict,
278413
When *force_sml* is True and the system already exists, the SML body is
279414
PUT again (useful for correcting previously-broken SML payloads).
280415
"""
416+
_warn_if_sml_fields_in_stub(stub_body, f"ensure_system({uid})")
417+
281418
existing = find_by_uid(base_url, auth, "systems", uid)
282419
if existing:
283420
if force_sml and sml_body:
@@ -345,33 +482,79 @@ def ensure_datastream(base_url: str, auth: str, system_id: str,
345482
return new_id
346483

347484

348-
def ensure_deployment(base_url: str, auth: str, uid: str, body: dict,
485+
def ensure_deployment(base_url: str, auth: str, uid: str, stub_body: dict,
486+
sml_body: dict | None = None,
349487
parent_id: str | None = None,
350-
*, dry_run: bool = False, stats: dict = None) -> str | None:
351-
"""Create a deployment node if it doesn't exist. Returns server ID."""
488+
*, dry_run: bool = False, stats: dict = None,
489+
force_sml: bool = False) -> str | None:
490+
"""Create a deployment node if it doesn't exist. Returns server ID.
491+
492+
Two-step encoding-correct pattern (mirrors ``ensure_system`` and
493+
``ensure_procedure``):
494+
495+
1. POST ``stub_body`` (geo+json Feature: uid/name/description +
496+
optional geometry, validTime, deployment-tree links) with
497+
``Content-Type: application/json``. Server interprets as
498+
``application/geo+json``.
499+
2. If ``sml_body`` is provided, PUT it against the new resource path
500+
with ``Content-Type: application/sml+json`` to populate full
501+
SensorML metadata (keywords, identifiers, classifiers,
502+
characteristics, capabilities, contacts, documents, history,
503+
securityConstraints, legalConstraints, …).
504+
505+
When ``parent_id`` is given, the create path is
506+
``deployments/{parent_id}/subdeployments``; the SML PUT still targets
507+
the canonical ``deployments/{new_id}`` path.
508+
509+
When ``force_sml`` is True and the deployment already exists, the
510+
SensorML body is PUT again.
511+
512+
Callers MUST keep SensorML metadata out of the stub. The
513+
``_warn_if_sml_fields_in_stub`` guardrail catches accidental leakage.
514+
"""
515+
_warn_if_sml_fields_in_stub(stub_body, f"ensure_deployment({uid})")
516+
352517
# Check top-level deployments first
353518
existing = find_by_uid(base_url, auth, "deployments", uid)
354519
if not existing and parent_id:
355520
# Go server only lists subdeployments under parent endpoint
356521
existing = find_by_uid(base_url, auth,
357522
f"deployments/{parent_id}/subdeployments", uid)
358523
if existing:
359-
print(f" [SKIP] Deployment {uid} already exists (id={existing})")
360-
if stats:
361-
stats.setdefault("skipped", 0)
362-
stats["skipped"] += 1
524+
if force_sml and sml_body:
525+
if dry_run:
526+
print(f" [DRY] Would force-PUT SML for deployment {uid} (id={existing})")
527+
else:
528+
api_put(base_url, f"deployments/{existing}", sml_body, auth,
529+
content_type="application/sml+json")
530+
print(f" [SML] Force-PUT SensorML for deployment {uid} (id={existing})")
531+
if stats:
532+
stats.setdefault("sml_updated", 0)
533+
stats["sml_updated"] += 1
534+
else:
535+
print(f" [SKIP] Deployment {uid} already exists (id={existing})")
536+
if stats:
537+
stats.setdefault("skipped", 0)
538+
stats["skipped"] += 1
363539
return existing
364540

365541
if dry_run:
366542
print(f" [DRY] Would create deployment: {uid}")
367543
return None
368544

545+
# Step 1: POST geo+json stub at the (possibly nested) create path
369546
path = "deployments"
370547
if parent_id:
371548
path = f"deployments/{parent_id}/subdeployments"
372549

373-
result = api_post(base_url, path, body, auth)
550+
result = api_post(base_url, path, stub_body, auth)
374551
new_id = result.get("id") if result else None
552+
553+
# Step 2: PUT SensorML against the canonical /deployments/{id} path
554+
if new_id and sml_body:
555+
api_put(base_url, f"deployments/{new_id}", sml_body, auth,
556+
content_type="application/sml+json")
557+
375558
print(f" [OK] Created deployment {uid} → id={new_id}")
376559
if stats:
377560
stats.setdefault("created", 0)

0 commit comments

Comments
 (0)