88Functions:
99 find_by_uid() — Lookup resource by UID in a collection
1010 find_datastream() — Lookup datastream by outputName under a system
11- ensure_procedure() — Create procedure if not exists
12- ensure_system() — Create system (geo+json stub POST → SensorML PUT)
11+ ensure_procedure() — Create procedure (geo+json stub POST → optional SensorML PUT)
12+ ensure_system() — Create system (geo+json stub POST → optional SensorML PUT)
1313 ensure_datastream() — Create datastream with SWE DataRecord schema
14- ensure_deployment() — Create deployment node (with optional parent )
14+ ensure_deployment() — Create deployment node (geo+json stub POST → optional SensorML PUT )
1515 clean_resource() — Delete resource by UID if it exists
1616 api_get/post/put/delete() — Low-level HTTP helpers with retry
17+
18+ Content-type contract (CSAPI Part 1, OGC 23-001):
19+ - application/geo+json → spatial-discovery view; carries uid/name/description
20+ (+ geometry) only. SensorML metadata is INTENTIONALLY
21+ stripped server-side.
22+ - application/sml+json → full SensorML metadata view; carries keywords,
23+ identifiers, classifiers, characteristics, capabilities,
24+ contacts, documentation/documents, history,
25+ securityConstraints, legalConstraints, etc.
26+
27+ Bootstrap pattern for procedures, systems, and deployments:
28+ 1. POST a small geo+json stub (Content-Type: application/json — server
29+ interprets as application/geo+json on these endpoints).
30+ 2. PUT the full SensorML body (Content-Type: application/sml+json) against
31+ the just-created /resource/{id} path.
32+
33+ This module enforces the contract via _warn_if_sml_fields_in_stub(): if a
34+ caller passes a "stub" with SensorML-only fields under properties, a loud
35+ warning is emitted. Set OS4CSAPI_STRICT_BOOTSTRAP=1 to elevate the warning
36+ to an exception (recommended for tests and CI).
1737"""
1838
1939import argparse
@@ -239,27 +259,142 @@ def find_datastream(base_url: str, auth: str, system_id: str,
239259 return None
240260
241261
262+ # ═══════════════════════════════════════════════════════════════════════════
263+ # Encoding-contract guardrail
264+ # ═══════════════════════════════════════════════════════════════════════════
265+
# Property names that exist only in the SensorML encoding
# (application/sml+json). Per this module's notes, a CSAPI server silently
# strips them when they appear under `properties` of a geo+json POST: the
# request is still accepted (HTTP 201) but the listed fields are DROPPED
# server-side. Finding any of them in a "stub" body therefore means the
# caller has not separated the GeoJSON encoding from the SensorML encoding.
#
# Background: pre-strict CSAPI servers answered 201 and dropped the fields
# silently; strict servers (post-`a467aba` upstream) answer HTTP 400. In both
# cases the defect is on the client side.
# See docs/engineering/2026-05-silent-sensorml-field-loss.md
SML_ONLY_FIELDS = frozenset((
    "capabilities",
    "characteristics",
    "classifiers",
    "components",
    "configuration",
    "connections",
    "contacts",
    "documentation",  # OGC links-array form (not the SensorML `documents` form)
    "documents",      # SensorML form
    "history",
    "identifiers",
    "inputs",
    "keywords",
    "legalConstraints",
    "lineage",
    "localReferenceFrames",
    "localTimeFrames",
    "method",
    "modes",
    "outputs",
    "parameters",
    "securityConstraints",
    "typeOf",
    "usageConstraints",
))

# Strict mode is read once, at import time, from the environment: any of
# "1" / "true" / "yes" (case-insensitive) turns the guardrail warning into
# an exception.
_STRICT_BOOTSTRAP = os.getenv("OS4CSAPI_STRICT_BOOTSTRAP", "").lower() in (
    "1",
    "true",
    "yes",
)
303+
304+
def _warn_if_sml_fields_in_stub(stub: dict, label: str) -> None:
    """Flag SensorML-only fields that leaked into a geo+json 'stub' body.

    The keys of ``stub["properties"]`` are inspected; when the stub has no
    ``properties`` key, the stub's own top-level keys are inspected instead.
    Any key that is also in ``SML_ONLY_FIELDS`` counts as leaked.

    Such fields are expected to be dropped server-side on a geo+json POST,
    so callers should pass them in a separate ``sml_body`` that the helper
    PUTs with ``Content-Type: application/sml+json``.

    Args:
        stub: Candidate stub body. Anything that is not a dict (or whose
            ``properties`` value is not a dict) is ignored without comment.
        label: Caller description, placed at the start of the message.

    Raises:
        RuntimeError: If leaked fields are found and strict mode is on
            (``OS4CSAPI_STRICT_BOOTSTRAP`` set to 1/true/yes). Otherwise
            the same message is printed as a ``[WARN]`` line.
    """
    # Non-dict stubs and non-dict `properties` values both end up here as
    # "nothing to inspect".
    candidate = stub.get("properties", stub) if isinstance(stub, dict) else None
    if not isinstance(candidate, dict):
        return

    offending = sorted(key for key in candidate if key in SML_ONLY_FIELDS)
    if not offending:
        return

    notice = (
        f"[ENCODING-CONTRACT] {label} : stub body carries SensorML-only "
        f"field(s) under `properties`: {offending} . These will be silently "
        f"dropped (or 400-rejected by strict servers) on the geo+json POST. "
        f"Move them into a separate sml_body argument."
    )
    if _STRICT_BOOTSTRAP:
        raise RuntimeError(notice)
    print(f" [WARN] {notice} ")
333+
334+
242335# ═══════════════════════════════════════════════════════════════════════════
243336# Idempotent resource creation
244337# ═══════════════════════════════════════════════════════════════════════════
245338
246- def ensure_procedure (base_url : str , auth : str , uid : str , body : dict ,
247- * , dry_run : bool = False , stats : dict = None ) -> str | None :
248- """Create a procedure if it doesn't already exist. Returns server ID."""
339+ def ensure_procedure (base_url : str , auth : str , uid : str , stub_body : dict ,
340+ sml_body : dict | None = None ,
341+ * , dry_run : bool = False , stats : dict = None ,
342+ force_sml : bool = False ) -> str | None :
343+ """Create a procedure if it doesn't already exist. Returns server ID.
344+
345+ Two-step encoding-correct pattern (mirrors ``ensure_system``):
346+
347+ 1. POST ``stub_body`` (geo+json Feature: uid/name/description + optional
348+ geometry) with ``Content-Type: application/json``. The server
349+ interprets this as ``application/geo+json`` on the procedures
350+ endpoint.
351+ 2. If ``sml_body`` is provided, PUT it against the new resource path
352+ with ``Content-Type: application/sml+json`` to populate full
353+ SensorML metadata (keywords, identifiers, classifiers,
354+ characteristics, capabilities, contacts, documents, history,
355+ securityConstraints, legalConstraints, …).
356+
357+ When ``force_sml`` is True and the procedure already exists, the
358+ SensorML body is PUT again — useful for correcting previously-broken
359+ payloads after this fix lands.
360+
361+ Callers MUST keep SensorML metadata out of the stub. The
362+ ``_warn_if_sml_fields_in_stub`` guardrail catches accidental leakage.
363+ """
364+ _warn_if_sml_fields_in_stub (stub_body , f"ensure_procedure({ uid } )" )
365+
249366 existing = find_by_uid (base_url , auth , "procedures" , uid )
250367 if existing :
251- print (f" [SKIP] Procedure { uid } already exists (id={ existing } )" )
252- if stats :
253- stats .setdefault ("skipped" , 0 )
254- stats ["skipped" ] += 1
368+ if force_sml and sml_body :
369+ if dry_run :
370+ print (f" [DRY] Would force-PUT SML for procedure { uid } (id={ existing } )" )
371+ else :
372+ api_put (base_url , f"procedures/{ existing } " , sml_body , auth ,
373+ content_type = "application/sml+json" )
374+ print (f" [SML] Force-PUT SensorML for procedure { uid } (id={ existing } )" )
375+ if stats :
376+ stats .setdefault ("sml_updated" , 0 )
377+ stats ["sml_updated" ] += 1
378+ else :
379+ print (f" [SKIP] Procedure { uid } already exists (id={ existing } )" )
380+ if stats :
381+ stats .setdefault ("skipped" , 0 )
382+ stats ["skipped" ] += 1
255383 return existing
256384
257385 if dry_run :
258386 print (f" [DRY] Would create procedure: { uid } " )
259387 return None
260388
261- result = api_post (base_url , "procedures" , body , auth )
389+ # Step 1: POST geo+json stub
390+ result = api_post (base_url , "procedures" , stub_body , auth )
262391 new_id = result .get ("id" ) if result else None
392+
393+ # Step 2: PUT SensorML if provided
394+ if new_id and sml_body :
395+ api_put (base_url , f"procedures/{ new_id } " , sml_body , auth ,
396+ content_type = "application/sml+json" )
397+
263398 print (f" [OK] Created procedure { uid } → id={ new_id } " )
264399 if stats :
265400 stats .setdefault ("created" , 0 )
@@ -278,6 +413,8 @@ def ensure_system(base_url: str, auth: str, uid: str, stub_body: dict,
278413 When *force_sml* is True and the system already exists, the SML body is
279414 PUT again (useful for correcting previously-broken SML payloads).
280415 """
416+ _warn_if_sml_fields_in_stub (stub_body , f"ensure_system({ uid } )" )
417+
281418 existing = find_by_uid (base_url , auth , "systems" , uid )
282419 if existing :
283420 if force_sml and sml_body :
@@ -345,33 +482,79 @@ def ensure_datastream(base_url: str, auth: str, system_id: str,
345482 return new_id
346483
347484
348- def ensure_deployment (base_url : str , auth : str , uid : str , body : dict ,
485+ def ensure_deployment (base_url : str , auth : str , uid : str , stub_body : dict ,
486+ sml_body : dict | None = None ,
349487 parent_id : str | None = None ,
350- * , dry_run : bool = False , stats : dict = None ) -> str | None :
351- """Create a deployment node if it doesn't exist. Returns server ID."""
488+ * , dry_run : bool = False , stats : dict = None ,
489+ force_sml : bool = False ) -> str | None :
490+ """Create a deployment node if it doesn't exist. Returns server ID.
491+
492+ Two-step encoding-correct pattern (mirrors ``ensure_system`` and
493+ ``ensure_procedure``):
494+
495+ 1. POST ``stub_body`` (geo+json Feature: uid/name/description +
496+ optional geometry, validTime, deployment-tree links) with
497+ ``Content-Type: application/json``. Server interprets as
498+ ``application/geo+json``.
499+ 2. If ``sml_body`` is provided, PUT it against the new resource path
500+ with ``Content-Type: application/sml+json`` to populate full
501+ SensorML metadata (keywords, identifiers, classifiers,
502+ characteristics, capabilities, contacts, documents, history,
503+ securityConstraints, legalConstraints, …).
504+
505+ When ``parent_id`` is given, the create path is
506+ ``deployments/{parent_id}/subdeployments``; the SML PUT still targets
507+ the canonical ``deployments/{new_id}`` path.
508+
509+ When ``force_sml`` is True and the deployment already exists, the
510+ SensorML body is PUT again.
511+
512+ Callers MUST keep SensorML metadata out of the stub. The
513+ ``_warn_if_sml_fields_in_stub`` guardrail catches accidental leakage.
514+ """
515+ _warn_if_sml_fields_in_stub (stub_body , f"ensure_deployment({ uid } )" )
516+
352517 # Check top-level deployments first
353518 existing = find_by_uid (base_url , auth , "deployments" , uid )
354519 if not existing and parent_id :
355520 # Go server only lists subdeployments under parent endpoint
356521 existing = find_by_uid (base_url , auth ,
357522 f"deployments/{ parent_id } /subdeployments" , uid )
358523 if existing :
359- print (f" [SKIP] Deployment { uid } already exists (id={ existing } )" )
360- if stats :
361- stats .setdefault ("skipped" , 0 )
362- stats ["skipped" ] += 1
524+ if force_sml and sml_body :
525+ if dry_run :
526+ print (f" [DRY] Would force-PUT SML for deployment { uid } (id={ existing } )" )
527+ else :
528+ api_put (base_url , f"deployments/{ existing } " , sml_body , auth ,
529+ content_type = "application/sml+json" )
530+ print (f" [SML] Force-PUT SensorML for deployment { uid } (id={ existing } )" )
531+ if stats :
532+ stats .setdefault ("sml_updated" , 0 )
533+ stats ["sml_updated" ] += 1
534+ else :
535+ print (f" [SKIP] Deployment { uid } already exists (id={ existing } )" )
536+ if stats :
537+ stats .setdefault ("skipped" , 0 )
538+ stats ["skipped" ] += 1
363539 return existing
364540
365541 if dry_run :
366542 print (f" [DRY] Would create deployment: { uid } " )
367543 return None
368544
545+ # Step 1: POST geo+json stub at the (possibly nested) create path
369546 path = "deployments"
370547 if parent_id :
371548 path = f"deployments/{ parent_id } /subdeployments"
372549
373- result = api_post (base_url , path , body , auth )
550+ result = api_post (base_url , path , stub_body , auth )
374551 new_id = result .get ("id" ) if result else None
552+
553+ # Step 2: PUT SensorML against the canonical /deployments/{id} path
554+ if new_id and sml_body :
555+ api_put (base_url , f"deployments/{ new_id } " , sml_body , auth ,
556+ content_type = "application/sml+json" )
557+
375558 print (f" [OK] Created deployment { uid } → id={ new_id } " )
376559 if stats :
377560 stats .setdefault ("created" , 0 )
0 commit comments