44
55import hashlib
66from collections import OrderedDict
7- from dataclasses import dataclass
7+ from collections .abc import Mapping
8+ from dataclasses import dataclass , field
89from datetime import datetime
910from pathlib import Path
1011from typing import Any , Protocol
@@ -30,6 +31,47 @@ def delete(self, uri: str) -> None:
3031 """Delete previously persisted payload bytes when retention removes a run."""
3132
3233
@dataclass(frozen=True)
class ExternalPayloadStoragePolicy:
    """External payload storage policy in a single normalized shape.

    Build one directly, or parse a server namespace / Cloud organization
    payload with :meth:`from_dict`.
    """

    enabled: bool
    driver: str | None = None
    threshold_bytes: int | None = None
    config: Mapping[str, Any] = field(default_factory=dict)
    reference: str | None = None
    prefix: str = ""
    mode: str | None = None
    status: str | None = None
    integrity_required: bool = True

    @classmethod
    def from_dict(cls, data: object) -> ExternalPayloadStoragePolicy:
        """Build a policy from a server namespace or Cloud organization payload.

        The policy may sit at the top level of ``data`` or under its
        ``external_payload_storage`` key. Field validation is delegated to
        the module's ``_optional_*`` helpers, which raise ``ValueError``.
        """
        raw = _extract_policy(data)

        driver_name = _optional_string(raw.get("driver"), "external payload storage driver")
        # Without an explicit "enabled" flag, naming a driver implies enabled.
        implied_enabled = driver_name is not None
        is_enabled = bool(raw.get("enabled", implied_enabled))
        threshold = _optional_positive_int(raw.get("threshold_bytes"), "threshold_bytes")
        settings = _optional_mapping(raw.get("config"), "config")

        # A non-empty top-level prefix wins; config.prefix is only consulted
        # (and validated) when the top-level one is missing or empty.
        key_prefix = _optional_string(raw.get("prefix"), "prefix")
        if not key_prefix:
            key_prefix = _optional_string(settings.get("prefix"), "config.prefix")

        require_integrity = bool(raw.get("integrity_required", True))
        reference = _optional_string(raw.get("reference"), "reference")
        mode = _optional_string(raw.get("mode"), "mode")
        status = _optional_string(raw.get("status"), "status")

        return cls(
            enabled=is_enabled,
            driver=driver_name,
            threshold_bytes=threshold,
            config=settings,
            reference=reference,
            prefix=key_prefix or "",
            mode=mode,
            status=status,
            integrity_required=require_integrity,
        )
73+
74+
3375@dataclass (frozen = True )
3476class ExternalPayloadReference :
3577 """Stable wire envelope for a payload stored outside workflow history."""
@@ -321,6 +363,70 @@ def delete(self, uri: str) -> None:
321363 self .container_client .delete_blob (key )
322364
323365
def external_storage_driver_from_policy(
    policy: ExternalPayloadStoragePolicy | Mapping[str, Any],
    *,
    s3_client: Any | None = None,
    gcs_client: Any | None = None,
    azure_container_client: Any | None = None,
    local_root: str | Path | None = None,
) -> ExternalStorageDriver:
    """Turn a server or Cloud storage policy into an SDK storage driver.

    ``policy`` may be an already-parsed ``ExternalPayloadStoragePolicy`` or
    the raw mapping, which is parsed with ``from_dict`` first. The
    application owns the provider SDK clients: supply the configured
    S3/GCS/Azure client matching the driver named by the policy.

    Raises:
        ValueError: if the policy is disabled, names no driver, names an
            unsupported driver, or lacks the client or location the chosen
            driver needs.
    """
    if isinstance(policy, ExternalPayloadStoragePolicy):
        resolved = policy
    else:
        resolved = ExternalPayloadStoragePolicy.from_dict(policy)

    if not resolved.enabled:
        raise ValueError("external payload storage policy is disabled")
    if resolved.driver is None:
        raise ValueError("external payload storage policy driver is required")

    # Driver names are matched case-insensitively.
    kind = resolved.driver.lower()

    if kind == "local":
        # Precedence: explicit local_root, then config.uri, then reference.
        location = local_root
        if not location:
            location = _policy_string(resolved, "uri")
        if not location:
            location = resolved.reference
        if location is None:
            raise ValueError("local external payload storage policy requires config.uri or local_root")
        return LocalFilesystemExternalStorage(_local_path(location))

    elif kind == "s3":
        if s3_client is None:
            raise ValueError("s3 external payload storage policy requires s3_client")
        s3_bucket = _policy_string(resolved, "bucket")
        if not s3_bucket:
            s3_bucket = _s3_bucket_from_reference(resolved.reference)
        if s3_bucket is None:
            raise ValueError("s3 external payload storage policy requires config.bucket or an S3 bucket reference")
        return S3ExternalStorage(s3_client, bucket=s3_bucket, prefix=_policy_prefix(resolved))

    elif kind == "gcs":
        if gcs_client is None:
            raise ValueError("gcs external payload storage policy requires gcs_client")
        gcs_bucket = _policy_string(resolved, "bucket")
        if not gcs_bucket:
            gcs_bucket = _gcs_bucket_from_reference(resolved.reference)
        if gcs_bucket is None:
            raise ValueError("gcs external payload storage policy requires config.bucket or a GCS bucket reference")
        return GCSExternalStorage(gcs_client, bucket=gcs_bucket, prefix=_policy_prefix(resolved))

    elif kind in {"azure", "azure-blob"}:
        if azure_container_client is None:
            raise ValueError("azure external payload storage policy requires azure_container_client")
        # Precedence: config.container, then config.bucket, then reference.
        container_name = _policy_string(resolved, "container")
        if not container_name:
            container_name = _policy_string(resolved, "bucket")
        if not container_name:
            container_name = _azure_container_from_reference(resolved.reference)
        if container_name is None:
            raise ValueError(
                "azure external payload storage policy requires config.container, "
                "config.bucket, or a container reference"
            )
        return AzureBlobExternalStorage(
            azure_container_client,
            container=container_name,
            prefix=_policy_prefix(resolved),
        )

    # Report the driver exactly as the policy spelled it, not lowercased.
    raise ValueError(f"unsupported external payload storage driver {resolved.driver!r}")
428+
429+
324430def store_external_payload (
325431 driver : ExternalStorageDriver ,
326432 data : bytes ,
@@ -387,6 +493,96 @@ def _validate_sha256(sha256: str) -> None:
387493 raise ValueError ("sha256 must be a hex digest" ) from exc
388494
389495
def _extract_policy(data: object) -> Mapping[str, Any]:
    """Return the mapping that holds the storage policy fields.

    When ``data`` carries a non-``None`` ``external_payload_storage`` entry,
    that nested mapping is the policy; otherwise ``data`` itself is.

    Raises:
        ValueError: if ``data`` or the nested entry is not a mapping.
    """
    if isinstance(data, Mapping):
        inner = data.get("external_payload_storage")
        if inner is None:
            return data
        if isinstance(inner, Mapping):
            return inner
        raise ValueError("external_payload_storage must be an object")
    raise ValueError("external payload storage policy must be an object")
505+
506+
def _optional_mapping(data: object, field_name: str) -> Mapping[str, Any]:
    """Return ``data`` if it is a mapping, or an empty dict if it is ``None``.

    Raises:
        ValueError: for any other value; the message names ``field_name``.
    """
    if isinstance(data, Mapping):
        return data
    if data is not None:
        raise ValueError(f"external payload storage policy {field_name} must be an object")
    return {}
513+
514+
def _optional_string(data: object, field_name: str) -> str | None:
    """Pass through a string or ``None``; reject every other type.

    Raises:
        ValueError: if ``data`` is neither ``None`` nor a ``str``; the
            message names ``field_name``.
    """
    if data is None or isinstance(data, str):
        return data
    raise ValueError(f"external payload storage policy {field_name} must be a string")
521+
522+
def _optional_positive_int(data: object, field_name: str) -> int | None:
    """Return ``data`` when it is ``None`` or an integer of at least 1.

    Raises:
        ValueError: if ``data`` is a boolean, a non-integer, or an integer
            below 1; the message names ``field_name``.
    """
    if data is None:
        return None
    # bool is a subclass of int, so True would otherwise pass as the integer 1
    # and a boolean flag could silently become a byte threshold.
    if isinstance(data, bool) or not isinstance(data, int) or data < 1:
        raise ValueError(f"external payload storage policy {field_name} must be a positive integer")
    return data
529+
530+
def _policy_string(policy: ExternalPayloadStoragePolicy, key: str) -> str | None:
    """Read ``policy.config[key]`` as an optional string.

    Validation is delegated to ``_optional_string``, with the field reported
    as ``config.<key>``.
    """
    raw_value = policy.config.get(key)
    return _optional_string(raw_value, f"config.{key}")
533+
534+
def _policy_prefix(policy: ExternalPayloadStoragePolicy) -> str:
    """Return the key prefix: a non-empty ``config.prefix``, else ``policy.prefix``."""
    configured = _policy_string(policy, "prefix")
    if configured:
        return configured
    return policy.prefix
537+
538+
def _local_path(root: str | Path) -> str | Path:
    """Resolve a local storage root given as a path or a ``file://`` URI.

    ``Path`` objects and plain filesystem paths are returned unchanged. A
    ``file://`` URI is reduced to its percent-decoded path component.

    Raises:
        ValueError: if a ``file://`` URI names a host other than
            ``localhost``, or if ``root`` uses any other URI scheme.
    """
    if isinstance(root, Path):
        return root
    parsed = urlparse(root)
    scheme = parsed.scheme
    if scheme == "file":
        if parsed.netloc not in {"", "localhost"}:
            raise ValueError("local external payload storage file URI must be local")
        return unquote(parsed.path)
    # urlparse reports a Windows drive letter ("C:\\data", "C:/data") as a
    # one-character scheme; that is a filesystem path, not a URI.
    if len(scheme) == 1 and root[1:2] == ":":
        return root
    if scheme:
        raise ValueError("local external payload storage policy must use a file:// URI or filesystem path")
    return root
550+
551+
def _s3_bucket_from_reference(reference: str | None) -> str | None:
    """Derive an S3 bucket name from a policy reference.

    Recognizes ``arn:aws:s3:::<bucket>[/...]`` and ``s3://<bucket>/...``.
    Any other non-empty reference is returned as-is; ``None`` and the empty
    string yield ``None``.
    """
    if reference is None:
        return None

    arn_prefix = "arn:aws:s3:::"
    if reference.startswith(arn_prefix):
        # Keep only the bucket segment; anything after the first "/" is dropped.
        name, _, _ = reference[len(arn_prefix):].partition("/")
        return name if name else None

    url = urlparse(reference)
    if url.scheme == "s3" and url.netloc:
        return url.netloc
    return reference if reference else None
562+
563+
def _gcs_bucket_from_reference(reference: str | None) -> str | None:
    """Derive a GCS bucket name from a policy reference.

    A reference containing ``/buckets/`` yields the path segment after its
    last occurrence; ``gs://<bucket>/...`` yields the bucket. Any other
    non-empty reference is returned as-is; ``None`` and the empty string
    yield ``None``.
    """
    if reference is None:
        return None

    _, found, tail = reference.rpartition("/buckets/")
    if found:
        name = tail.partition("/")[0]
        return name if name else None

    url = urlparse(reference)
    if url.scheme == "gs" and url.netloc:
        return url.netloc
    return reference if reference else None
575+
576+
def _azure_container_from_reference(reference: str | None) -> str | None:
    """Derive an Azure container name from a policy reference.

    ``azure://<container>/...`` and ``azure-blob://<container>/...`` yield
    the container. Any other non-empty reference is returned as-is; ``None``
    and the empty string yield ``None``.
    """
    if reference is None:
        return None

    url = urlparse(reference)
    uses_azure_scheme = url.scheme in {"azure", "azure-blob"}
    if uses_azure_scheme and url.netloc:
        return url.netloc
    return reference if reference else None
584+
585+
390586def _validate_rfc3339 (value : str ) -> None :
391587 normalized = value [:- 1 ] + "+00:00" if value .endswith ("Z" ) else value
392588 try :
0 commit comments