|
| 1 | +"""Multi-agent topology manifest served at ``/.well-known/adcp-agents.json``. |
| 2 | +
|
| 3 | +Per AdCP spec (``schemas/source/adcp-agents.json``) every AdCP host |
| 4 | +publishes an origin-scoped manifest enumerating the agents it serves. |
| 5 | +Buyers, conformance runners, and tooling fetch the well-known URL once |
| 6 | +and discover the full topology of the publisher in a single request, |
| 7 | +instead of probing tenant URLs out of band. |
| 8 | +
|
| 9 | +This module owns: |
| 10 | +
|
| 11 | +1. :func:`build_manifest` — a pure function that produces the manifest |
| 12 | + document from the configured handler name + transports + bind |
| 13 | + coordinates. Easy to unit-test, no Starlette dependency. |
| 14 | +2. :func:`make_discovery_route` — wires the document into a Starlette |
| 15 | + :class:`~starlette.routing.Route` so the SDK's ``serve()`` can |
| 16 | + compose it onto every HTTP transport (``streamable-http``, ``a2a``, |
| 17 | + ``both``). |
| 18 | +
|
| 19 | +Stdio has no HTTP surface and skips the route entirely. |
| 20 | +""" |
| 21 | + |
| 22 | +from __future__ import annotations |
| 23 | + |
| 24 | +from datetime import datetime, timezone |
| 25 | +from typing import Any, Literal |
| 26 | + |
| 27 | +from starlette.requests import Request |
| 28 | +from starlette.responses import JSONResponse |
| 29 | +from starlette.routing import Route |
| 30 | + |
#: Path the manifest is served at. Per AdCP spec — operators MUST NOT
#: change this; consumers fetch from the well-known location only.
DISCOVERY_PATH = "/.well-known/adcp-agents.json"

#: Manifest schema version this builder emits. Consumers SHOULD ignore
#: unknown top-level fields rather than fail on version mismatch (per
#: spec), so bumping minor versions is safe.
MANIFEST_VERSION = "1.0"

#: ``$schema`` URI emitted in the manifest. Matches the canonical
#: location consumers use for validation.
MANIFEST_SCHEMA_URI = "/schemas/adcp-agents.json"


#: Transport identifiers a manifest entry can carry. The builder emits
#: one ``agents`` row per transport it is handed.
Transport = Literal["mcp", "a2a"]
| 46 | + |
| 47 | + |
| 48 | +def _normalize_agent_id(name: str) -> str: |
| 49 | + """Coerce a human-friendly handler name to a manifest-legal |
| 50 | + ``agent_id``. |
| 51 | +
|
| 52 | + The schema requires lowercase alphanumeric with hyphens/underscores, |
| 53 | + no leading/trailing separators, 1-64 characters. Most adopters pass |
| 54 | + something like ``"My Seller"`` to ``serve(name=...)``; lower-case it |
| 55 | + and replace illegal runs with ``-``. Falls back to ``"agent"`` if |
| 56 | + the input lowers to nothing legal (defensive — empty / all-symbol |
| 57 | + names would otherwise produce an invalid manifest). |
| 58 | + """ |
| 59 | + out: list[str] = [] |
| 60 | + for ch in name.lower(): |
| 61 | + if ch.isalnum() or ch in ("-", "_"): |
| 62 | + out.append(ch) |
| 63 | + else: |
| 64 | + out.append("-") |
| 65 | + cleaned = "".join(out).strip("-_") |
| 66 | + # Collapse runs of separators — looks better and stays under the |
| 67 | + # 64-char cap on long names. Strip BEFORE the length cap so the |
| 68 | + # truncation never lands on a separator that would be stripped |
| 69 | + # away (which would make ``agent_id`` len differ from len(cleaned) |
| 70 | + # in surprising ways). |
| 71 | + while "--" in cleaned: |
| 72 | + cleaned = cleaned.replace("--", "-") |
| 73 | + while "__" in cleaned: |
| 74 | + cleaned = cleaned.replace("__", "_") |
| 75 | + cleaned = cleaned.strip("-_") |
| 76 | + if not cleaned: |
| 77 | + return "agent" |
| 78 | + return cleaned[:64].strip("-_") or "agent" |
| 79 | + |
| 80 | + |
| 81 | +def _agent_url(transport: Transport, base_url: str) -> str: |
| 82 | + """Return the agent endpoint URL for a given transport. |
| 83 | +
|
| 84 | + For ``mcp`` the streamable-HTTP endpoint lives at ``/mcp``. For |
| 85 | + ``a2a`` the agent's base URL is the root — the agent-card lives at |
| 86 | + ``<base>/.well-known/agent-card.json``. |
| 87 | + """ |
| 88 | + base = base_url.rstrip("/") |
| 89 | + if transport == "mcp": |
| 90 | + return f"{base}/mcp" |
| 91 | + return base or "/" |
| 92 | + |
| 93 | + |
def build_manifest(
    *,
    name: str,
    transports: list[Transport],
    base_url: str,
    description: str | None = None,
    specialisms: list[str] | None = None,
) -> dict[str, Any]:
    """Build the AdCP multi-agent topology manifest document.

    Apart from reading the clock for ``last_updated`` the function has
    no I/O and touches no mutable globals, so it is easy to unit-test
    and to reuse in adopter tooling that publishes a static manifest
    from CI.

    :param name: Operator-supplied agent / platform name. Becomes the
        ``agent_id`` (after normalization to the schema's character
        class) and, when non-empty, the contact ``name`` field.
    :param transports: Transports the binary serves. ``["mcp"]``,
        ``["a2a"]``, or ``["mcp", "a2a"]`` for ``transport="both"``.
        One manifest entry is emitted per distinct transport — buyers
        route by transport, so each gets its own row even when they
        share a process. Repeated values are ignored.
    :param base_url: Origin the binary is reachable at, e.g.
        ``"https://sales.example.com"``. The manifest URL is built as
        ``<base_url>/mcp`` for MCP and ``<base_url>`` for A2A.
    :param description: Optional human-readable description surfaced in
        operator UIs and conformance reports.
    :param specialisms: Optional AdCP specialisms (e.g.
        ``["sales-non-guaranteed"]``). The schema requires ``minItems:
        1`` so when nothing is supplied we fall back to a minimal
        ``["adcp"]`` placeholder. Adopters who know their specialism
        SHOULD pass it explicitly.
    :returns: The manifest as a JSON-serializable ``dict``.
    """
    # Schema cap on ``agent_id`` length.
    max_agent_id_len = 64

    # TODO(#381): infer specialisms from the handler's advertised
    # tools (e.g. presence of ``get_products`` → sales-non-guaranteed).
    # For now adopters pass them explicitly or accept the placeholder.
    effective_specialisms = list(specialisms) if specialisms else ["adcp"]

    # Drop repeated transports (order preserved): two rows for the same
    # transport would carry the same ``agent_id``, which the schema's
    # uniqueness requirement forbids.
    unique_transports = list(dict.fromkeys(transports))
    needs_suffix = len(unique_transports) > 1

    base_id = _normalize_agent_id(name)
    agents: list[dict[str, Any]] = []
    for transport in unique_transports:
        if needs_suffix:
            # When emitting two rows from the same binary the schema
            # requires unique agent_ids — suffix with the transport so
            # ``foo-mcp`` and ``foo-a2a`` are both legal and
            # self-describing. Trim the stem first so the suffixed id
            # still fits the length cap, and never leave a doubled
            # separator at the join.
            suffix = f"-{transport}"
            stem = base_id[: max_agent_id_len - len(suffix)].rstrip("-_")
            agent_id = f"{stem}{suffix}"
        else:
            agent_id = base_id
        entry: dict[str, Any] = {
            "agent_id": agent_id,
            "url": _agent_url(transport, base_url),
            "transport": transport,
            # Per-row copy so mutating one entry cannot alter another.
            "specialisms": list(effective_specialisms),
        }
        if description:
            entry["description"] = description
        agents.append(entry)

    # Truncate to whole-hour granularity so consecutive requests within
    # the same hour produce byte-identical manifests — lets HTTP caches
    # (CDNs, conformance runners polling on a loop) collapse repeated
    # fetches instead of seeing a fresh second-resolution timestamp on
    # every hit. Hour-resolution is well within the spec's "informational
    # only" semantics for ``last_updated``.
    last_updated = (
        datetime.now(timezone.utc)
        .replace(minute=0, second=0, microsecond=0)
        .strftime("%Y-%m-%dT%H:%M:%SZ")
    )
    manifest: dict[str, Any] = {
        "$schema": MANIFEST_SCHEMA_URI,
        "version": MANIFEST_VERSION,
        "agents": agents,
        "last_updated": last_updated,
    }
    if name:
        manifest["contact"] = {"name": name}
    return manifest
| 169 | + |
| 170 | + |
def make_discovery_route(
    *,
    name: str,
    transports: list[Transport],
    base_url: str,
    description: str | None = None,
    specialisms: list[str] | None = None,
) -> Route:
    """Create the Starlette :class:`Route` that serves the discovery manifest.

    The route is registered for GET at :data:`DISCOVERY_PATH`; other
    verbs such as POST / PUT are left to Starlette's default 405
    handling, which suits a read-only, unauthenticated discovery
    document.

    The manifest is not cached: every request calls
    :func:`build_manifest` again so ``last_updated`` tracks the clock.
    The document is only a few hundred bytes of JSON, so the rebuild is
    negligible next to real traffic.

    All parameters are forwarded unchanged to :func:`build_manifest`.
    """
    # Captured once; the handler below replays these on every request.
    manifest_kwargs: dict[str, Any] = {
        "name": name,
        "transports": transports,
        "base_url": base_url,
        "description": description,
        "specialisms": specialisms,
    }

    async def _handler(_request: Request) -> JSONResponse:
        # The request carries nothing the manifest depends on.
        return JSONResponse(build_manifest(**manifest_kwargs))

    discovery_route = Route(DISCOVERY_PATH, _handler, methods=["GET"])
    return discovery_route
| 201 | + |
| 202 | + |
#: Hosts the spec lets us project as ``http://`` — the AdCP discovery
#: schema's ``url`` field requires ``^https://`` for non-loopback
#: targets, but consumers MAY accept ``http://`` for literal localhost
#: so a dev binary works without TLS scaffolding.
_LOOPBACK_HOSTS = frozenset({"127.0.0.1", "localhost", "::1"})


def resolve_base_url(host: str, port: int, base_url: str | None = None) -> str:
    """Construct an origin URL from a bound host/port pair, enforcing
    the spec's ``https://`` requirement for non-loopback targets.

    The AdCP discovery schema requires ``url`` to match ``^https://``
    on every agent entry; the only documented exception is loopback
    (``127.0.0.1`` / ``localhost`` / ``::1``) for dev binaries that
    haven't terminated TLS yet. This resolver therefore:

    * Projects a wildcard bind (``0.0.0.0`` / ``::`` / empty host) to
      ``http://127.0.0.1:<port>`` — it's a dev-only convenience and the
      projection IS loopback.
    * Returns ``http://<host>:<port>`` for literal loopback hosts, with
      IPv6 literals bracketed (``http://[::1]:<port>``) so the result is
      a well-formed URL.
    * Passes through any caller-supplied ``base_url`` that already
      starts with ``https://``.
    * Passes through an ``http://`` ``base_url`` only when the bind is
      loopback/wildcard AND the URL's own host is loopback — the schema
      constrains the *published* URL, so the bind address alone is not
      enough to justify plain HTTP.

    :param host: Interface the server is bound to.
    :param port: TCP port the server is bound to.
    :param base_url: Optional operator-supplied public origin.
    :returns: The origin to publish in the manifest.
    :raises ValueError: For non-loopback binds without an explicit
        ``base_url=`` (operator MUST publish a TLS URL), and for an
        explicit ``base_url=`` that is neither ``https://`` nor an
        ``http://`` loopback URL on a loopback bind (refuse to publish a
        non-conformant manifest).

    Raise-at-boot is deliberate: a quietly-mis-published manifest
    survives in CDNs and conformance reports for hours, so we make the
    operator notice on launch instead.
    """
    # Function-scope import keeps this block self-contained.
    from urllib.parse import urlsplit

    is_wildcard = host in ("0.0.0.0", "::", "")
    is_loopback = is_wildcard or host in _LOOPBACK_HOSTS

    if base_url is not None:
        if base_url.startswith("https://"):
            return base_url
        if base_url.startswith("http://") and is_loopback:
            # Validate the URL's own host, not just the bind address:
            # a 0.0.0.0 bind paired with http://public.example.com must
            # not slip through.
            try:
                url_host = urlsplit(base_url).hostname
            except ValueError:
                # Malformed authority (e.g. unbalanced IPv6 brackets).
                url_host = None
            if url_host in _LOOPBACK_HOSTS:
                return base_url
        raise ValueError(
            "Discovery manifest requires an https:// base_url for non-"
            f"localhost binds (got base_url={base_url!r}, host={host!r}). "
            "The AdCP discovery schema mandates https:// on every "
            "agent entry — pass base_url='https://your-host:port' to "
            "serve()."
        )

    if not is_loopback:
        raise ValueError(
            "Discovery manifest requires base_url= for non-localhost "
            f"binds (host={host!r}); the AdCP discovery schema mandates "
            "https:// URLs and the SDK won't synthesize an http:// URL "
            "for a routable interface. Pass base_url='https://your-"
            "host:port' to serve()."
        )

    # A wildcard bind is not a routable origin. Project to localhost so
    # a default-config dev binary serves a usable manifest for local
    # testing.
    display_host = "127.0.0.1" if is_wildcard else host
    if ":" in display_host:
        # IPv6 literals must be bracketed inside a URL authority.
        display_host = f"[{display_host}]"
    return f"http://{display_host}:{port}"
0 commit comments