Skip to content

Commit c8e4953

Browse files
bokelleyclaude
andcommitted
feat(adagents): divergence detector + ?include=properties for directory inverse-lookup (#749 Part 3, adcp#4894)
Builds on #769's directory wrapper. Adds:
- include=["properties"] parameter on fetch_agent_authorizations_from_directory (adcp#4894). Repeated-key form (?include=properties&include=...), not comma-joined.
- property_ids: list[str] | None field on DirectoryPublisherEntry. None signals the directory did not return per-publisher IDs (count-only mode); a list signals the directory supports ?include=properties.
- detect_publisher_properties_divergence: compares directory inline resolution against per-publisher federated fetches. Full (publisher_domain, property_id) set-diff when property_ids is available; graceful fallback to count-only against older directories. max_concurrency=20 default semaphore caps concurrent fetches at managed-network scale (cafemedia ~6,800 publishers). sample_size=200 default keeps unbounded sweeps opt-in.
- PublisherDivergence / DivergenceReport types (Pydantic, matching #769's style).

Closes #749 Part 3. Part 2 superseded by #769 (which closed #746).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 1c4e57d commit c8e4953

4 files changed

Lines changed: 621 additions & 1 deletion

File tree

src/adcp/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,10 @@
2222
DirectoryEdgeStatus,
2323
DirectoryPublisherEntry,
2424
DiscoveryMethod,
25+
DivergenceReport,
2526
EntryErrorKind,
27+
PublisherDivergence,
28+
detect_publisher_properties_divergence,
2629
domain_matches,
2730
fetch_adagents,
2831
fetch_adagents_with_cache,
@@ -832,7 +835,10 @@ def get_adcp_version() -> str:
832835
"DirectoryEdgeStatus",
833836
"DirectoryPublisherEntry",
834837
"DiscoveryMethod",
838+
"DivergenceReport",
835839
"EntryErrorKind",
840+
"PublisherDivergence",
841+
"detect_publisher_properties_divergence",
836842
"fetch_adagents",
837843
"fetch_adagents_with_cache",
838844
"fetch_agent_authorizations",

src/adcp/adagents.py

Lines changed: 190 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1804,6 +1804,16 @@ class DirectoryPublisherEntry(AdCPBaseModel):
18041804
signing_keys_pinned: bool | None = None
18051805
status: DirectoryEdgeStatus
18061806
last_verified_at: datetime
1807+
property_ids: list[str] | None = Field(
1808+
default=None,
1809+
description=(
1810+
"Canonical property IDs the agent's selectors resolve to under "
1811+
"this publisher. Present iff the request was made with "
1812+
"include=['properties'] AND the directory server supports it "
1813+
"(per adcp#4894). None signals count-only mode for downstream "
1814+
"consumers."
1815+
),
1816+
)
18071817

18081818

18091819
class AgentAuthorizationsDirectoryResult(AdCPBaseModel):
@@ -1832,6 +1842,7 @@ async def fetch_agent_authorizations_from_directory(
18321842
*,
18331843
directory_url: str,
18341844
since: str | None = None,
1845+
include: list[str] | None = None,
18351846
timeout: float = 10.0,
18361847
client: httpx.AsyncClient | None = None,
18371848
) -> AgentAuthorizationsDirectoryResult:
@@ -1854,6 +1865,15 @@ async def fetch_agent_authorizations_from_directory(
18541865
since: Optional opaque cursor or RFC 3339 timestamp from a prior
18551866
``directory_indexed_at`` — passed through as ``?since=...``
18561867
to limit the result to edges that changed since that point.
1868+
include: Optional list of expansion keys per the AAO directory
1869+
API spec (adcp#4894). Each value is emitted as a separate
1870+
``?include=<value>`` query parameter (repeated-key form, not
1871+
comma-joined). Pass ``["properties"]`` against directories
1872+
that support it to receive per-publisher ``property_ids[]``
1873+
on each row, enabling full set-diff against the publisher's
1874+
own adagents.json. Directories that don't support a given
1875+
expansion key simply omit the corresponding fields from the
1876+
response; callers should treat absence as count-only mode.
18571877
timeout: Request timeout in seconds.
18581878
client: Optional shared ``httpx.AsyncClient`` for connection
18591879
pooling. Caller owns the client lifecycle.
@@ -1890,8 +1910,15 @@ async def fetch_agent_authorizations_from_directory(
18901910
_validate_redirect_url(f"{base}/v1/agents/_/publishers")
18911911

18921912
request_url = f"{base}/v1/agents/{quote(agent_url, safe='')}/publishers"
1913+
query_pairs: list[tuple[str, str]] = []
18931914
if since is not None:
1894-
request_url = f"{request_url}?since={quote(since, safe='')}"
1915+
query_pairs.append(("since", since))
1916+
if include:
1917+
for value in include:
1918+
query_pairs.append(("include", value))
1919+
if query_pairs:
1920+
query_string = "&".join(f"{quote(k, safe='')}={quote(v, safe='')}" for k, v in query_pairs)
1921+
request_url = f"{request_url}?{query_string}"
18951922

18961923
parsed = urlparse(request_url)
18971924
await _dns_validate_host(
@@ -1943,3 +1970,165 @@ async def fetch_agent_authorizations_from_directory(
19431970
raise AdagentsValidationError(
19441971
f"Agent-publishers directory response failed schema validation: {e}"
19451972
) from e
1973+
1974+
1975+
class PublisherDivergence(AdCPBaseModel):
    """One publisher's disagreement between the directory and its own adagents.json.

    A record is produced either because the directory's view and the
    publisher's federated ``adagents.json`` differ, or because the
    federated fetch itself failed.

    Modes:

    * **Set-diff mode** (directory returned ``property_ids[]``):
      ``missing_in_inline`` lists property IDs present in the publisher's
      own adagents.json but absent from the directory (the directory has
      not caught up yet); ``missing_in_federated`` lists property IDs the
      directory claims but the publisher's adagents.json does not include
      (stale directory entry or publisher revocation).
    * **Count-only mode** (directory did not return ``property_ids[]``):
      both ``missing_in_*`` fields are None. Equal counts do NOT imply
      equal sets — a same-count substitution is undetectable. Request
      ``?include=properties`` (adcp#4894) on directories that support it
      to get full set-diff precision.
    * **Fetch failure**: ``child_fetch_error`` is non-None because the
      publisher's adagents.json could not be fetched or parsed; the
      remaining fields carry no meaning in that case.
    """

    # Domain whose adagents.json was compared against the directory.
    publisher_domain: str
    # Count of properties the directory reports as authorized.
    directory_properties_authorized: int = Field(..., ge=0)
    # Count of distinct property IDs found via the federated fetch.
    federated_properties_found: int = Field(..., ge=0)
    # Set-diff results; None in count-only mode and on fetch failure.
    missing_in_inline: list[str] | None = None
    missing_in_federated: list[str] | None = None
    # Error text when the publisher's adagents.json could not be read.
    child_fetch_error: str | None = None
2002+
2003+
2004+
# Result type of a divergence sweep: one PublisherDivergence per publisher
# that disagreed with the directory or whose adagents.json fetch failed.
# Publishers that agree are omitted, so an empty list means "no divergence
# detected".
DivergenceReport = list[PublisherDivergence]
2005+
2006+
2007+
async def detect_publisher_properties_divergence(
    agent_url: str,
    *,
    directory_url: str,
    sample_size: int | None = 200,
    max_concurrency: int = 20,
    timeout: float = 30.0,
    client: httpx.AsyncClient | None = None,
) -> DivergenceReport:
    """Compare directory's inline resolution against per-publisher federated fetches.

    For each publisher the directory lists under ``agent_url``, fetches
    that publisher's own ``adagents.json`` and compares the property set
    against the directory's claim. Returns only publishers where the two
    paths disagree (or where the child fetch failed).

    Always requests ``include=["properties"]`` from the directory so the
    full ``(publisher_domain, property_id)`` set-diff lights up on
    directories that support adcp#4894. Against older directories that
    return only ``properties_authorized`` counts, falls back to count-
    comparison; ``missing_in_inline`` / ``missing_in_federated`` are
    None in that fallback path.

    Per adcp#4827 §Resolution-paths, the federated result is
    authoritative when the two paths disagree.

    Args:
        agent_url: agent to check.
        directory_url: AAO directory base URL (HTTPS only — same SSRF
            gate as :func:`fetch_agent_authorizations_from_directory`).
        sample_size: cap the sweep at the first N publishers the
            directory returns; pages are followed only until N entries
            have been collected. None opts into a full sweep across all
            pages — only do this for small networks. Default 200 keeps
            the divergence sweep bounded by default. Must be >= 0.
        max_concurrency: semaphore-capped concurrent federated fetches.
            Default 20 — caps the burst against publisher origins.
            Must be >= 1.
        timeout: per-request timeout (directory + child fetches).
        client: optional shared ``httpx.AsyncClient``. When omitted, a
            private client is created and closed before returning.

    Returns:
        :data:`DivergenceReport` (``list[PublisherDivergence]``). Empty
        list = no divergence detected. Note in count-only fallback mode,
        an empty list means counts agree but set-equality is not
        guaranteed.

    Raises:
        ValueError: if ``max_concurrency`` < 1 or ``sample_size`` < 0.
        Errors raised by the directory fetch are not caught here and
        propagate to the caller; per-publisher fetch errors are reported
        via ``child_fetch_error`` instead of being raised.
    """
    # Validate before touching the network: Semaphore(0) would make every
    # probe wait forever, and a negative sample_size would silently slice
    # entries off the *tail* of the collected list.
    if max_concurrency < 1:
        raise ValueError(f"max_concurrency must be >= 1, got {max_concurrency}")
    if sample_size is not None and sample_size < 0:
        raise ValueError(f"sample_size must be >= 0 or None, got {sample_size}")

    own_client = client is None
    http = httpx.AsyncClient() if client is None else client
    try:
        collected: list[DirectoryPublisherEntry] = []
        cursor: str | None = None
        seen_cursors: set[str] = set()
        while True:
            page = await fetch_agent_authorizations_from_directory(
                agent_url,
                directory_url=directory_url,
                since=cursor,
                include=["properties"],
                timeout=timeout,
                client=http,
            )
            collected.extend(page.publishers)
            if sample_size is not None and len(collected) >= sample_size:
                del collected[sample_size:]
                break
            cursor = page.next_cursor
            # Stop on the last page, and also when the directory hands back
            # a cursor we already followed — otherwise a misbehaving server
            # would keep this loop spinning forever.
            if not cursor or cursor in seen_cursors:
                break
            seen_cursors.add(cursor)

        sem = asyncio.Semaphore(max_concurrency)

        async def _probe(entry: DirectoryPublisherEntry) -> PublisherDivergence | None:
            """Fetch one publisher's adagents.json and diff it against ``entry``."""
            async with sem:
                try:
                    data = await fetch_adagents(
                        entry.publisher_domain, timeout=timeout, client=http
                    )
                    federated_props = get_properties_by_agent(data, agent_url)
                    federated_ids = {
                        str(pid)
                        for prop in federated_props
                        if (pid := prop.get("property_id"))
                    }
                except (
                    AdagentsNotFoundError,
                    AdagentsValidationError,
                    AdagentsTimeoutError,
                    httpx.HTTPError,
                    OSError,
                    ValueError,
                ) as exc:
                    return PublisherDivergence(
                        publisher_domain=entry.publisher_domain,
                        directory_properties_authorized=entry.properties_authorized,
                        federated_properties_found=0,
                        missing_in_inline=None,
                        missing_in_federated=None,
                        # Some exceptions (notably timeouts) stringify to "";
                        # fall back to the type name so the error is never
                        # reported as an empty, falsy string.
                        child_fetch_error=str(exc) or type(exc).__name__,
                    )

            # Pure comparison from here on — no need to hold a semaphore slot.
            if entry.property_ids is not None:
                # Full set-diff path (adcp#4894).
                dir_ids = set(entry.property_ids)
                missing_in_inline = sorted(federated_ids - dir_ids)
                missing_in_federated = sorted(dir_ids - federated_ids)
                if not missing_in_inline and not missing_in_federated:
                    return None
                return PublisherDivergence(
                    publisher_domain=entry.publisher_domain,
                    directory_properties_authorized=entry.properties_authorized,
                    federated_properties_found=len(federated_ids),
                    missing_in_inline=missing_in_inline,
                    missing_in_federated=missing_in_federated,
                )

            # Count-only fallback (older directories).
            if len(federated_ids) == entry.properties_authorized:
                return None
            return PublisherDivergence(
                publisher_domain=entry.publisher_domain,
                directory_properties_authorized=entry.properties_authorized,
                federated_properties_found=len(federated_ids),
                missing_in_inline=None,
                missing_in_federated=None,
            )

        tasks = [asyncio.create_task(_probe(entry)) for entry in collected]
        try:
            probes = await asyncio.gather(*tasks)
        except BaseException:
            # gather() does not cancel siblings when one awaitable fails.
            # Cancel and drain them so no probe keeps using ``http`` after
            # the ``finally`` below has closed it.
            for task in tasks:
                task.cancel()
            await asyncio.gather(*tasks, return_exceptions=True)
            raise
    finally:
        if own_client:
            await http.aclose()

    return [p for p in probes if p is not None]

tests/fixtures/public_api_snapshot.json

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -121,6 +121,7 @@
121121
"DirectoryEdgeStatus",
122122
"DirectoryPublisherEntry",
123123
"DiscoveryMethod",
124+
"DivergenceReport",
124125
"DomainLookupResult",
125126
"Duration",
126127
"EntryErrorKind",
@@ -253,6 +254,7 @@
253254
"ProvidePerformanceFeedbackRequest",
254255
"ProvidePerformanceFeedbackResponse",
255256
"ProvidePerformanceFeedbackSuccessResponse",
257+
"PublisherDivergence",
256258
"PublisherProperties",
257259
"PublisherPropertiesAll",
258260
"PublisherPropertiesById",
@@ -351,6 +353,7 @@
351353
"create_mcp_webhook_payload",
352354
"create_test_agent",
353355
"creative_agent",
356+
"detect_publisher_properties_divergence",
354357
"domain_matches",
355358
"extract_webhook_result_data",
356359
"fetch_adagents",

0 commit comments

Comments
 (0)