@@ -1804,6 +1804,16 @@ class DirectoryPublisherEntry(AdCPBaseModel):
18041804 signing_keys_pinned : bool | None = None
18051805 status : DirectoryEdgeStatus
18061806 last_verified_at : datetime
1807+ property_ids : list [str ] | None = Field (
1808+ default = None ,
1809+ description = (
1810+ "Canonical property IDs the agent's selectors resolve to under "
1811+ "this publisher. Present iff the request was made with "
1812+ "include=['properties'] AND the directory server supports it "
1813+ "(per adcp#4894). None signals count-only mode for downstream "
1814+ "consumers."
1815+ ),
1816+ )
18071817
18081818
18091819class AgentAuthorizationsDirectoryResult (AdCPBaseModel ):
@@ -1832,6 +1842,7 @@ async def fetch_agent_authorizations_from_directory(
18321842 * ,
18331843 directory_url : str ,
18341844 since : str | None = None ,
1845+ include : list [str ] | None = None ,
18351846 timeout : float = 10.0 ,
18361847 client : httpx .AsyncClient | None = None ,
18371848) -> AgentAuthorizationsDirectoryResult :
@@ -1854,6 +1865,15 @@ async def fetch_agent_authorizations_from_directory(
18541865 since: Optional opaque cursor or RFC 3339 timestamp from a prior
18551866 ``directory_indexed_at`` — passed through as ``?since=...``
18561867 to limit the result to edges that changed since that point.
1868+ include: Optional list of expansion keys per the AAO directory
1869+ API spec (adcp#4894). Each value is emitted as a separate
1870+ ``?include=<value>`` query parameter (repeated-key form, not
1871+ comma-joined). Pass ``["properties"]`` against directories
1872+ that support it to receive per-publisher ``property_ids[]``
1873+ on each row, enabling full set-diff against the publisher's
1874+ own adagents.json. Directories that don't support a given
1875+ expansion key simply omit the corresponding fields from the
1876+ response; callers should treat absence as count-only mode.
18571877 timeout: Request timeout in seconds.
18581878 client: Optional shared ``httpx.AsyncClient`` for connection
18591879 pooling. Caller owns the client lifecycle.
@@ -1890,8 +1910,15 @@ async def fetch_agent_authorizations_from_directory(
18901910 _validate_redirect_url (f"{ base } /v1/agents/_/publishers" )
18911911
18921912 request_url = f"{ base } /v1/agents/{ quote (agent_url , safe = '' )} /publishers"
1913+ query_pairs : list [tuple [str , str ]] = []
18931914 if since is not None :
1894- request_url = f"{ request_url } ?since={ quote (since , safe = '' )} "
1915+ query_pairs .append (("since" , since ))
1916+ if include :
1917+ for value in include :
1918+ query_pairs .append (("include" , value ))
1919+ if query_pairs :
1920+ query_string = "&" .join (f"{ quote (k , safe = '' )} ={ quote (v , safe = '' )} " for k , v in query_pairs )
1921+ request_url = f"{ request_url } ?{ query_string } "
18951922
18961923 parsed = urlparse (request_url )
18971924 await _dns_validate_host (
@@ -1943,3 +1970,165 @@ async def fetch_agent_authorizations_from_directory(
19431970 raise AdagentsValidationError (
19441971 f"Agent-publishers directory response failed schema validation: { e } "
19451972 ) from e
1973+
1974+
class PublisherDivergence(AdCPBaseModel):
    """One publisher's disagreement between the directory and its own adagents.json.

    Counts:
        ``directory_properties_authorized`` is the number of properties the
        directory reports for the agent under this publisher;
        ``federated_properties_found`` is the number of property IDs found
        in the publisher's own adagents.json for the same agent.

    Set-diff fields (populated only when the directory returned
    ``property_ids[]``, i.e. ``?include=properties`` per adcp#4894):
        ``missing_in_inline`` lists property IDs present in the publisher's
        adagents.json but absent from the directory's answer (the directory
        has not caught up with the publisher yet).
        ``missing_in_federated`` lists property IDs the directory attributes
        to the agent that the publisher's adagents.json does not contain
        (stale directory entry or a publisher-side revocation).

    In count-only fallback mode both set-diff fields are None. Equal counts
    in that mode do NOT prove equal sets: a same-count substitution cannot
    be detected without the ID lists.

    ``child_fetch_error`` is set when the publisher's adagents.json could
    not be fetched or parsed; when it is non-None the remaining fields
    carry no meaning.
    """

    publisher_domain: str
    directory_properties_authorized: int = Field(ge=0)
    federated_properties_found: int = Field(ge=0)
    missing_in_inline: list[str] | None = None
    missing_in_federated: list[str] | None = None
    child_fetch_error: str | None = None
2002+
2003+
# A divergence report is a plain list holding one record per publisher whose
# directory view and own adagents.json disagree (or whose adagents.json could
# not be fetched); an empty list means no divergence was detected.
DivergenceReport = list[PublisherDivergence]
2005+
2006+
async def detect_publisher_properties_divergence(
    agent_url: str,
    *,
    directory_url: str,
    sample_size: int | None = 200,
    max_concurrency: int = 20,
    timeout: float = 30.0,
    client: httpx.AsyncClient | None = None,
) -> DivergenceReport:
    """Compare directory's inline resolution against per-publisher federated fetches.

    For each publisher the directory lists under ``agent_url``, fetches
    that publisher's own ``adagents.json`` and compares the property set
    against the directory's claim. Returns only publishers where the two
    paths disagree (or where the child fetch failed).

    Always requests ``include=["properties"]`` from the directory so the
    full ``(publisher_domain, property_id)`` set-diff lights up on
    directories that support adcp#4894. Against older directories that
    return only ``properties_authorized`` counts, falls back to count-
    comparison; ``missing_in_inline`` / ``missing_in_federated`` are
    None in that fallback path.

    Per adcp#4827 §Resolution-paths, the federated result is
    authoritative when the two paths disagree.

    Args:
        agent_url: agent to check.
        directory_url: AAO directory base URL (HTTPS only — same SSRF
            gate as :func:`fetch_agent_authorizations_from_directory`).
        sample_size: cap the sweep at N publishers. Directory pages are
            fetched in order until at least N entries have been
            collected, then the list is truncated to the first N. None
            opts into a full sweep across all pages — only do this for
            small networks. Default 200 keeps the divergence sweep
            bounded by default. Must not be negative.
        max_concurrency: semaphore-capped concurrent federated fetches.
            Default 20 — caps the burst against publisher origins.
            Must be at least 1.
        timeout: per-request timeout (directory + child fetches).
        client: optional shared ``httpx.AsyncClient``. When omitted, a
            client is created for this call and closed before returning.

    Returns:
        :data:`DivergenceReport` (``list[PublisherDivergence]``). Empty
        list = no divergence detected. Note in count-only fallback mode,
        an empty list means counts agree but set-equality is not
        guaranteed.

    Raises:
        ValueError: if ``max_concurrency`` is below 1 or ``sample_size``
            is negative.

    Errors raised by the directory fetch itself are not caught here and
    propagate to the caller; only per-publisher fetch failures are folded
    into the report as ``child_fetch_error``.
    """
    # Validate before any I/O or client creation: Semaphore(0) would park
    # every probe forever, and a negative sample_size would silently drop
    # entries from the tail via slicing.
    if max_concurrency < 1:
        raise ValueError(f"max_concurrency must be >= 1, got {max_concurrency}")
    if sample_size is not None and sample_size < 0:
        raise ValueError(f"sample_size must be >= 0 or None, got {sample_size}")

    own_client = client is None
    http = client if client is not None else httpx.AsyncClient()
    try:
        collected: list[DirectoryPublisherEntry] = []
        cursor: str | None = None
        seen_cursors: set[str] = set()
        while True:
            page = await fetch_agent_authorizations_from_directory(
                agent_url,
                directory_url=directory_url,
                since=cursor,
                include=["properties"],
                timeout=timeout,
                client=http,
            )
            collected.extend(page.publishers)
            if sample_size is not None and len(collected) >= sample_size:
                del collected[sample_size:]
                break
            cursor = page.next_cursor
            # Stop on the last page, and also when the directory hands back
            # a cursor we already followed — otherwise a misbehaving server
            # could keep this loop spinning forever.
            if not cursor or cursor in seen_cursors:
                break
            seen_cursors.add(cursor)

        sem = asyncio.Semaphore(max_concurrency)

        async def _probe(entry: DirectoryPublisherEntry) -> PublisherDivergence | None:
            """Fetch one publisher's adagents.json and diff it against ``entry``."""
            async with sem:
                try:
                    data = await fetch_adagents(
                        entry.publisher_domain, timeout=timeout, client=http
                    )
                    federated_props = get_properties_by_agent(data, agent_url)
                    federated_ids = {
                        str(p.get("property_id"))
                        for p in federated_props
                        if p.get("property_id")
                    }
                except (
                    AdagentsNotFoundError,
                    AdagentsValidationError,
                    AdagentsTimeoutError,
                    httpx.HTTPError,
                    OSError,
                    ValueError,
                ) as exc:
                    # An unreachable or unparseable publisher is reported,
                    # not raised, so one bad origin cannot abort the sweep.
                    return PublisherDivergence(
                        publisher_domain=entry.publisher_domain,
                        directory_properties_authorized=entry.properties_authorized,
                        federated_properties_found=0,
                        missing_in_inline=None,
                        missing_in_federated=None,
                        child_fetch_error=str(exc),
                    )

            if entry.property_ids is not None:
                # Full set-diff path (adcp#4894).
                dir_ids = set(entry.property_ids)
                missing_in_inline = sorted(federated_ids - dir_ids)
                missing_in_federated = sorted(dir_ids - federated_ids)
                if not missing_in_inline and not missing_in_federated:
                    return None
                return PublisherDivergence(
                    publisher_domain=entry.publisher_domain,
                    directory_properties_authorized=entry.properties_authorized,
                    federated_properties_found=len(federated_ids),
                    missing_in_inline=missing_in_inline,
                    missing_in_federated=missing_in_federated,
                )

            # Count-only fallback (older directories).
            if len(federated_ids) == entry.properties_authorized:
                return None
            return PublisherDivergence(
                publisher_domain=entry.publisher_domain,
                directory_properties_authorized=entry.properties_authorized,
                federated_properties_found=len(federated_ids),
                missing_in_inline=None,
                missing_in_federated=None,
            )

        tasks = [asyncio.create_task(_probe(e)) for e in collected]
        try:
            probes = await asyncio.gather(*tasks)
        except BaseException:
            # gather() does not cancel sibling tasks when one of them raises.
            # Cancel and drain them here so no probe is still using ``http``
            # when the ``finally`` below closes it.
            for task in tasks:
                task.cancel()
            await asyncio.gather(*tasks, return_exceptions=True)
            raise
    finally:
        if own_client:
            await http.aclose()

    return [p for p in probes if p is not None]
0 commit comments