Skip to content

Commit 0b70d33

Browse files
committed
fix(adagents): address review on publisher_properties resolution (#750)
Tightens authorization-decision semantics in `_resolve_agent_properties`: - Dedup key is now `(publisher_domain, property_id)` instead of bare `property_id`, so the same id under different parent domains stays distinct and missing-id properties no longer collapse to a single entry. Missing `property_id` now raises `AdagentsValidationError` (fail-fast per CLAUDE.md). - New `_validate_publisher_properties_selector` rejects two illegal selector shapes per adcp#4827: mixing scalar `publisher_domain` with array `publisher_domains`, and pairing `selection_type='by_id'` with the compact `publisher_domains[]` fan-out form (no defined per-domain id partitioning). - `_resolve_inline` fails closed on unknown `selection_type` (returns `[]` instead of all candidates) and on `selection_type='by_tag'` with empty `property_tags` — both are authorization decisions where a permissive fallback would silently grant access on schema drift. - Documents that parent-file `revoked_publisher_domains[]` (when implemented) is honored transparently because revoked domains are pre-filtered out of the per-domain index before resolution. Adds tests covering each rejection path, the new fail-closed behaviors, and the missing-property_id fail-fast. Adds a 2.0s wall-clock bound to the cafemedia scale test to catch O(N×M) regressions without depending on pytest-timeout (not currently a project dep).
1 parent 2f8f087 commit 0b70d33

2 files changed

Lines changed: 217 additions & 20 deletions

File tree

src/adcp/adagents.py

Lines changed: 60 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -955,6 +955,11 @@ def _resolve_agent_properties(
955955
# fetch. Federated fetch (per-domain HTTP) is a follow-up; this change
956956
# fixes the primary bug of returning raw selector dicts instead of resolved
957957
# property objects.
958+
#
959+
# Revocation: parent-file `revoked_publisher_domains[]` (when implemented)
960+
# pre-filters revoked domains from the top-level properties[] before this
961+
# function is called; inline resolution then honors revocation transparently
962+
# because the per-domain index built below never sees revoked-domain entries.
958963
if authorization_type == "publisher_properties":
959964
selectors = agent.get("publisher_properties", [])
960965
if not isinstance(selectors, list):
@@ -969,17 +974,30 @@ def _resolve_agent_properties(
969974
if isinstance(d, str) and d:
970975
domain_index.setdefault(d, []).append(p)
971976
resolved: list[dict[str, Any]] = []
972-
seen_ids: set[str | None] = set()
977+
# Key dedup on (publisher_domain, property_id) — same property_id on
978+
# different domains is a legitimate distinct property, and missing
979+
# property_id collapses to a single entry under bare `str` keying.
980+
seen_keys: set[tuple[str, str]] = set()
973981
for selector in selectors:
974982
if not isinstance(selector, dict):
975983
continue
984+
_validate_publisher_properties_selector(selector)
976985
for domain in _selector_domains(selector):
977986
inline = _resolve_inline(selector, domain_index, domain)
978987
if inline is not None:
979988
for prop in inline:
980989
pid = prop.get("property_id")
981-
if pid not in seen_ids:
982-
seen_ids.add(pid)
990+
if not isinstance(pid, str) or not pid:
991+
raise AdagentsValidationError(
992+
f"property under domain={domain!r} is missing "
993+
"required 'property_id'"
994+
)
995+
prop_domain = prop.get("publisher_domain")
996+
if not isinstance(prop_domain, str):
997+
prop_domain = domain
998+
key = (prop_domain, pid)
999+
if key not in seen_keys:
1000+
seen_keys.add(key)
9831001
resolved.append(prop)
9841002
# inline succeeded; skip federated fetch for this domain
9851003
# inline is None → no parent-file data for domain; federated
@@ -989,6 +1007,33 @@ def _resolve_agent_properties(
9891007
return []
9901008

9911009

1010+
def _validate_publisher_properties_selector(selector: dict[str, Any]) -> None:
1011+
"""Reject illegal publisher_properties selector shapes per adcp#4827.
1012+
1013+
Raises ``AdagentsValidationError`` for shapes that have no valid semantics:
1014+
1015+
* Both ``publisher_domain`` (scalar) and ``publisher_domains`` (compact array
1016+
form) are set — the spec forbids mixing the two domain encodings on the
1017+
same selector.
1018+
* ``selection_type == "by_id"`` combined with the compact ``publisher_domains``
1019+
array — property_ids are publisher-scoped and the compact fan-out form does
1020+
not carry per-domain id partitioning, so this combination has no defined
1021+
meaning.
1022+
"""
1023+
has_scalar = "publisher_domain" in selector and selector.get("publisher_domain")
1024+
has_array = "publisher_domains" in selector and selector.get("publisher_domains")
1025+
if has_scalar and has_array:
1026+
raise AdagentsValidationError(
1027+
"publisher_properties selector may not set both 'publisher_domain' "
1028+
"and 'publisher_domains'"
1029+
)
1030+
if selector.get("selection_type") == "by_id" and has_array:
1031+
raise AdagentsValidationError(
1032+
"publisher_properties selector with selection_type='by_id' may not "
1033+
"use the compact 'publisher_domains' form"
1034+
)
1035+
1036+
9921037
def _selector_domains(selector: dict[str, Any]) -> list[str]:
9931038
"""Extract publisher domain(s) from a publisher_properties selector.
9941039
@@ -1021,7 +1066,9 @@ def _resolve_inline(
10211066
filter — this is a real empty set; do NOT fall back.
10221067
10231068
Handles ``selection_type`` values: ``"all"``, ``"by_tag"``, ``"by_id"``.
1024-
Unknown types are treated permissively (return all domain candidates).
1069+
Unknown ``selection_type`` values fail closed (return ``[]``) — this is an
1070+
authorization-decision path; permissive fallbacks would silently grant
1071+
access on schema drift or typos.
10251072
"""
10261073
candidates = domain_index.get(domain)
10271074
if not candidates:
@@ -1033,16 +1080,21 @@ def _resolve_inline(
10331080
if selection_type == "by_tag":
10341081
required_tags = {t for t in selector.get("property_tags", []) if isinstance(t, str)}
10351082
if not required_tags:
1036-
return list(candidates)
1083+
# No tags supplied → no authorization. Fail-closed; the spec's
1084+
# `minItems: 1` should reject this upstream, but we don't rely on it.
1085+
return []
10371086
return [
1038-
p for p in candidates
1087+
p
1088+
for p in candidates
10391089
if required_tags & {t for t in p.get("tags", []) if isinstance(t, str)}
10401090
]
10411091
if selection_type == "by_id":
10421092
required_ids = {i for i in selector.get("property_ids", []) if isinstance(i, str)}
10431093
return [p for p in candidates if p.get("property_id") in required_ids]
1044-
# Unknown selection_type — permissive fallback
1045-
return list(candidates)
1094+
# Unknown selection_type → fail closed. This is an authorization-decision
1095+
# path; per CLAUDE.md "no fallbacks", an unrecognized selection type must
1096+
# grant nothing.
1097+
return []
10461098

10471099

10481100
def get_all_properties(adagents_data: dict[str, Any]) -> list[dict[str, Any]]:

tests/test_adagents.py

Lines changed: 157 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -1478,32 +1478,171 @@ def test_get_properties_by_agent_publisher_properties_by_id(self):
14781478
properties = get_properties_by_agent(adagents_data, "https://agent1.example.com")
14791479
assert {p["property_id"] for p in properties} == {"ctv-001"}
14801480

1481+
def test_selector_with_both_domain_forms_raises(self):
1482+
"""Selector setting both publisher_domain and publisher_domains is rejected."""
1483+
adagents_data = {
1484+
"properties": [
1485+
{"property_id": "a-001", "publisher_domain": "site-a.com", "name": "Site A"},
1486+
],
1487+
"authorized_agents": [
1488+
{
1489+
"url": "https://agent1.example.com",
1490+
"authorization_type": "publisher_properties",
1491+
"authorized_for": "Test",
1492+
"publisher_properties": [
1493+
{
1494+
"publisher_domain": "site-a.com",
1495+
"publisher_domains": ["site-b.com"],
1496+
"selection_type": "all",
1497+
},
1498+
],
1499+
},
1500+
],
1501+
}
1502+
with pytest.raises(AdagentsValidationError, match="may not set both"):
1503+
get_properties_by_agent(adagents_data, "https://agent1.example.com")
1504+
1505+
def test_by_id_with_publisher_domains_raises(self):
1506+
"""selection_type='by_id' combined with publisher_domains[] is rejected."""
1507+
adagents_data = {
1508+
"properties": [
1509+
{"property_id": "a-001", "publisher_domain": "site-a.com", "name": "Site A"},
1510+
],
1511+
"authorized_agents": [
1512+
{
1513+
"url": "https://agent1.example.com",
1514+
"authorization_type": "publisher_properties",
1515+
"authorized_for": "Test",
1516+
"publisher_properties": [
1517+
{
1518+
"publisher_domains": ["site-a.com", "site-b.com"],
1519+
"selection_type": "by_id",
1520+
"property_ids": ["a-001"],
1521+
},
1522+
],
1523+
},
1524+
],
1525+
}
1526+
with pytest.raises(
1527+
AdagentsValidationError, match="selection_type='by_id'.*publisher_domains"
1528+
):
1529+
get_properties_by_agent(adagents_data, "https://agent1.example.com")
1530+
1531+
def test_unknown_selection_type_returns_empty(self):
1532+
"""Unknown selection_type fails closed — no authorization granted."""
1533+
adagents_data = {
1534+
"properties": [
1535+
{"property_id": "a-001", "publisher_domain": "site-a.com", "name": "Site A"},
1536+
{"property_id": "a-002", "publisher_domain": "site-a.com", "name": "Site A2"},
1537+
],
1538+
"authorized_agents": [
1539+
{
1540+
"url": "https://agent1.example.com",
1541+
"authorization_type": "publisher_properties",
1542+
"authorized_for": "Test",
1543+
"publisher_properties": [
1544+
{
1545+
"publisher_domain": "site-a.com",
1546+
"selection_type": "by_category",
1547+
},
1548+
],
1549+
},
1550+
],
1551+
}
1552+
properties = get_properties_by_agent(adagents_data, "https://agent1.example.com")
1553+
assert properties == []
1554+
1555+
def test_by_tag_with_empty_property_tags_returns_empty(self):
1556+
"""selection_type='by_tag' with empty property_tags grants nothing."""
1557+
adagents_data = {
1558+
"properties": [
1559+
{
1560+
"property_id": "a-001",
1561+
"publisher_domain": "site-a.com",
1562+
"name": "Site A",
1563+
"tags": ["news"],
1564+
},
1565+
],
1566+
"authorized_agents": [
1567+
{
1568+
"url": "https://agent1.example.com",
1569+
"authorization_type": "publisher_properties",
1570+
"authorized_for": "Test",
1571+
"publisher_properties": [
1572+
{
1573+
"publisher_domain": "site-a.com",
1574+
"selection_type": "by_tag",
1575+
"property_tags": [],
1576+
},
1577+
],
1578+
},
1579+
],
1580+
}
1581+
properties = get_properties_by_agent(adagents_data, "https://agent1.example.com")
1582+
assert properties == []
1583+
1584+
def test_property_missing_property_id_raises(self):
1585+
"""Parent property without property_id under publisher_properties resolution
1586+
fails fast — every resolved property must carry an id (used for dedup).
1587+
"""
1588+
adagents_data = {
1589+
"properties": [
1590+
{
1591+
# NB: no property_id
1592+
"publisher_domain": "site-a.com",
1593+
"name": "Site A",
1594+
"tags": ["news"],
1595+
},
1596+
],
1597+
"authorized_agents": [
1598+
{
1599+
"url": "https://agent1.example.com",
1600+
"authorization_type": "publisher_properties",
1601+
"authorized_for": "Test",
1602+
"publisher_properties": [
1603+
{
1604+
"publisher_domain": "site-a.com",
1605+
"selection_type": "all",
1606+
},
1607+
],
1608+
},
1609+
],
1610+
}
1611+
with pytest.raises(AdagentsValidationError, match="missing required 'property_id'"):
1612+
get_properties_by_agent(adagents_data, "https://agent1.example.com")
1613+
14811614
def test_get_properties_by_agent_cafemedia_scale(self):
14821615
"""Cafemedia/interchange.io canonical fixture: 6,843 inline properties across
14831616
6,800 child domains, all raptive_managed, one authorized agent.
14841617
14851618
Sized to catch O(N×M) regressions — at this scale an unindexed
14861619
implementation (~46 M ops) would cause a multi-second timeout.
14871620
"""
1621+
import time
1622+
14881623
# 6,800 child publisher domains (cafemedia fan-out shape)
14891624
child_domains = [f"site{i:04d}.raptive.com" for i in range(6800)]
14901625
properties: list[dict] = []
14911626
# One property per child domain
14921627
for i, domain in enumerate(child_domains):
1493-
properties.append({
1494-
"property_id": f"p-{i:05d}",
1495-
"publisher_domain": domain,
1496-
"name": f"Site {i} — Raptive Managed",
1497-
"tags": ["raptive_managed"],
1498-
})
1628+
properties.append(
1629+
{
1630+
"property_id": f"p-{i:05d}",
1631+
"publisher_domain": domain,
1632+
"name": f"Site {i} — Raptive Managed",
1633+
"tags": ["raptive_managed"],
1634+
}
1635+
)
14991636
# 43 extra properties on the first 43 domains (total: 6,843)
15001637
for i in range(43):
1501-
properties.append({
1502-
"property_id": f"extra-{i:03d}",
1503-
"publisher_domain": child_domains[i],
1504-
"name": f"Site {i} Extra Property",
1505-
"tags": ["raptive_managed", "ctv"],
1506-
})
1638+
properties.append(
1639+
{
1640+
"property_id": f"extra-{i:03d}",
1641+
"publisher_domain": child_domains[i],
1642+
"name": f"Site {i} Extra Property",
1643+
"tags": ["raptive_managed", "ctv"],
1644+
}
1645+
)
15071646

15081647
adagents_data = {
15091648
"properties": properties,
@@ -1523,7 +1662,13 @@ def test_get_properties_by_agent_cafemedia_scale(self):
15231662
],
15241663
}
15251664

1665+
start = time.perf_counter()
15261666
result = get_properties_by_agent(adagents_data, "https://interchange.io")
1667+
elapsed = time.perf_counter() - start
1668+
assert elapsed < 2.0, (
1669+
f"resolution took {elapsed:.2f}s at cafemedia scale "
1670+
"(>2s indicates O(N×M) regression)"
1671+
)
15271672
assert len(result) == 6843
15281673
result_domains = {p["publisher_domain"] for p in result}
15291674
assert result_domains == set(child_domains) # all 6,800 must appear, not just a subset

0 commit comments

Comments
 (0)