Skip to content

Commit 6bb2c26

Browse files
bokelleyclaude
andauthored
fix(compat): extract hostname from brand_manifest URLs with paths (#679)
strip_url_scheme only stripped the scheme prefix; for URLs like "https://acme.com/.well-known/brand.json" it produced "acme.com/.well-known/brand.json", which fails BrandReference.domain's regex and surfaces as a confusing INVALID_REQUEST[brand.domain] error. Adds extract_brand_domain() to _url.py — uses urlparse().hostname to isolate the hostname, falling back to strip_url_scheme for bare-domain inputs (no scheme). Both adapter call sites (get_products.adapt_request and _media_buy_helpers.adapt_brand_manifest_to_brand) now use the new helper. Regression tests cover the exact URL-with-path case from #677 and URL-with-port. Fixes #677 https://claude.ai/code/session_01YX14H75zYWQ1BYuopyUL34 Co-authored-by: Claude <noreply@anthropic.com>
1 parent 8632847 commit 6bb2c26

5 files changed

Lines changed: 88 additions & 7 deletions

File tree

src/adcp/compat/legacy/v2_5/_media_buy_helpers.py

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -14,17 +14,22 @@
1414

1515
from typing import Any
1616

17-
from adcp.compat.legacy.v2_5._url import strip_url_scheme
17+
from adcp.compat.legacy.v2_5._url import extract_brand_domain
1818

1919

2020
def adapt_brand_manifest_to_brand(payload: dict[str, Any]) -> dict[str, Any]:
2121
"""Rewrite v2.5 ``brand_manifest`` (URL string) to v3 ``brand``
22-
``{domain: ...}``. Caller-supplied ``brand`` wins when both fields
23-
are present (half-migrated buyer)."""
22+
``{domain: ...}``. Caller-supplied ``brand`` wins when both fields
23+
are present (half-migrated buyer).
24+
25+
Uses ``extract_brand_domain`` to isolate the hostname from full URLs
26+
(e.g. ``"https://acme.com/.well-known/brand.json"`` → ``"acme.com"``)
27+
so the result satisfies ``BrandReference.domain``'s hostname-only regex.
28+
"""
2429
out = dict(payload)
2530
manifest = out.pop("brand_manifest", None)
2631
if isinstance(manifest, str) and manifest and "brand" not in out:
27-
out["brand"] = {"domain": strip_url_scheme(manifest)}
32+
out["brand"] = {"domain": extract_brand_domain(manifest)}
2833
return out
2934

3035

src/adcp/compat/legacy/v2_5/_url.py

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@
88

99
from __future__ import annotations
1010

11+
from urllib.parse import urlparse
12+
1113

1214
def strip_url_scheme(url: str) -> str:
1315
"""``https://acme.example.com/`` → ``acme.example.com``.
@@ -16,10 +18,52 @@ def strip_url_scheme(url: str) -> str:
1618
after trailing-slash strip), ``http://`` schemes (legacy clients
1719
don't all enforce https), and trailing slashes from sloppy
1820
concatenation.
21+
22+
Does NOT strip URL paths or ports. Use ``extract_brand_domain``
23+
when the input may be a full URL (scheme + path) and you need only
24+
the hostname component.
1925
"""
2026
s = url.strip()
2127
for prefix in ("https://", "http://"):
2228
if s.startswith(prefix):
2329
s = s[len(prefix) :]
2430
break
2531
return s.rstrip("/")
32+
33+
34+
def extract_brand_domain(url: str) -> str:
35+
"""Extract a ``BrandReference.domain``-safe hostname from a brand_manifest URL.
36+
37+
v2.5 ``brand_manifest`` is documented as a URL to a JSON file
38+
(e.g. ``"https://acme.com/.well-known/brand.json"``). The v3
39+
``BrandReference.domain`` field requires a bare hostname matching
40+
``^[a-z0-9]([a-z0-9-]*[a-z0-9])?(\\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)*$``.
41+
42+
Behaviour by input shape:
43+
44+
* Full URL with scheme (``https://acme.com/path``):
45+
``urlparse`` extracts the hostname (``"acme.com"``); path and
46+
port are discarded. Uppercase schemes are normalised by
47+
``urlparse``; uppercase hostnames are lowercased by ``urlparse``
48+
(e.g. ``HTTPS://ACME.COM/path`` → ``"acme.com"``).
49+
* URL with port (``https://acme.com:8443/path``):
50+
hostname is extracted without the port (``"acme.com"``).
51+
* Bare domain without scheme (``acme.com``):
52+
``urlparse`` returns ``hostname=None``; falls back to
53+
``strip_url_scheme`` which returns the input unchanged after
54+
stripping any trailing slashes.
55+
* IPv6 literal (``https://[::1]/path``):
56+
``urlparse`` returns ``hostname="::1"`` (brackets stripped).
57+
This value does not satisfy ``BrandReference.domain``'s regex;
58+
callers must validate the result if IPv6 addresses are possible.
59+
60+
The caller is responsible for ensuring ``url`` is non-empty before
61+
calling (both adapter call sites already gate on
62+
``isinstance(manifest, str) and manifest``).
63+
"""
64+
s = url.strip()
65+
# urlparse correctly handles full URLs: extracts hostname, drops path and port.
66+
# For bare domains (no scheme), urlparse treats the input as a path-only URI
67+
# and returns hostname=None, so we fall back to strip_url_scheme.
68+
hostname = urlparse(s).hostname
69+
return hostname if hostname is not None else strip_url_scheme(s)

src/adcp/compat/legacy/v2_5/get_products.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,7 @@
4040

4141
from adcp.compat.legacy import register_adapter
4242
from adcp.compat.legacy.types import AdapterPair
43-
from adcp.compat.legacy.v2_5._url import strip_url_scheme
43+
from adcp.compat.legacy.v2_5._url import extract_brand_domain
4444

4545
# v2.5 channel buckets to v3 channel slugs. Multi-mapped buckets resolve
4646
# to all listed v3 channels; downstream consumers can narrow further via
@@ -127,10 +127,14 @@ def adapt_request(payload: dict[str, Any]) -> dict[str, Any]:
127127
"""Translate a v2.5 ``get_products`` request to v3 shape."""
128128
out = dict(payload)
129129

130-
# brand_manifest (v2.5 URL string) → brand.domain (v3 BrandReference)
130+
# brand_manifest (v2.5 URL string) → brand.domain (v3 BrandReference).
131+
# v2.5 spec documents brand_manifest as a URL to a JSON file, so paths
132+
# are the expected input shape. extract_brand_domain uses urlparse to
133+
# isolate the hostname so the result satisfies BrandReference.domain's
134+
# hostname-only regex.
131135
brand_manifest = out.pop("brand_manifest", None)
132136
if isinstance(brand_manifest, str) and brand_manifest and "brand" not in out:
133-
out["brand"] = {"domain": strip_url_scheme(brand_manifest)}
137+
out["brand"] = {"domain": extract_brand_domain(brand_manifest)}
134138

135139
# promoted_offerings → catalog
136140
promoted = out.pop("promoted_offerings", None)

tests/test_legacy_adapter_v2_5_get_products.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -57,6 +57,20 @@ def test_brand_manifest_strips_trailing_slash() -> None:
5757
assert out["brand"] == {"domain": "acme.example.com"}
5858

5959

60+
def test_brand_manifest_with_path_extracts_hostname() -> None:
61+
"""Regression for #677: brand_manifest URL with a path must extract only
62+
the hostname so the result satisfies BrandReference.domain's regex."""
63+
out = _adapt({"brand_manifest": "https://acme.com/.well-known/brand.json"})
64+
assert out["brand"] == {"domain": "acme.com"}
65+
assert "brand_manifest" not in out
66+
67+
68+
def test_brand_manifest_with_port_drops_port() -> None:
69+
"""Port numbers are stripped; only the hostname reaches brand.domain."""
70+
out = _adapt({"brand_manifest": "https://acme.com:8443/.well-known/brand.json"})
71+
assert out["brand"] == {"domain": "acme.com"}
72+
73+
6074
def test_brand_field_takes_precedence_over_manifest() -> None:
6175
"""Half-migrated buyer sends both — keep the v3 field."""
6276
out = _adapt(

tests/test_legacy_adapter_v2_5_media_buy.py

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,20 @@ def test_create_media_buy_brand_manifest_becomes_brand_domain() -> None:
4444
assert "brand_manifest" not in out
4545

4646

47+
def test_create_media_buy_brand_manifest_with_path_extracts_hostname() -> None:
48+
"""Regression for #677: brand_manifest URL with a path must extract only
49+
the hostname so the result satisfies BrandReference.domain's regex."""
50+
out = v2_5_cmb.adapt_request({"brand_manifest": "https://acme.com/.well-known/brand.json"})
51+
assert out["brand"] == {"domain": "acme.com"}
52+
assert "brand_manifest" not in out
53+
54+
55+
def test_create_media_buy_brand_manifest_with_port_drops_port() -> None:
56+
"""Port numbers are stripped; only the hostname reaches brand.domain."""
57+
out = v2_5_cmb.adapt_request({"brand_manifest": "https://acme.com:8443/.well-known/brand.json"})
58+
assert out["brand"] == {"domain": "acme.com"}
59+
60+
4761
def test_create_media_buy_brand_wins_over_manifest_when_both_present() -> None:
4862
out = v2_5_cmb.adapt_request(
4963
{

0 commit comments

Comments
 (0)