Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 9 additions & 4 deletions src/adcp/compat/legacy/v2_5/_media_buy_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,17 +14,22 @@

from typing import Any

from adcp.compat.legacy.v2_5._url import strip_url_scheme
from adcp.compat.legacy.v2_5._url import extract_brand_domain


def adapt_brand_manifest_to_brand(payload: dict[str, Any]) -> dict[str, Any]:
"""Rewrite v2.5 ``brand_manifest`` (URL string) to v3 ``brand``
``{domain: ...}``. Caller-supplied ``brand`` wins when both fields
are present (half-migrated buyer)."""
``{domain: ...}``. Caller-supplied ``brand`` wins when both fields
are present (half-migrated buyer).

Uses ``extract_brand_domain`` to isolate the hostname from full URLs
(e.g. ``"https://acme.com/.well-known/brand.json"`` → ``"acme.com"``)
so the result satisfies ``BrandReference.domain``'s hostname-only regex.
"""
out = dict(payload)
manifest = out.pop("brand_manifest", None)
if isinstance(manifest, str) and manifest and "brand" not in out:
out["brand"] = {"domain": strip_url_scheme(manifest)}
out["brand"] = {"domain": extract_brand_domain(manifest)}
return out


Expand Down
44 changes: 44 additions & 0 deletions src/adcp/compat/legacy/v2_5/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@

from __future__ import annotations

from urllib.parse import urlparse


def strip_url_scheme(url: str) -> str:
"""``https://acme.example.com/`` → ``acme.example.com``.
Expand All @@ -16,10 +18,52 @@ def strip_url_scheme(url: str) -> str:
after trailing-slash strip), ``http://`` schemes (legacy clients
don't all enforce https), and trailing slashes from sloppy
concatenation.

Does NOT strip URL paths or ports. Use ``extract_brand_domain``
when the input may be a full URL (scheme + path) and you need only
the hostname component.
"""
s = url.strip()
for prefix in ("https://", "http://"):
if s.startswith(prefix):
s = s[len(prefix) :]
break
return s.rstrip("/")


def extract_brand_domain(url: str) -> str:
"""Extract a ``BrandReference.domain``-safe hostname from a brand_manifest URL.

v2.5 ``brand_manifest`` is documented as a URL to a JSON file
(e.g. ``"https://acme.com/.well-known/brand.json"``). The v3
``BrandReference.domain`` field requires a bare hostname matching
``^[a-z0-9]([a-z0-9-]*[a-z0-9])?(\\.[a-z0-9]([a-z0-9-]*[a-z0-9])?)*$``.

Behaviour by input shape:

* Full URL with scheme (``https://acme.com/path``):
``urlparse`` extracts the hostname (``"acme.com"``); path and
port are discarded. Uppercase schemes are normalised by
``urlparse``; uppercase hostnames are lowercased by ``urlparse``
(e.g. ``HTTPS://ACME.COM/path`` → ``"acme.com"``).
* URL with port (``https://acme.com:8443/path``):
hostname is extracted without the port (``"acme.com"``).
* Bare domain without scheme (``acme.com``):
``urlparse`` returns ``hostname=None``; falls back to
``strip_url_scheme`` which returns the input unchanged after
stripping any trailing slashes.
* IPv6 literal (``https://[::1]/path``):
``urlparse`` returns ``hostname="::1"`` (brackets stripped).
This value does not satisfy ``BrandReference.domain``'s regex;
callers must validate the result if IPv6 addresses are possible.

The caller is responsible for ensuring ``url`` is non-empty before
calling (both adapter call sites already gate on
``isinstance(manifest, str) and manifest``).
"""
s = url.strip()
# urlparse correctly handles full URLs: extracts hostname, drops path and port.
# For bare domains (no scheme), urlparse treats the input as a path-only URI
# and returns hostname=None, so we fall back to strip_url_scheme.
hostname = urlparse(s).hostname
return hostname if hostname is not None else strip_url_scheme(s)
10 changes: 7 additions & 3 deletions src/adcp/compat/legacy/v2_5/get_products.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,7 @@

from adcp.compat.legacy import register_adapter
from adcp.compat.legacy.types import AdapterPair
from adcp.compat.legacy.v2_5._url import strip_url_scheme
from adcp.compat.legacy.v2_5._url import extract_brand_domain

# v2.5 channel buckets to v3 channel slugs. Multi-mapped buckets resolve
# to all listed v3 channels; downstream consumers can narrow further via
Expand Down Expand Up @@ -127,10 +127,14 @@ def adapt_request(payload: dict[str, Any]) -> dict[str, Any]:
"""Translate a v2.5 ``get_products`` request to v3 shape."""
out = dict(payload)

# brand_manifest (v2.5 URL string) → brand.domain (v3 BrandReference)
# brand_manifest (v2.5 URL string) → brand.domain (v3 BrandReference).
# v2.5 spec documents brand_manifest as a URL to a JSON file, so paths
# are the expected input shape. extract_brand_domain uses urlparse to
# isolate the hostname so the result satisfies BrandReference.domain's
# hostname-only regex.
brand_manifest = out.pop("brand_manifest", None)
if isinstance(brand_manifest, str) and brand_manifest and "brand" not in out:
out["brand"] = {"domain": strip_url_scheme(brand_manifest)}
out["brand"] = {"domain": extract_brand_domain(brand_manifest)}

# promoted_offerings → catalog
promoted = out.pop("promoted_offerings", None)
Expand Down
14 changes: 14 additions & 0 deletions tests/test_legacy_adapter_v2_5_get_products.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,20 @@ def test_brand_manifest_strips_trailing_slash() -> None:
assert out["brand"] == {"domain": "acme.example.com"}


def test_brand_manifest_with_path_extracts_hostname() -> None:
"""Regression for #677: brand_manifest URL with a path must extract only
the hostname so the result satisfies BrandReference.domain's regex."""
out = _adapt({"brand_manifest": "https://acme.com/.well-known/brand.json"})
assert out["brand"] == {"domain": "acme.com"}
assert "brand_manifest" not in out


def test_brand_manifest_with_port_drops_port() -> None:
"""Port numbers are stripped; only the hostname reaches brand.domain."""
out = _adapt({"brand_manifest": "https://acme.com:8443/.well-known/brand.json"})
assert out["brand"] == {"domain": "acme.com"}


def test_brand_field_takes_precedence_over_manifest() -> None:
"""Half-migrated buyer sends both — keep the v3 field."""
out = _adapt(
Expand Down
14 changes: 14 additions & 0 deletions tests/test_legacy_adapter_v2_5_media_buy.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,20 @@ def test_create_media_buy_brand_manifest_becomes_brand_domain() -> None:
assert "brand_manifest" not in out


def test_create_media_buy_brand_manifest_with_path_extracts_hostname() -> None:
"""Regression for #677: brand_manifest URL with a path must extract only
the hostname so the result satisfies BrandReference.domain's regex."""
out = v2_5_cmb.adapt_request({"brand_manifest": "https://acme.com/.well-known/brand.json"})
assert out["brand"] == {"domain": "acme.com"}
assert "brand_manifest" not in out


def test_create_media_buy_brand_manifest_with_port_drops_port() -> None:
"""Port numbers are stripped; only the hostname reaches brand.domain."""
out = v2_5_cmb.adapt_request({"brand_manifest": "https://acme.com:8443/.well-known/brand.json"})
assert out["brand"] == {"domain": "acme.com"}


def test_create_media_buy_brand_wins_over_manifest_when_both_present() -> None:
out = v2_5_cmb.adapt_request(
{
Expand Down
Loading