Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 11 additions & 26 deletions src/adcp/compat/legacy/v2_5/_media_buy_helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,19 +12,12 @@

from __future__ import annotations

import logging
from typing import Any
from urllib.parse import urlparse

from adcp.compat.legacy.v2_5._url import extract_brand_domain

_logger = logging.getLogger(__name__)

# Paths that v3 sellers reconstruct correctly — no information is lost.
_STANDARD_BRAND_MANIFEST_PATHS = {"", "/", "/.well-known/brand.json"}

# Per-URL dedup so high-RPS v2.5 buyers don't saturate the log pipeline.
_brand_manifest_path_warned: set[str] = set()
from adcp.compat.legacy.v2_5._url import (
extract_brand_domain,
warn_brand_manifest_path_lossy,
)


def adapt_brand_manifest_to_brand(payload: dict[str, Any]) -> dict[str, Any]:
Expand All @@ -40,30 +33,22 @@ def adapt_brand_manifest_to_brand(payload: dict[str, Any]) -> dict[str, Any]:
Uses ``extract_brand_domain`` to isolate the hostname from full URLs
(e.g. ``"https://acme.com/.well-known/brand.json"`` → ``"acme.com"``)
so the result satisfies ``BrandReference.domain``'s hostname-only regex.
``warn_brand_manifest_path_lossy`` surfaces a one-time warning when the
original URL's path won't be reconstructed by v3 sellers — applied
uniformly to both the URL-string and inline-object branches.
"""
out = dict(payload)
manifest = out.pop("brand_manifest", None)
if isinstance(manifest, str) and manifest and "brand" not in out:
domain = extract_brand_domain(manifest)
parsed = urlparse(manifest.strip())
if (
parsed.hostname is not None
and parsed.path not in _STANDARD_BRAND_MANIFEST_PATHS
and manifest not in _brand_manifest_path_warned
):
_brand_manifest_path_warned.add(manifest)
_logger.warning(
"brand_manifest at %s uses a non-standard path; "
"v3 sellers derive %s/.well-known/brand.json from BrandReference.domain. "
"Manifest fetch may 404.",
manifest,
domain,
)
warn_brand_manifest_path_lossy(manifest, domain)
out["brand"] = {"domain": domain}
elif isinstance(manifest, dict) and "brand" not in out:
url = manifest.get("url")
if isinstance(url, str) and url.strip():
out["brand"] = {"domain": extract_brand_domain(url)}
domain = extract_brand_domain(url)
warn_brand_manifest_path_lossy(url, domain)
out["brand"] = {"domain": domain}
return out


Expand Down
50 changes: 50 additions & 0 deletions src/adcp/compat/legacy/v2_5/_url.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,19 @@

from __future__ import annotations

import logging
from urllib.parse import urlparse

_logger = logging.getLogger(__name__)

# Paths that v3 sellers reconstruct correctly from ``brand.domain`` — no
# information is lost when the adapter flattens these to the hostname.
_STANDARD_BRAND_MANIFEST_PATHS = {"", "/", "/.well-known/brand.json"}

# Per-URL dedup so high-RPS v2.5 buyers don't saturate the log pipeline
# when many requests carry the same non-canonical manifest URL.
_brand_manifest_path_warned: set[str] = set()


def strip_url_scheme(url: str) -> str:
"""``https://acme.example.com/`` → ``acme.example.com``.
Expand Down Expand Up @@ -67,3 +78,42 @@ def extract_brand_domain(url: str) -> str:
# and returns hostname=None, so we fall back to strip_url_scheme.
hostname = urlparse(s).hostname
return hostname if hostname is not None else strip_url_scheme(s)


def warn_brand_manifest_path_lossy(manifest_url: str, domain: str) -> None:
"""Emit a one-time WARNING if ``manifest_url`` has a path v3 sellers
cannot reconstruct from ``BrandReference.domain``.

v3 sellers derive the canonical manifest URL as
``https://{domain}/.well-known/brand.json``. When the v2.5 buyer's
original ``brand_manifest`` URL pointed at a different path (e.g. a
CDN-hosted manifest), translating to ``brand.domain`` silently drops
that path and the v3 seller's fetch will likely 404.

The adapter cannot avoid this — v3 ``BrandReference`` is hostname-only
by schema (``additionalProperties: false``). The warning surfaces the
lossy mapping to operators so debugging downstream 404s doesn't start
from "no signal in the SDK".

Inputs with no derivable hostname (bare-domain strings, blank URLs)
are ignored — those don't represent a path mapping at all.

Dedup is per-URL across the process lifetime; the cache is
intentionally unbounded since per-deployment cardinality of distinct
``brand_manifest`` URLs is small.
"""
parsed = urlparse(manifest_url.strip())
if parsed.hostname is None:
return
if parsed.path in _STANDARD_BRAND_MANIFEST_PATHS:
return
if manifest_url in _brand_manifest_path_warned:
return
_brand_manifest_path_warned.add(manifest_url)
_logger.warning(
"brand_manifest at %s uses a non-standard path; "
"v3 sellers derive %s/.well-known/brand.json from BrandReference.domain. "
"Manifest fetch may 404.",
manifest_url,
domain,
)
34 changes: 8 additions & 26 deletions src/adcp/compat/legacy/v2_5/get_products.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,21 +36,14 @@

from __future__ import annotations

import logging
from typing import Any
from urllib.parse import urlparse

from adcp.compat.legacy import register_adapter
from adcp.compat.legacy.types import AdapterPair
from adcp.compat.legacy.v2_5._url import extract_brand_domain

_logger = logging.getLogger(__name__)

# Paths that v3 sellers reconstruct correctly — no information is lost.
_STANDARD_BRAND_MANIFEST_PATHS = {"", "/", "/.well-known/brand.json"}

# Per-URL dedup so high-RPS v2.5 buyers don't saturate the log pipeline.
_brand_manifest_path_warned: set[str] = set()
from adcp.compat.legacy.v2_5._url import (
extract_brand_domain,
warn_brand_manifest_path_lossy,
)

# v2.5 channel buckets to v3 channel slugs. Multi-mapped buckets resolve
# to all listed v3 channels; downstream consumers can narrow further via
Expand Down Expand Up @@ -146,25 +139,14 @@ def adapt_request(payload: dict[str, Any]) -> dict[str, Any]:
brand_manifest = out.pop("brand_manifest", None)
if isinstance(brand_manifest, str) and brand_manifest and "brand" not in out:
domain = extract_brand_domain(brand_manifest)
parsed = urlparse(brand_manifest.strip())
if (
parsed.hostname is not None
and parsed.path not in _STANDARD_BRAND_MANIFEST_PATHS
and brand_manifest not in _brand_manifest_path_warned
):
_brand_manifest_path_warned.add(brand_manifest)
_logger.warning(
"brand_manifest at %s uses a non-standard path; "
"v3 sellers derive %s/.well-known/brand.json from BrandReference.domain. "
"Manifest fetch may 404.",
brand_manifest,
domain,
)
warn_brand_manifest_path_lossy(brand_manifest, domain)
out["brand"] = {"domain": domain}
elif isinstance(brand_manifest, dict) and "brand" not in out:
url = brand_manifest.get("url")
if isinstance(url, str) and url.strip():
out["brand"] = {"domain": extract_brand_domain(url)}
domain = extract_brand_domain(url)
warn_brand_manifest_path_lossy(url, domain)
out["brand"] = {"domain": domain}

# promoted_offerings → catalog
promoted = out.pop("promoted_offerings", None)
Expand Down
74 changes: 56 additions & 18 deletions tests/test_legacy_adapter_v2_5_get_products.py
Original file line number Diff line number Diff line change
Expand Up @@ -141,22 +141,22 @@ def test_brand_manifest_inline_object_brand_wins_when_both_present() -> None:


# ---------------------------------------------------------------------------
# Request: brand_manifest non-standard-path warning (issue #683)
# Request: brand_manifest non-standard-path warning (issues #683, #687)
# ---------------------------------------------------------------------------

_GP_LOGGER = "adcp.compat.legacy.v2_5.get_products"
_URL_LOGGER = "adcp.compat.legacy.v2_5._url"


def test_brand_manifest_cdn_url_warns(caplog, monkeypatch) -> None: # type: ignore[no-untyped-def]
"""CDN brand_manifest with non-standard path emits a WARNING about the
information loss so operators can debug downstream 404s."""
import adcp.compat.legacy.v2_5.get_products as _gp
import adcp.compat.legacy.v2_5._url as _url_mod

monkeypatch.setattr(_gp, "_brand_manifest_path_warned", set())
with caplog.at_level(logging.WARNING, logger=_GP_LOGGER):
monkeypatch.setattr(_url_mod, "_brand_manifest_path_warned", set())
with caplog.at_level(logging.WARNING, logger=_URL_LOGGER):
out = _adapt({"brand_manifest": "https://cdn.acmecorp.com/brand-manifest.json"})
assert out["brand"] == {"domain": "cdn.acmecorp.com"}
records = [r for r in caplog.records if r.name == _GP_LOGGER]
records = [r for r in caplog.records if r.name == _URL_LOGGER]
assert len(records) == 1
msg = records[0].getMessage()
assert "non-standard path" in msg
Expand All @@ -166,38 +166,76 @@ def test_brand_manifest_cdn_url_warns(caplog, monkeypatch) -> None: # type: ign
def test_brand_manifest_well_known_path_no_warning(caplog, monkeypatch) -> None: # type: ignore[no-untyped-def]
"""Standard /.well-known/brand.json path does NOT warn — v3 derives the
same URL that the buyer originally pointed at."""
import adcp.compat.legacy.v2_5.get_products as _gp
import adcp.compat.legacy.v2_5._url as _url_mod

monkeypatch.setattr(_gp, "_brand_manifest_path_warned", set())
with caplog.at_level(logging.WARNING, logger=_GP_LOGGER):
monkeypatch.setattr(_url_mod, "_brand_manifest_path_warned", set())
with caplog.at_level(logging.WARNING, logger=_URL_LOGGER):
_adapt({"brand_manifest": "https://acme.com/.well-known/brand.json"})
assert not any(r.name == _GP_LOGGER for r in caplog.records)
assert not any(r.name == _URL_LOGGER for r in caplog.records)


def test_brand_manifest_bare_domain_no_warning(caplog, monkeypatch) -> None: # type: ignore[no-untyped-def]
"""Bare domain brand_manifest (no scheme) does not false-positive — urlparse
returns hostname=None for schemeless strings so we skip the path check."""
import adcp.compat.legacy.v2_5.get_products as _gp
import adcp.compat.legacy.v2_5._url as _url_mod

monkeypatch.setattr(_gp, "_brand_manifest_path_warned", set())
with caplog.at_level(logging.WARNING, logger=_GP_LOGGER):
monkeypatch.setattr(_url_mod, "_brand_manifest_path_warned", set())
with caplog.at_level(logging.WARNING, logger=_URL_LOGGER):
_adapt({"brand_manifest": "acme.com"})
assert not any(r.name == _GP_LOGGER for r in caplog.records)
assert not any(r.name == _URL_LOGGER for r in caplog.records)


def test_brand_manifest_cdn_url_warns_once_dedup(caplog, monkeypatch) -> None: # type: ignore[no-untyped-def]
"""Same CDN URL repeated across requests only logs one warning (dedup)."""
import adcp.compat.legacy.v2_5.get_products as _gp
import adcp.compat.legacy.v2_5._url as _url_mod

monkeypatch.setattr(_gp, "_brand_manifest_path_warned", set())
monkeypatch.setattr(_url_mod, "_brand_manifest_path_warned", set())
url = "https://cdn.acmecorp.com/brand-manifest.json"
with caplog.at_level(logging.WARNING, logger=_GP_LOGGER):
with caplog.at_level(logging.WARNING, logger=_URL_LOGGER):
_adapt({"brand_manifest": url})
_adapt({"brand_manifest": url})
records = [r for r in caplog.records if r.name == _GP_LOGGER]
records = [r for r in caplog.records if r.name == _URL_LOGGER]
assert len(records) == 1


def test_brand_manifest_inline_object_cdn_url_warns(caplog, monkeypatch) -> None: # type: ignore[no-untyped-def]
"""Inline BrandManifest object with a CDN url also warns — same
information loss as the URL-string branch (regression for #687)."""
import adcp.compat.legacy.v2_5._url as _url_mod

monkeypatch.setattr(_url_mod, "_brand_manifest_path_warned", set())
with caplog.at_level(logging.WARNING, logger=_URL_LOGGER):
out = _adapt(
{
"brand_manifest": {
"url": "https://cdn.acmecorp.com/brand-manifest.json",
"name": "ACME Corp",
}
}
)
assert out["brand"] == {"domain": "cdn.acmecorp.com"}
records = [r for r in caplog.records if r.name == _URL_LOGGER]
assert len(records) == 1
assert "cdn.acmecorp.com" in records[0].getMessage()


def test_brand_manifest_inline_object_well_known_no_warning(caplog, monkeypatch) -> None: # type: ignore[no-untyped-def]
"""Inline object with canonical /.well-known/brand.json url does NOT warn."""
import adcp.compat.legacy.v2_5._url as _url_mod

monkeypatch.setattr(_url_mod, "_brand_manifest_path_warned", set())
with caplog.at_level(logging.WARNING, logger=_URL_LOGGER):
_adapt(
{
"brand_manifest": {
"url": "https://acme.com/.well-known/brand.json",
"name": "ACME",
}
}
)
assert not any(r.name == _URL_LOGGER for r in caplog.records)


# ---------------------------------------------------------------------------
# Request: promoted_offerings → catalog
# ---------------------------------------------------------------------------
Expand Down
Loading
Loading