Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 29 additions & 2 deletions airbyte_cdk/test/standard_tests/docker_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,9 @@
from airbyte_cdk.models.connector_metadata import MetadataFile
from airbyte_cdk.test.entrypoint_wrapper import EntrypointOutput
from airbyte_cdk.test.models import ConnectorTestScenario
from airbyte_cdk.test.standard_tests.scenario_loader import (
load_metadata_smoke_test_scenarios,
)
from airbyte_cdk.utils.connector_paths import (
ACCEPTANCE_TEST_CONFIG,
find_connector_root,
Expand Down Expand Up @@ -122,9 +125,25 @@ def get_scenarios(
) -> list[ConnectorTestScenario]:
"""Get acceptance tests for a given category.

Scenarios are sourced from two files, in this order:

1. `acceptance-test-config.yml` (CAT-style scenarios), parsed as today.
2. `metadata.yaml` `connectorTestSuitesOptions[].suite == 'smokeTests'`,
with `${secret-ref:...}` and `${relative-date:...}` sigils resolved.

Smoke-test scenarios are additive; CAT-style scenarios are unchanged
for any connector that has not opted into the metadata.yaml dialect.

This has to be a separate function because pytest does not allow
parametrization of fixtures with arguments from the test class itself.
"""
cat_scenarios = cls._get_acceptance_test_scenarios()
smoke_scenarios = cls._get_smoke_test_scenarios()
return cat_scenarios + smoke_scenarios

@classmethod
def _get_acceptance_test_scenarios(cls) -> list[ConnectorTestScenario]:
"""Load CAT-style scenarios from `acceptance-test-config.yml`."""
try:
all_tests_config = cls.acceptance_test_config
except FileNotFoundError as e:
Expand Down Expand Up @@ -158,9 +177,17 @@ def get_scenarios(

test_scenarios.append(scenario)

deduped_test_scenarios = cls._dedup_scenarios(test_scenarios)
return cls._dedup_scenarios(test_scenarios)

return deduped_test_scenarios
@classmethod
def _get_smoke_test_scenarios(cls) -> list[ConnectorTestScenario]:
"""Load smoke-test scenarios declared in `metadata.yaml`, if any."""
connector_root = cls.get_connector_root_dir()
metadata_path = connector_root / "metadata.yaml"
return load_metadata_smoke_test_scenarios(
metadata_path=metadata_path,
connector_root=connector_root,
)

@pytest.mark.skipif(
shutil.which("docker") is None,
Expand Down
288 changes: 288 additions & 0 deletions airbyte_cdk/test/standard_tests/scenario_loader.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,288 @@
# Copyright (c) 2024 Airbyte, Inc., all rights reserved.
"""Load smoke-test scenarios from a connector's `metadata.yaml`.

Scenarios declared under
`data.connectorTestSuitesOptions[].suite == 'smokeTests'` are converted into
`ConnectorTestScenario` instances so they can be parametrized by the standard
test suite alongside scenarios discovered from `acceptance-test-config.yml`.

`configSettings` values support two interpolation sigils:

- `${secret-ref:<item>/<section>/<field>}` resolves a credential previously
written to `secrets/.env.<section>` by an out-of-band fetcher (today the
airbyte-ops-mcp `secrets fetch` command). All three segments are required.
- `${relative-date:-P<duration>}` resolves an ISO-8601 duration anchored at
today's UTC midnight and renders as an RFC 3339 timestamp
(e.g. `2026-03-22T00:00:00Z`).

`AIRBYTE_TEST_CREDS_PROVIDER=1pass` and `AIRBYTE_TEST_CREDS_1PASS_VAULT_NAME`
are required *only when* a `${secret-ref:...}` sigil is actually present in
the resolved scenario. Pure `${relative-date:...}` or literal scenarios resolve
without any provider env vars.
"""

from __future__ import annotations

import os
import re
from datetime import datetime, timezone
from pathlib import Path
from typing import Any

import isodate
import yaml

from airbyte_cdk.test.models import ConnectorTestScenario

SMOKE_TEST_SUITE_NAME = "smokeTests"
SECRETS_DIRNAME = "secrets"
ENV_FILE_PREFIX = ".env."

_SECRET_REF_PATTERN = re.compile(
r"^\$\{secret-ref:(?P<item>[^/}]+)/(?P<section>[^/}]+)/(?P<field>[^/}]+)\}$"
)
_RELATIVE_DATE_PATTERN = re.compile(r"^\$\{relative-date:-(?P<duration>P[^}]+)\}$")
Comment on lines +41 to +44
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Embedded sigils slip through silently — intentional?

Both _SECRET_REF_PATTERN and _RELATIVE_DATE_PATTERN are anchored with ^...$, so a value like "prefix-${secret-ref:a/b/c}" or "https://api.example.com/${relative-date:-P30D}/data" would not match either regex. It would also fall through the raw.startswith("${") and raw.endswith("}") guard on line 172, so it gets returned unchanged with the literal ${...} text intact — no error, no resolution.

Given that scenarios are config dicts which often embed timestamps or tokens into URLs, it might be worth either (a) supporting embedded sigils via re.sub, or (b) tightening the unrecognized-sigil check to fail on any string containing ${...} regardless of position. wdyt?

🛡️ Sketch of a stricter "any-${...}" guard
-    if raw.startswith("${") and raw.endswith("}"):
+    # Catch embedded or malformed sigils too — they would otherwise pass through
+    # unresolved and silently corrupt scenario configs.
+    if "${" in raw:
         raise SigilResolutionError(
             f"Unrecognized smoke-test sigil: `{raw}`. Supported forms are "
             "`${secret-ref:<item>/<section>/<field>}` and "
             "`${relative-date:-P<duration>}`."
         )

Also applies to: 158-179

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@airbyte_cdk/test/standard_tests/scenario_loader.py` around lines 41 - 44, The
current anchored regexes _SECRET_REF_PATTERN and _RELATIVE_DATE_PATTERN only
match whole-string sigils and the existing guard using raw.startswith("${") and
raw.endswith("}") lets embedded sigils (e.g. "prefix-${secret-ref:...}" or URLs
with ${...}) pass through unresolved; update scenario_loader.py to either
(preferred) support embedded sigils by applying re.sub with _SECRET_REF_PATTERN
and _RELATIVE_DATE_PATTERN so embedded occurrences are replaced, or
(alternative) tighten the unrecognized-sigil guard (replace the
raw.startswith/raw.endswith check) to search for any r"\$\{[^}]+\}" and raise an
error when any such pattern is found but not matched by the known regexes;
reference _SECRET_REF_PATTERN, _RELATIVE_DATE_PATTERN, and the current
raw.startswith("${") and raw.endswith("}") guard when making the change.



class SigilResolutionError(ValueError):
"""Raised when a sigil cannot be resolved into a concrete value."""


def load_metadata_smoke_test_scenarios(
metadata_path: Path,
connector_root: Path,
) -> list[ConnectorTestScenario]:
"""Return smoke-test scenarios declared in a connector's `metadata.yaml`.

Returns an empty list when the file does not exist or contains no
`smokeTests` suite. Sigil resolution is eager: each scenario's
`configSettings` is fully materialized before the `ConnectorTestScenario`
is constructed, so downstream test code never sees an unresolved sigil.
"""
if not metadata_path.exists():
return []

metadata = yaml.safe_load(metadata_path.read_text()) or {}
suites = metadata.get("data", {}).get("connectorTestSuitesOptions", [])
Comment on lines +65 to +66
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

Minor null-safety on connectorTestSuitesOptions.

If a metadata.yaml author writes connectorTestSuitesOptions: with nothing under it, yaml.safe_load returns None for that key, and for suite in suites: will raise TypeError: 'NoneType' object is not iterable. Cheap fix — same pattern you already use on line 73. wdyt?

🛡️ Defensive `or []`
-    suites = metadata.get("data", {}).get("connectorTestSuitesOptions", [])
+    suites = metadata.get("data", {}).get("connectorTestSuitesOptions") or []
🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@airbyte_cdk/test/standard_tests/scenario_loader.py` around lines 65 - 66, The
variable `suites` can be None if `connectorTestSuitesOptions:` is present but
empty in the YAML; change the assignment of `suites` so it defensively coalesces
None to an empty list (e.g., use the existing pattern of
`...get("connectorTestSuitesOptions") or []`) so `for suite in suites:` never
fails; update the line that defines `suites` in scenario_loader.py accordingly.


scenarios: list[ConnectorTestScenario] = []
for suite in suites:
if not isinstance(suite, dict) or suite.get("suite") != SMOKE_TEST_SUITE_NAME:
continue

for raw_scenario in suite.get("scenarios", []) or []:
scenarios.append(
_build_scenario(
raw_scenario=raw_scenario,
connector_root=connector_root,
)
)
Comment on lines +69 to +79
Copy link

Copilot AI Apr 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suite.get("scenarios") items are passed directly into _build_scenario, but if a connector's metadata.yaml contains a non-mapping entry (e.g. a string/number), _build_scenario will crash with AttributeError on raw_scenario.get(...). Consider validating raw_scenario is a dict in the loop (or inside _build_scenario) and raising SigilResolutionError with a clear message instead of an unhandled exception.

Copilot uses AI. Check for mistakes.

return scenarios


def _build_scenario(
*,
raw_scenario: dict[str, Any],
connector_root: Path,
) -> ConnectorTestScenario:
name = raw_scenario.get("name")
if not name or not isinstance(name, str):
raise SigilResolutionError("Smoke-test scenario is missing a string `name` field.")

raw_settings = raw_scenario.get("configSettings", {}) or {}
if not isinstance(raw_settings, dict):
raise SigilResolutionError(
f"Smoke-test scenario `{name}` has non-mapping `configSettings`."
)

env_cache: dict[str, dict[str, str]] = {}
has_secret_ref = _contains_secret_ref(raw_settings)
if has_secret_ref:
_validate_provider_env()
Comment on lines +99 to +102
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

🔴 Smoke-test scenarios with ${secret-ref:...} crash test collection, breaking the --no-creds workflow

When a connector's metadata.yaml declares smoke-test scenarios containing ${secret-ref:...} sigils, get_scenarios() eagerly resolves them via _get_smoke_test_scenarios()load_metadata_smoke_test_scenarios()_build_scenario()_validate_provider_env(). If the credentials environment is not configured, this raises SigilResolutionError during pytest_generate_tests (pytest_hooks.py:173), crashing all test collection — not just the tests that need credentials.

The existing --no-creds workflow (-m "not requires_creds") filters tests after collection, but collection itself now fails before filtering can run. Additionally, requires_creds (scenario.py:192-194) checks only self.config_path, so smoke-test scenarios (which use config_dict, not config_path) always return False — meaning even if collection succeeded, these scenarios would not be filtered by --no-creds.

The net effect: any connector that adds a ${secret-ref:...} smoke-test scenario to metadata.yaml will break all test runs for developers/CI without credentials, even for tests that don't require credentials (e.g., test_docker_image_build_and_spec).

Prompt for agents
The problem: _build_scenario eagerly validates provider env and resolves secret-ref sigils. This happens during get_scenarios() which is called during pytest_generate_tests (test collection). If credentials are unavailable, a SigilResolutionError crashes ALL test collection, preventing even non-credentials tests from running. The --no-creds CLI flag cannot help because it applies mark-based filtering after collection.

Additionally, ConnectorTestScenario.requires_creds (in airbyte_cdk/test/models/scenario.py:192-194) only checks config_path for 'secrets' in its path parts. Smoke-test scenarios use config_dict (not config_path), so requires_creds always returns False for them.

Two things need to happen:
1. In load_metadata_smoke_test_scenarios (or _get_smoke_test_scenarios in docker_base.py), catch SigilResolutionError for individual scenarios and either skip them with a warning or defer resolution. This prevents one unresolvable scenario from blocking all test collection.
2. Smoke-test scenarios that were resolved from secret-ref sigils should be marked as requiring credentials, either by extending the requires_creds property in ConnectorTestScenario to also check for a flag set during resolution, or by adding a marker on the scenario during _build_scenario.
Open in Devin Review

Was this helpful? React with 👍 or 👎 to provide feedback.


resolved_settings = _resolve_sigils(
value=raw_settings,
connector_root=connector_root,
env_cache=env_cache,
)
Comment on lines +99 to +108
Copy link

Copilot AI Apr 25, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Right now secret-ref scenarios validate provider env vars and resolve secrets during scenario discovery/collection. This will make airbyte-cdk ... --no-creds (which relies on the requires_creds marker) fail during pytest collection for connectors that opt into smoke tests, because the exception is raised before pytest can filter out requires_creds tests. Consider marking these scenarios as requires_creds (e.g., by setting a config_path under secrets/ or adding an explicit flag on ConnectorTestScenario) and deferring ${secret-ref:...} resolution/validation until the scenario is actually executed, so no-creds runs can still collect and skip cleanly.

Copilot uses AI. Check for mistakes.

scenario = ConnectorTestScenario.model_validate({"config_dict": resolved_settings})
# `_id` is a Pydantic v2 private attribute, so it is not populated by
# `model_validate` and the model is frozen. Bypass `__setattr__` once at
# construction time so the scenario reports a stable, human-readable id.
object.__setattr__(scenario, "_id", f"smoke-{name}")
return scenario


def _resolve_sigils(
*,
value: Any,
connector_root: Path,
env_cache: dict[str, dict[str, str]],
) -> Any:
"""Recursively resolve `${...}` sigils inside `value`."""
if isinstance(value, dict):
return {
k: _resolve_sigils(
value=v,
connector_root=connector_root,
env_cache=env_cache,
)
for k, v in value.items()
}
if isinstance(value, list):
return [
_resolve_sigils(
value=item,
connector_root=connector_root,
env_cache=env_cache,
)
for item in value
]
if isinstance(value, str):
return _resolve_string(
raw=value,
connector_root=connector_root,
env_cache=env_cache,
)
return value


def _resolve_string(
*,
raw: str,
connector_root: Path,
env_cache: dict[str, dict[str, str]],
) -> str:
secret_match = _SECRET_REF_PATTERN.match(raw)
if secret_match is not None:
return _resolve_secret_ref(
item=secret_match.group("item"),
section=secret_match.group("section"),
field=secret_match.group("field"),
connector_root=connector_root,
env_cache=env_cache,
)

date_match = _RELATIVE_DATE_PATTERN.match(raw)
if date_match is not None:
return _resolve_relative_date(duration_str=date_match.group("duration"))

if raw.startswith("${") and raw.endswith("}"):
raise SigilResolutionError(
f"Unrecognized smoke-test sigil: `{raw}`. Supported forms are "
"`${secret-ref:<item>/<section>/<field>}` and "
"`${relative-date:-P<duration>}`."
)

return raw


def _resolve_secret_ref(
*,
item: str,
section: str,
field: str,
connector_root: Path,
env_cache: dict[str, dict[str, str]],
) -> str:
"""Resolve `${secret-ref:item/section/field}` against `secrets/.env.<section>`.

The section segment is the literal 1Password section name (no prefix
stripping). Env var names inside each `.env.<section>` file are flat:
`<ITEM_UPPER>_<FIELD_UPPER>`.
"""
env_path = connector_root / SECRETS_DIRNAME / f"{ENV_FILE_PREFIX}{section}"
env_vars = _load_env_file(path=env_path, cache=env_cache)

var_name = f"{item.upper()}_{field.upper()}"
if var_name not in env_vars:
raise SigilResolutionError(
f"Env var `{var_name}` not found in `{env_path}` for sigil "
f"`${{secret-ref:{item}/{section}/{field}}}`."
)
return env_vars[var_name]
Comment on lines +196 to +205
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🌐 Web query:

What naming convention does the airbyte-ops-mcp secrets fetchcommand use for env var names written to.env.

files (dashes vs underscores)?

💡 Result:

The airbyte-ops-mcp secrets fetch command writes environment variable names to .env.

files using UPPERCASE_WITH_UNDERSCORES convention (e.g., AIRBYTE_CLOUD_CLIENT_ID, POSTGRES_PASSWORD). No sources explicitly document dashes in env var names for this command; all examples use underscores, consistent with standard dotenv and Unix environment variable practices. The repo appears internal (airbytehq/airbyte-ops-mcp), with PyPI pages showing .env examples using underscores.

Citations:


🏁 Script executed:

# First, let's look at the code context around lines 196-205
cd airbyte_cdk && head -220 test/standard_tests/scenario_loader.py | tail -40

Repository: airbytehq/airbyte-python-cdk

Length of output: 1449


🏁 Script executed:

# Search for sigil parsing logic to understand how item/field are extracted
rg "secret-ref" -A 3 -B 3 --type py

Repository: airbytehq/airbyte-python-cdk

Length of output: 11451


🏁 Script executed:

# Look for any validation of item/field values
rg "item|field" test/standard_tests/scenario_loader.py -B 2 -A 2 -n

Repository: airbytehq/airbyte-python-cdk

Length of output: 151


🏁 Script executed:

# Check if there are any tests that use dashed segments
rg "secret-ref.*-" test/ --type py

Repository: airbytehq/airbyte-python-cdk

Length of output: 118


Normalize dashes in env var name construction or validate at parse time.

The regex pattern (?P<item>[^/}]+) allows dashes in item and field segments, but the env var construction var_name = f"{item.upper()}_{field.upper()}" doesn't normalize them. If a field like api-key is used, it would create API-KEY (invalid for POSIX env var names). While airbyte-ops-mcp itself follows UPPERCASE_WITH_UNDERSCORES convention with no documented dash usage, the code would silently break if someone manually creates .env files with dashed field names or if upstream changes.

Would you prefer to normalize dashes to underscores during construction, or reject dashed segments at parse time with a clear error message? The former is more forgiving; the latter explicitly guides users to the expected format.

🤖 Prompt for AI Agents
Verify each finding against the current code and only fix it if needed.

In `@airbyte_cdk/test/standard_tests/scenario_loader.py` around lines 196 - 205,
The env var name construction can produce invalid POSIX names when segments
contain dashes; update the code around var_name in scenario_loader.py (the block
using ENV_FILE_PREFIX, SECRETS_DIRNAME, _load_env_file and raising
SigilResolutionError) to normalize dashes to underscores before uppercasing and
joining: e.g., replace "-" with "_" on both item and field prior to building
var_name, then use that normalized var_name to look up env_vars; also add a
brief comment and (optional) a clearer SigilResolutionError message that shows
the normalized var name when not found so debugging is easier.



def _resolve_relative_date(*, duration_str: str) -> str:
"""Render `${relative-date:-P<duration>}` as RFC 3339 at today midnight UTC."""
duration = isodate.parse_duration(duration_str)
now = datetime.now(tz=timezone.utc)
today_midnight = datetime(
year=now.year,
month=now.month,
day=now.day,
tzinfo=timezone.utc,
)
rendered: datetime = today_midnight - duration
if rendered.tzinfo is None:
rendered = rendered.replace(tzinfo=timezone.utc)
return rendered.strftime("%Y-%m-%dT%H:%M:%SZ")


def _load_env_file(
Copy link
Copy Markdown
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Shouldn't we use the python dotenv library rather than rebuild this from scratch??

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fair — you're right. Originally deferred for "fewer escape-quirk surprises" but airbyte-ops-mcp already uses python-dotenv to write these files, so the CDK reading them with anything else is asymmetric and a future bug factory. Switching _load_env_file to dotenv.dotenv_values() in the next push.

*,
path: Path,
cache: dict[str, dict[str, str]],
) -> dict[str, str]:
"""Parse a `.env.<section>` file into a name->value mapping.

Supports `KEY=value` and `KEY="value"` lines. Comments (`#`) and blank
lines are ignored. The result is cached per `path` so that scenarios with
multiple sigils against the same section pay the parse cost once.
"""
cache_key = str(path)
if cache_key in cache:
return cache[cache_key]

if not path.exists():
raise SigilResolutionError(
f"Env file `{path}` not found. Run `airbyte-ops secrets fetch "
"<connector>` to populate it before invoking the test suite."
)

parsed: dict[str, str] = {}
for raw_line in path.read_text().splitlines():
line = raw_line.strip()
if not line or line.startswith("#"):
continue
if "=" not in line:
continue
key, _, value = line.partition("=")
key = key.strip()
value = value.strip()
if len(value) >= 2 and value[0] == value[-1] and value[0] in ('"', "'"):
value = value[1:-1]
parsed[key] = value

cache[cache_key] = parsed
return parsed


def _contains_secret_ref(value: Any) -> bool:
"""Walk `value` and return True if any string contains a `secret-ref` sigil."""
if isinstance(value, dict):
return any(_contains_secret_ref(v) for v in value.values())
if isinstance(value, list):
return any(_contains_secret_ref(item) for item in value)
if isinstance(value, str):
return _SECRET_REF_PATTERN.match(value) is not None
return False


def _validate_provider_env() -> None:
"""Fail fast if the secret-provider env var contract is unmet."""
provider = os.environ.get("AIRBYTE_TEST_CREDS_PROVIDER", "")
vault = os.environ.get("AIRBYTE_TEST_CREDS_1PASS_VAULT_NAME", "")
if provider != "1pass":
raise SigilResolutionError(
"Smoke-test scenario uses `${secret-ref:...}` sigils but "
"`AIRBYTE_TEST_CREDS_PROVIDER` is not set to `1pass` "
"(only supported provider today)."
)
if not vault:
raise SigilResolutionError(
"Smoke-test scenario uses `${secret-ref:...}` sigils but "
"`AIRBYTE_TEST_CREDS_1PASS_VAULT_NAME` is not set."
)
Loading
Loading