|
| 1 | +"""Ensure env-var names in public docstrings are backed by actual |
| 2 | +``os.environ`` reads in the source tree. |
| 3 | +
|
| 4 | +Motivated by the drift in ``client.py:381-388``: the docstring claimed |
| 5 | +``PYTHON_ENV`` / ``ENV`` / ``ENVIRONMENT`` were honored; only ``ADCP_ENV`` |
| 6 | +was. This test prevents that class of documentation drift from re-entering |
| 7 | +silently. |
| 8 | +
|
| 9 | +Detection rules (applied per docstring line, with a ±1-line window for |
| 10 | +Pattern B context keywords): |
| 11 | +
|
| 12 | +- **Pattern A** ``VARNAME=value`` — RST double-backtick env-var assignment |
| 13 | + syntax, e.g. `` ``ADCP_VALIDATION_MODE=strict|warn|off`` ``. |
| 14 | +- **Pattern B** SCREAMING_CASE_WITH_UNDERSCORE inside a double-backtick span |
| 15 | + on a line whose ±1 neighborhood contains an env-var context keyword |
| 16 | + (``environ``, ``env var``, ``environment variable``, ``flips``). Restricted |
| 17 | + to backtick-wrapped tokens to prevent false positives from protocol |
| 18 | + constants or OOP-override prose (e.g. "this method overrides HTTP_TIMEOUT"). |
| 19 | + "overrides" is intentionally excluded as a standalone keyword for the same |
| 20 | + reason; legitimate override docs use ``VARNAME=value`` or "VARNAME env var". |
| 21 | +
|
| 22 | +Exclusion: lines containing ``deliberately`` or ``not honored`` are skipped — |
| 23 | +the var is explicitly documented as inactive on that line. Bare "ignored" is |
| 24 | +not an exclusion keyword to avoid suppressing real checks when a docstring |
| 25 | +says "X is ignored when Y is set explicitly." |
| 26 | +
|
| 27 | +Known limitations: |
| 28 | +- Single-token env vars without underscores (e.g. ``PORT``) are invisible to |
| 29 | + both patterns; the regex requires at least one underscore to distinguish env |
| 30 | + vars from acronyms and other short tokens. |
| 31 | +- ``_env_vars_in_use()`` scans raw source text, so ``os.environ`` calls that |
| 32 | + appear inside docstring examples are included in the "in use" set. A future |
| 33 | + docstring embedding a fictional ``os.environ["FAKE_VAR"]`` call would |
| 34 | + suppress detection of that name. This is an accepted trade-off for |
| 35 | + simplicity. |
| 36 | +- ``ast.walk`` collects public methods defined inside private classes. This is |
| 37 | + a known over-approximation; fixing it requires tracking parent scope. |
| 38 | +""" |
| 39 | + |
| 40 | +from __future__ import annotations |
| 41 | + |
| 42 | +import ast |
| 43 | +import re |
| 44 | +from pathlib import Path |
| 45 | + |
| 46 | +SRC_ROOT = Path(__file__).parent.parent / "src" / "adcp" |
| 47 | + |
| 48 | +# Pattern A: ``VARNAME=anything`` inside RST double-backtick literals. |
| 49 | +# Captures the var name without the trailing ``=``. |
| 50 | +_ASSIGNMENT_PATTERN = re.compile(r"``([A-Z][A-Z0-9]*(?:_[A-Z0-9]+)+)=") |
| 51 | + |
| 52 | +# Pattern B — step 1: extract double-backtick span contents. |
| 53 | +_BACKTICK_SPAN = re.compile(r"``([^`]+)``") |
| 54 | + |
| 55 | +# Pattern B — step 2: find SCREAMING_CASE_WITH_UNDERSCORE inside a span. |
| 56 | +# Two-step avoids greedy-match bugs where a single regex would capture a |
| 57 | +# suffix like ``P_ENV`` from ``ADCP_ENV``. |
| 58 | +_SCREAMING_WITH_UNDERSCORE = re.compile(r"\b([A-Z][A-Z0-9]*(?:_[A-Z0-9]+)+)\b") |
| 59 | + |
| 60 | +# Context keywords for Pattern B. Present in the line's ±1 neighborhood, they |
| 61 | +# indicate the backtick-wrapped token is an env var. "overrides" is excluded |
| 62 | +# (see module docstring) to prevent OOP-override false positives. |
| 63 | +_ENV_CONTEXT_RE = re.compile( |
| 64 | + r"\benviron\b|\benv var\b|\benvironment variable\b|\bflips?\b", |
| 65 | + re.IGNORECASE, |
| 66 | +) |
| 67 | + |
| 68 | +# Words that, on the SAME line as a token, explicitly mark it as inactive. |
| 69 | +# Bare "ignored" is excluded: "X is ignored when Y is set" would suppress |
| 70 | +# legitimate checks if we matched it. |
| 71 | +_IGNORE_CONTEXT_RE = re.compile( |
| 72 | + r"\bdeliberately\b|\bnot honored\b", |
| 73 | + re.IGNORECASE, |
| 74 | +) |
| 75 | + |
| 76 | +# All the ways the source code reads env vars: |
| 77 | +# os.environ.get("VAR"), os.getenv("VAR"), os.environ["VAR"] |
| 78 | +_ENVIRON_READ_RE = re.compile( |
| 79 | + r"""(?:os\.environ\.get|os\.getenv)\(\s*['"]([A-Z][A-Z0-9_]+)['"]\s*""" |
| 80 | + r"""|os\.environ\[\s*['"]([A-Z][A-Z0-9_]+)['"]\s*\]""" |
| 81 | +) |
| 82 | + |
| 83 | + |
| 84 | +def _env_vars_in_use() -> frozenset[str]: |
| 85 | + """Return all env var names read by any file under src/adcp/. |
| 86 | +
|
| 87 | + Scans raw source text; see module docstring for the known limitation about |
| 88 | + os.environ calls embedded in docstring examples. |
| 89 | + """ |
| 90 | + found: set[str] = set() |
| 91 | + for path in SRC_ROOT.rglob("*.py"): |
| 92 | + src = path.read_text(encoding="utf-8") |
| 93 | + for m in _ENVIRON_READ_RE.finditer(src): |
| 94 | + var = m.group(1) or m.group(2) |
| 95 | + if var: |
| 96 | + found.add(var) |
| 97 | + return frozenset(found) |
| 98 | + |
| 99 | + |
| 100 | +def _collect_docstrings(tree: ast.Module) -> list[tuple[str, int]]: |
| 101 | + """Return (docstring, lineno) for all public nodes, including dunders. |
| 102 | +
|
| 103 | + See module docstring for the known limitation about public methods inside |
| 104 | + private classes. |
| 105 | + """ |
| 106 | + results: list[tuple[str, int]] = [] |
| 107 | + for node in ast.walk(tree): |
| 108 | + name: str = getattr(node, "name", "") or "" |
| 109 | + # Include dunder methods (__init__, __str__, …) — their docstrings are |
| 110 | + # user-facing. Skip single-leading-underscore private names only. |
| 111 | + if name.startswith("_") and not (name.startswith("__") and name.endswith("__")): |
| 112 | + continue |
| 113 | + if not isinstance( |
| 114 | + node, (ast.Module, ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef) |
| 115 | + ): |
| 116 | + continue |
| 117 | + doc = ast.get_docstring(node) |
| 118 | + if doc: |
| 119 | + # ast.Module has no lineno; use 1 since module docstrings are at |
| 120 | + # the top of the file. |
| 121 | + lineno = getattr(node, "lineno", 1) |
| 122 | + results.append((doc, lineno)) |
| 123 | + return results |
| 124 | + |
| 125 | + |
| 126 | +def _env_vars_from_docstring(doc: str) -> set[str]: |
| 127 | + """Extract env var names documented as active in this docstring. |
| 128 | +
|
| 129 | + Uses a ±1-line window for Pattern B so that a context keyword on the line |
| 130 | + immediately after the env-var reference is still detected (e.g. |
| 131 | + ``ADCP_ENV`` on one line, ``flips …`` on the next). |
| 132 | + """ |
| 133 | + lines = doc.splitlines() |
| 134 | + found: set[str] = set() |
| 135 | + |
| 136 | + for i, line in enumerate(lines): |
| 137 | + # Pattern A: ``VARNAME=...`` assignment syntax — line-local check. |
| 138 | + if not _IGNORE_CONTEXT_RE.search(line): |
| 139 | + for m in _ASSIGNMENT_PATTERN.finditer(line): |
| 140 | + found.add(m.group(1)) |
| 141 | + |
| 142 | + # Pattern B: backtick-wrapped SCREAMING_CASE in env-var context. |
| 143 | + if _IGNORE_CONTEXT_RE.search(line): |
| 144 | + continue # this line explicitly marks var(s) as inactive |
| 145 | + |
| 146 | + # Collect SCREAMING_CASE tokens from inside each ``...`` span. |
| 147 | + screaming_vars = [ |
| 148 | + vm.group(1) |
| 149 | + for sm in _BACKTICK_SPAN.finditer(line) |
| 150 | + for vm in _SCREAMING_WITH_UNDERSCORE.finditer(sm.group(1)) |
| 151 | + ] |
| 152 | + if not screaming_vars: |
| 153 | + continue |
| 154 | + |
| 155 | + # Context keyword may be on this line or an immediate neighbor (±1). |
| 156 | + window = [line] |
| 157 | + if i > 0: |
| 158 | + window.append(lines[i - 1]) |
| 159 | + if i < len(lines) - 1: |
| 160 | + window.append(lines[i + 1]) |
| 161 | + if any(_ENV_CONTEXT_RE.search(w) for w in window): |
| 162 | + found.update(screaming_vars) |
| 163 | + |
| 164 | + return found |
| 165 | + |
| 166 | + |
| 167 | +def test_docstring_env_var_consistency() -> None: |
| 168 | + """Every env-var name documented as active in a public docstring must |
| 169 | + have a matching ``os.environ`` read somewhere in ``src/adcp/``. |
| 170 | +
|
| 171 | + A violation means either: |
| 172 | + - the implementation was removed but the docs were not updated, or |
| 173 | + - the docs claim an override that was never implemented. |
| 174 | +
|
| 175 | + Fix: add the missing ``os.environ.get(...)`` call, or add the phrase |
| 176 | + 'deliberately ignored' to the docstring line. |
| 177 | + """ |
| 178 | + in_use = _env_vars_in_use() |
| 179 | + violations: list[str] = [] |
| 180 | + |
| 181 | + for filepath in sorted(SRC_ROOT.rglob("*.py")): |
| 182 | + try: |
| 183 | + source = filepath.read_text(encoding="utf-8") |
| 184 | + tree = ast.parse(source, filename=str(filepath)) |
| 185 | + except SyntaxError: |
| 186 | + # Syntax errors are caught by dedicated CI checks; skip here. |
| 187 | + continue # pragma: no cover |
| 188 | + |
| 189 | + rel = str(filepath.relative_to(SRC_ROOT.parent.parent)) |
| 190 | + for doc, lineno in _collect_docstrings(tree): |
| 191 | + for var in _env_vars_from_docstring(doc): |
| 192 | + if var not in in_use: |
| 193 | + violations.append( |
| 194 | + f"{rel}:{lineno}: docstring references {var!r} " |
| 195 | + f"but no os.environ read of {var!r} found in src/adcp/" |
| 196 | + ) |
| 197 | + |
| 198 | + assert not violations, ( |
| 199 | + "Docstring/code env-var drift detected.\n" |
| 200 | + "Either add the missing os.environ read, or mark the var as " |
| 201 | + "'deliberately ignored' in the docstring.\n\n" + "\n".join(violations) |
| 202 | + ) |
0 commit comments