Skip to content

Commit d897b62

Browse files
bokelleyclaude
andauthored
test(adcp): docstring/env-var consistency test (#440)
* test(adcp): add docstring/env-var consistency test Adds tests/test_docstring_consistency.py, which AST-parses public method docstrings across src/adcp/, extracts env-var names documented as active (via ``VARNAME=value`` assignment syntax or env-var context keywords), and asserts each one is backed by a real os.environ read in the source tree. Closes #424 https://claude.ai/code/${CLAUDE_CODE_REMOTE_SESSION_ID} * test(adcp): fix Pattern B regex; drop overrides keyword; sliding window Address pre-PR review blockers: - Replace single greedy regex with two-step approach (backtick span extraction + word-boundary SCREAMING_CASE search) to avoid matching suffixes like P_ENV from ADCP_ENV. - Add ±1 line sliding window for Pattern B context keywords so ADCP_ENV on one line / "flips" on the next is correctly detected. - Remove standalone "overrides" from context keywords to prevent false positives from OOP-override prose. - Drop bare `ignored?` from exclusion pattern; keep only "deliberately" and "not honored" to avoid suppressing legitimate active-var checks. - Fix module-level lineno fallback to 1 (not 0). - Add known-limitations section to module docstring. https://claude.ai/code/${CLAUDE_CODE_REMOTE_SESSION_ID} --------- Co-authored-by: Claude <noreply@anthropic.com>
1 parent 117d4f2 commit d897b62

1 file changed

Lines changed: 202 additions & 0 deletions

File tree

Lines changed: 202 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,202 @@
1+
"""Ensure env-var names in public docstrings are backed by actual
2+
``os.environ`` reads in the source tree.
3+
4+
Motivated by the drift in ``client.py:381-388``: the docstring claimed
5+
``PYTHON_ENV`` / ``ENV`` / ``ENVIRONMENT`` were honored; only ``ADCP_ENV``
6+
was. This test prevents that class of documentation drift from re-entering
7+
silently.
8+
9+
Detection rules (applied per docstring line, with a ±1-line window for
10+
Pattern B context keywords):
11+
12+
- **Pattern A** ``VARNAME=value`` — RST double-backtick env-var assignment
13+
syntax, e.g. `` ``ADCP_VALIDATION_MODE=strict|warn|off`` ``.
14+
- **Pattern B** SCREAMING_CASE_WITH_UNDERSCORE inside a double-backtick span
15+
on a line whose ±1 neighborhood contains an env-var context keyword
16+
(``environ``, ``env var``, ``environment variable``, ``flips``). Restricted
17+
to backtick-wrapped tokens to prevent false positives from protocol
18+
constants or OOP-override prose (e.g. "this method overrides HTTP_TIMEOUT").
19+
"overrides" is intentionally excluded as a standalone keyword for the same
20+
reason; legitimate override docs use ``VARNAME=value`` or "VARNAME env var".
21+
22+
Exclusion: lines containing ``deliberately`` or ``not honored`` are skipped —
23+
the var is explicitly documented as inactive on that line. Bare "ignored" is
24+
not an exclusion keyword to avoid suppressing real checks when a docstring
25+
says "X is ignored when Y is set explicitly."
26+
27+
Known limitations:
28+
- Single-token env vars without underscores (e.g. ``PORT``) are invisible to
29+
both patterns; the regex requires at least one underscore to distinguish env
30+
vars from acronyms and other short tokens.
31+
- ``_env_vars_in_use()`` scans raw source text, so ``os.environ`` calls that
32+
appear inside docstring examples are included in the "in use" set. A future
33+
docstring embedding a fictional ``os.environ["FAKE_VAR"]`` call would
34+
suppress detection of that name. This is an accepted trade-off for
35+
simplicity.
36+
- ``ast.walk`` collects public methods defined inside private classes. This is
37+
a known over-approximation; fixing it requires tracking parent scope.
38+
"""
39+
40+
from __future__ import annotations
41+
42+
import ast
43+
import re
44+
from pathlib import Path
45+
46+
SRC_ROOT = Path(__file__).parent.parent / "src" / "adcp"
47+
48+
# Pattern A: ``VARNAME=anything`` inside RST double-backtick literals.
49+
# Captures the var name without the trailing ``=``.
50+
_ASSIGNMENT_PATTERN = re.compile(r"``([A-Z][A-Z0-9]*(?:_[A-Z0-9]+)+)=")
51+
52+
# Pattern B — step 1: extract double-backtick span contents.
53+
_BACKTICK_SPAN = re.compile(r"``([^`]+)``")
54+
55+
# Pattern B — step 2: find SCREAMING_CASE_WITH_UNDERSCORE inside a span.
56+
# Two-step avoids greedy-match bugs where a single regex would capture a
57+
# suffix like ``P_ENV`` from ``ADCP_ENV``.
58+
_SCREAMING_WITH_UNDERSCORE = re.compile(r"\b([A-Z][A-Z0-9]*(?:_[A-Z0-9]+)+)\b")
59+
60+
# Context keywords for Pattern B. Present in the line's ±1 neighborhood, they
61+
# indicate the backtick-wrapped token is an env var. "overrides" is excluded
62+
# (see module docstring) to prevent OOP-override false positives.
63+
_ENV_CONTEXT_RE = re.compile(
64+
r"\benviron\b|\benv var\b|\benvironment variable\b|\bflips?\b",
65+
re.IGNORECASE,
66+
)
67+
68+
# Words that, on the SAME line as a token, explicitly mark it as inactive.
69+
# Bare "ignored" is excluded: "X is ignored when Y is set" would suppress
70+
# legitimate checks if we matched it.
71+
_IGNORE_CONTEXT_RE = re.compile(
72+
r"\bdeliberately\b|\bnot honored\b",
73+
re.IGNORECASE,
74+
)
75+
76+
# All the ways the source code reads env vars:
77+
# os.environ.get("VAR"), os.getenv("VAR"), os.environ["VAR"]
78+
_ENVIRON_READ_RE = re.compile(
79+
r"""(?:os\.environ\.get|os\.getenv)\(\s*['"]([A-Z][A-Z0-9_]+)['"]\s*"""
80+
r"""|os\.environ\[\s*['"]([A-Z][A-Z0-9_]+)['"]\s*\]"""
81+
)
82+
83+
84+
def _env_vars_in_use() -> frozenset[str]:
85+
"""Return all env var names read by any file under src/adcp/.
86+
87+
Scans raw source text; see module docstring for the known limitation about
88+
os.environ calls embedded in docstring examples.
89+
"""
90+
found: set[str] = set()
91+
for path in SRC_ROOT.rglob("*.py"):
92+
src = path.read_text(encoding="utf-8")
93+
for m in _ENVIRON_READ_RE.finditer(src):
94+
var = m.group(1) or m.group(2)
95+
if var:
96+
found.add(var)
97+
return frozenset(found)
98+
99+
100+
def _collect_docstrings(tree: ast.Module) -> list[tuple[str, int]]:
101+
"""Return (docstring, lineno) for all public nodes, including dunders.
102+
103+
See module docstring for the known limitation about public methods inside
104+
private classes.
105+
"""
106+
results: list[tuple[str, int]] = []
107+
for node in ast.walk(tree):
108+
name: str = getattr(node, "name", "") or ""
109+
# Include dunder methods (__init__, __str__, …) — their docstrings are
110+
# user-facing. Skip single-leading-underscore private names only.
111+
if name.startswith("_") and not (name.startswith("__") and name.endswith("__")):
112+
continue
113+
if not isinstance(
114+
node, (ast.Module, ast.ClassDef, ast.FunctionDef, ast.AsyncFunctionDef)
115+
):
116+
continue
117+
doc = ast.get_docstring(node)
118+
if doc:
119+
# ast.Module has no lineno; use 1 since module docstrings are at
120+
# the top of the file.
121+
lineno = getattr(node, "lineno", 1)
122+
results.append((doc, lineno))
123+
return results
124+
125+
126+
def _env_vars_from_docstring(doc: str) -> set[str]:
127+
"""Extract env var names documented as active in this docstring.
128+
129+
Uses a ±1-line window for Pattern B so that a context keyword on the line
130+
immediately after the env-var reference is still detected (e.g.
131+
``ADCP_ENV`` on one line, ``flips …`` on the next).
132+
"""
133+
lines = doc.splitlines()
134+
found: set[str] = set()
135+
136+
for i, line in enumerate(lines):
137+
# Pattern A: ``VARNAME=...`` assignment syntax — line-local check.
138+
if not _IGNORE_CONTEXT_RE.search(line):
139+
for m in _ASSIGNMENT_PATTERN.finditer(line):
140+
found.add(m.group(1))
141+
142+
# Pattern B: backtick-wrapped SCREAMING_CASE in env-var context.
143+
if _IGNORE_CONTEXT_RE.search(line):
144+
continue # this line explicitly marks var(s) as inactive
145+
146+
# Collect SCREAMING_CASE tokens from inside each ``...`` span.
147+
screaming_vars = [
148+
vm.group(1)
149+
for sm in _BACKTICK_SPAN.finditer(line)
150+
for vm in _SCREAMING_WITH_UNDERSCORE.finditer(sm.group(1))
151+
]
152+
if not screaming_vars:
153+
continue
154+
155+
# Context keyword may be on this line or an immediate neighbor (±1).
156+
window = [line]
157+
if i > 0:
158+
window.append(lines[i - 1])
159+
if i < len(lines) - 1:
160+
window.append(lines[i + 1])
161+
if any(_ENV_CONTEXT_RE.search(w) for w in window):
162+
found.update(screaming_vars)
163+
164+
return found
165+
166+
167+
def test_docstring_env_var_consistency() -> None:
168+
"""Every env-var name documented as active in a public docstring must
169+
have a matching ``os.environ`` read somewhere in ``src/adcp/``.
170+
171+
A violation means either:
172+
- the implementation was removed but the docs were not updated, or
173+
- the docs claim an override that was never implemented.
174+
175+
Fix: add the missing ``os.environ.get(...)`` call, or add the phrase
176+
'deliberately ignored' to the docstring line.
177+
"""
178+
in_use = _env_vars_in_use()
179+
violations: list[str] = []
180+
181+
for filepath in sorted(SRC_ROOT.rglob("*.py")):
182+
try:
183+
source = filepath.read_text(encoding="utf-8")
184+
tree = ast.parse(source, filename=str(filepath))
185+
except SyntaxError:
186+
# Syntax errors are caught by dedicated CI checks; skip here.
187+
continue # pragma: no cover
188+
189+
rel = str(filepath.relative_to(SRC_ROOT.parent.parent))
190+
for doc, lineno in _collect_docstrings(tree):
191+
for var in _env_vars_from_docstring(doc):
192+
if var not in in_use:
193+
violations.append(
194+
f"{rel}:{lineno}: docstring references {var!r} "
195+
f"but no os.environ read of {var!r} found in src/adcp/"
196+
)
197+
198+
assert not violations, (
199+
"Docstring/code env-var drift detected.\n"
200+
"Either add the missing os.environ read, or mark the var as "
201+
"'deliberately ignored' in the docstring.\n\n" + "\n".join(violations)
202+
)

0 commit comments

Comments
 (0)