Skip to content

Commit 17df4b9

Browse files
bokelleyclaude
andcommitted
fix(migrate): don't corrupt mixed numbered+unknown imports under --auto-apply
Per review on #540 — a generated_poc import mixing a known numbered asset (Assets81) with an unknown one (Assets149) was getting half-rewritten: Input : from adcp.types.generated_poc.core.format import Assets81, Assets149 Output: from adcp.types.generated_poc.core.format import VideoFormatAsset, Assets149 leaving VideoFormatAsset imported from a private module path that doesn't export it — guaranteed ImportError in adopter source. Root cause: step 1 (file-wide Assets<N> substitution) ran before step 2 (import-path safety check) had a chance to veto the line. Fix: line-by-line rewrite. For each generated_poc single-line import, compute the post-numbered-substitution symbol set up-front; only run both substitutions when every symbol on the line is in ``_AUTO_APPLY_PUBLIC_SYMBOLS``. Mixed-unsafe lines are left entirely alone. Non-import lines still get free numbered substitution. Scan-time also fixed: numbered references on a mixed-unsafe import line now report ``flag_numbered`` instead of ``auto_applied`` so the report matches the file content. Adds regression test ``test_auto_apply_mixed_numbered_known_unknown_does_not_corrupt_import``. Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 1f70c45 commit 17df4b9

2 files changed

Lines changed: 101 additions & 50 deletions

File tree

src/adcp/migrate/v3_to_v4.py

Lines changed: 62 additions & 40 deletions
Original file line numberDiff line numberDiff line change
@@ -312,9 +312,7 @@ def _iter_python_files(root: Path) -> list[Path]:
312312
# Regex used in the ``--auto-apply`` import-path fix pass: matches the
313313
# ``from adcp.types.generated_poc...`` prefix so it can be replaced with
314314
# ``from adcp.types``.
315-
_GENERATED_POC_MODULE_RE = re.compile(
316-
r"from\s+adcp\.types\.generated_poc(?:\.[\w.]+)?\s+import"
317-
)
315+
_GENERATED_POC_MODULE_RE = re.compile(r"from\s+adcp\.types\.generated_poc(?:\.[\w.]+)?\s+import")
318316

319317
# Union of symbol names that ``--auto-apply`` can safely reroute to
320318
# ``adcp.types``: the explicit flag_private symbol map plus every public
@@ -359,6 +357,28 @@ def scan_file(
359357
auto_apply_hits = False # any numbered or private-import rewrites queued
360358

361359
for lineno, line in enumerate(original.splitlines(), start=1):
360+
# Pre-pass: when this line is a single-line ``generated_poc``
361+
# import, decide whether the line as a whole is auto-apply-safe.
362+
# An import is *unsafe* when at least one of its symbols (after
363+
# the hypothetical numbered substitution) isn't in
364+
# ``_AUTO_APPLY_PUBLIC_SYMBOLS``; rewriting one symbol while
365+
# leaving another behind would leave the line importing a
366+
# public name from a private module — guaranteed ImportError.
367+
# The rewrite block (`updated.splitlines()` later) skips
368+
# unsafe-mixed lines; the per-symbol Finding emission below
369+
# also treats numbered references on those lines as
370+
# ``flag_numbered`` rather than ``auto_applied`` so the report
371+
# matches the file content.
372+
line_is_mixed_unsafe_import = False
373+
if "adcp.types.generated_poc" in line:
374+
from_match = _GENERATED_POC_FROM_IMPORT.search(line)
375+
if from_match:
376+
raw_syms = [s.strip() for s in from_match.group(1).split(",")]
377+
pre_syms = [r.split(" as ")[0].strip() for r in raw_syms if r.strip()]
378+
post_syms = [NUMBERED_ASSETS_RENAMES.get(s, s) for s in pre_syms]
379+
if pre_syms and not all(s in _AUTO_APPLY_PUBLIC_SYMBOLS for s in post_syms):
380+
line_is_mixed_unsafe_import = True
381+
362382
for old, new in ASSET_CONTENT_RENAMES.items():
363383
for match in _RENAME_PATTERNS[old].finditer(line):
364384
findings.append(
@@ -392,7 +412,7 @@ def scan_file(
392412
for match in NUMBERED_ASSETS_PATTERN.finditer(line):
393413
symbol = match.group(0)
394414
alias = NUMBERED_ASSETS_RENAMES.get(symbol)
395-
if auto_apply and alias is not None:
415+
if auto_apply and alias is not None and not line_is_mixed_unsafe_import:
396416
findings.append(
397417
Finding(
398418
kind="auto_applied",
@@ -470,8 +490,7 @@ def scan_file(
470490
continue
471491

472492
all_known = all(
473-
repl is not None
474-
or (auto_apply and symbol in NUMBERED_ASSETS_RENAMES)
493+
repl is not None or (auto_apply and symbol in NUMBERED_ASSETS_RENAMES)
475494
for symbol, repl in parsed
476495
)
477496

@@ -551,38 +570,45 @@ def scan_file(
551570
needs_write = True
552571

553572
if apply_changes and auto_apply and auto_apply_hits:
554-
# Step 1: substitute Assets<N> → SemanticAlias everywhere
555-
# (handles both usage sites and import symbols).
556-
for old, new in NUMBERED_ASSETS_RENAMES.items():
557-
updated = _NUMBERED_RENAME_PATTERNS[old].sub(new, updated)
558-
559-
# Step 2: fix any generated_poc import whose symbols are now all
560-
# resolvable to adcp.types. This covers two cases:
561-
#
562-
# a. ``from generated_poc.core.format import Assets81`` became
563-
# ``from generated_poc.core.format import VideoFormatAsset``
564-
# after step 1 — rewrite the module path.
565-
#
566-
# b. ``from generated_poc.core.x import ContextObject`` whose
567-
# all-known flag_private findings were promoted to auto_applied
568-
# — rewrite the module path here too.
569-
#
570-
# The check uses ``_AUTO_APPLY_PUBLIC_SYMBOLS`` (a frozen set of all
571-
# known-safe names) so we never fix an import that still references
572-
# an unknown symbol.
573+
# Process the file line-by-line so generated_poc imports get a
574+
# safety check against the post-numbered-substitution symbol set
575+
# before any rewrite happens. The earlier "Step 1: substitute
576+
# Assets<N> file-wide; Step 2: fix import paths only when safe"
577+
# ordering corrupted mixed lines like
578+
# ``from generated_poc.core.format import Assets81, Assets149``
579+
# — Assets81 became VideoFormatAsset while Assets149 stayed,
580+
# leaving VideoFormatAsset imported from a private module.
573581
new_lines: list[str] = []
574582
for text_line in updated.splitlines(keepends=True):
575-
if "adcp.types.generated_poc" not in text_line:
583+
is_generated_poc_import = (
584+
"adcp.types.generated_poc" in text_line
585+
and _GENERATED_POC_FROM_IMPORT.search(text_line) is not None
586+
)
587+
if is_generated_poc_import:
588+
m = _GENERATED_POC_FROM_IMPORT.search(text_line)
589+
assert m is not None # narrowed above
590+
raw_syms = [s.strip() for s in m.group(1).split(",")]
591+
pre_syms = [r.split(" as ")[0].strip() for r in raw_syms if r.strip()]
592+
# Apply the hypothetical numbered rename to each symbol
593+
# so we can check if the *post-rename* symbol set is
594+
# safe.
595+
post_syms = [NUMBERED_ASSETS_RENAMES.get(s, s) for s in pre_syms]
596+
if post_syms and all(s in _AUTO_APPLY_PUBLIC_SYMBOLS for s in post_syms):
597+
# Whole import is safe — substitute numbered names
598+
# AND fix the module path.
599+
for old, new in NUMBERED_ASSETS_RENAMES.items():
600+
text_line = _NUMBERED_RENAME_PATTERNS[old].sub(new, text_line)
601+
text_line = _GENERATED_POC_MODULE_RE.sub("from adcp.types import", text_line)
602+
# Mixed line — leave it alone. The findings list still
603+
# carries the per-symbol flag_private and flag_numbered
604+
# entries so the adopter sees the work to do.
576605
new_lines.append(text_line)
577606
continue
578-
m = _GENERATED_POC_FROM_IMPORT.search(text_line)
579-
if m:
580-
raw_syms = [s.strip() for s in m.group(1).split(",")]
581-
syms = [r.split(" as ")[0].strip() for r in raw_syms if r.strip()]
582-
if syms and all(sym in _AUTO_APPLY_PUBLIC_SYMBOLS for sym in syms):
583-
text_line = _GENERATED_POC_MODULE_RE.sub(
584-
"from adcp.types import", text_line
585-
)
607+
# Non-import lines: substitute numbered names freely (the
608+
# semantic alias is already importable via adcp.types and
609+
# any local reference the line carries is a usage site).
610+
for old, new in NUMBERED_ASSETS_RENAMES.items():
611+
text_line = _NUMBERED_RENAME_PATTERNS[old].sub(new, text_line)
586612
new_lines.append(text_line)
587613
updated = "".join(new_lines)
588614
needs_write = True
@@ -597,9 +623,7 @@ def run(root: Path, *, apply_changes: bool = False, auto_apply: bool = False) ->
597623
report = Report()
598624
for path in _iter_python_files(root):
599625
report.scanned_files += 1
600-
findings, new_contents = scan_file(
601-
path, apply_changes=apply_changes, auto_apply=auto_apply
602-
)
626+
findings, new_contents = scan_file(path, apply_changes=apply_changes, auto_apply=auto_apply)
603627
for f in findings:
604628
report.add(f)
605629
if new_contents is not None:
@@ -691,9 +715,7 @@ def _format_text_report(report: Report, *, apply_changes: bool, auto_apply: bool
691715
lines.append(f"Rewrote {report.rewritten_files} files in place.")
692716
lines.append("Review with `git diff` before committing.")
693717

694-
if not auto_apply and any(
695-
f.kind in ("flag_private", "flag_numbered") for f in report.flagged
696-
):
718+
if not auto_apply and any(f.kind in ("flag_private", "flag_numbered") for f in report.flagged):
697719
lines.append("")
698720
lines.append(
699721
"Tip: rerun with --auto-apply to mechanically fix the "

tests/test_migrate_v3_to_v4.py

Lines changed: 39 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -618,8 +618,7 @@ def test_auto_apply_rewrites_all_known_private_import(tmp_path: Path) -> None:
618618
path = _write(
619619
tmp_path,
620620
"code.py",
621-
"from adcp.types.generated_poc.core.context import ContextObject\n"
622-
"x = ContextObject()\n",
621+
"from adcp.types.generated_poc.core.context import ContextObject\n" "x = ContextObject()\n",
623622
)
624623
report = v3_to_v4.run(tmp_path, apply_changes=True, auto_apply=True)
625624

@@ -682,7 +681,8 @@ def test_auto_apply_mixed_line_not_rewritten(tmp_path: Path) -> None:
682681

683682
# Unknown symbol gets a generic flag too (silent-drop bug is fixed).
684683
generic_flags = [
685-
f for f in report.flagged
684+
f
685+
for f in report.flagged
686686
if f.kind == "flag_private" and f.before == "adcp.types.generated_poc"
687687
]
688688
assert len(generic_flags) >= 1
@@ -718,16 +718,15 @@ def test_auto_apply_rewrites_numbered_asset_and_fixes_import_path(tmp_path: Path
718718
path = _write(
719719
tmp_path,
720720
"code.py",
721-
"from adcp.types.generated_poc.core.format import Assets81\n"
722-
"slot: Assets81\n",
721+
"from adcp.types.generated_poc.core.format import Assets81\n" "slot: Assets81\n",
723722
)
724723
v3_to_v4.run(tmp_path, apply_changes=True, auto_apply=True)
725724

726725
rewritten = path.read_text()
727726
assert "Assets81" not in rewritten
728-
assert "adcp.types.generated_poc" not in rewritten, (
729-
"generated_poc import path must be fixed after numbered rename"
730-
)
727+
assert (
728+
"adcp.types.generated_poc" not in rewritten
729+
), "generated_poc import path must be fixed after numbered rename"
731730
assert "from adcp.types import VideoFormatAsset" in rewritten
732731
assert "slot: VideoFormatAsset" in rewritten
733732

@@ -756,8 +755,7 @@ def test_auto_apply_numbered_plus_known_symbol_same_line(tmp_path: Path) -> None
756755
path = _write(
757756
tmp_path,
758757
"code.py",
759-
"from adcp.types.generated_poc.core.x import Assets81, ContextObject\n"
760-
"slot: Assets81\n",
758+
"from adcp.types.generated_poc.core.x import Assets81, ContextObject\n" "slot: Assets81\n",
761759
)
762760
report = v3_to_v4.run(tmp_path, apply_changes=True, auto_apply=True)
763761

@@ -773,6 +771,37 @@ def test_auto_apply_numbered_plus_known_symbol_same_line(tmp_path: Path) -> None
773771
assert not any(f.kind == "flag_private" for f in report.flagged)
774772

775773

774+
def test_auto_apply_mixed_numbered_known_unknown_does_not_corrupt_import(
775+
tmp_path: Path,
776+
) -> None:
777+
"""Regression: a generated_poc import mixing a known numbered asset
778+
(Assets81) with an unknown numbered asset (Assets149) MUST NOT
779+
silently rewrite Assets81 alone — that would leave VideoFormatAsset
780+
imported from a private module path that doesn't export it
781+
(guaranteed ImportError in adopter source)."""
782+
path = _write(
783+
tmp_path,
784+
"code.py",
785+
"from adcp.types.generated_poc.core.format import Assets81, Assets149\n",
786+
)
787+
report = v3_to_v4.run(tmp_path, apply_changes=True, auto_apply=True)
788+
789+
# File content must be untouched on the unsafe-mixed line.
790+
rewritten = path.read_text()
791+
assert "from adcp.types.generated_poc.core.format import Assets81, Assets149" in rewritten
792+
assert "VideoFormatAsset" not in rewritten
793+
794+
# Findings: Assets81 and Assets149 are both flagged for review (the
795+
# adopter has to split the import or wait for Assets149 to land in
796+
# the rename table). Neither is reported as auto_applied.
797+
auto_applied_names = {f.before for f in report.auto_applied}
798+
assert "Assets81" not in auto_applied_names
799+
assert "Assets149" not in auto_applied_names
800+
flagged_names = {f.before for f in report.flagged if f.kind == "flag_numbered"}
801+
assert "Assets81" in flagged_names
802+
assert "Assets149" in flagged_names
803+
804+
776805
# ---------------------------------------------------------------------------
777806
# --auto-apply: combined behaviour + idempotency
778807
# ---------------------------------------------------------------------------

0 commit comments

Comments
 (0)