Metabuilder-Labs · anilmurty · May 29, 2026 · May 29, 2026 · May 29, 2026
@@ -51,9 +51,11 @@ def _invoke(runner, config, args):
 
 # --- _collect_rows ---
 
-def test_empty_config_yields_no_rows_apart_from_default_alerts():
+def test_empty_config_yields_default_alerts_and_capture_rows():
     # Default AlertsConfig has one stdout channel — so the alerts row always
-    # shows. defaults.budget unset, no provider budgets, no agents, no capture.
+    # shows. capture also shows even when all toggles are off (#71 finding 7 —
+    # an explicit "off" is still a policy choice worth surfacing).
+    # defaults.budget unset, no provider budgets, no agents.
     cfg = _empty_config()
     rows = _collect_rows(cfg)
     policies = [r.policy for r in rows]
@@ -62,7 +64,7 @@ def test_empty_config_yields_no_rows_apart_from_default_alerts():
     assert "defaults.budget" not in policies
     assert all(not p.startswith("budget.") for p in policies)
     assert all(not p.startswith("agents.") for p in policies)
-    assert "capture" not in policies
+    assert "capture" in policies
 
 
 def test_provider_budget_row_shows_usd_and_plan():
@@ -122,9 +124,11 @@ def test_agent_rows_emitted_only_for_overrides():
     assert "file_delete" in sa_row.setting
 
 
-def test_capture_row_only_when_any_flag_true():
+def test_capture_row_always_shown():
+    # Capture is a policy choice even when all toggles are off (#71 finding 7).
     cfg = _empty_config()
-    assert not any(r.policy == "capture" for r in _collect_rows(cfg))
+    row = next(r for r in _collect_rows(cfg) if r.policy == "capture")
+    assert "prompts=false" in row.setting
     cfg.capture = CaptureConfig(prompts=True)
     row = next(r for r in _collect_rows(cfg) if r.policy == "capture")
     assert "prompts=true" in row.setting

@@ -20,18 +20,38 @@ async def prometheus_metrics(request: Request) -> PlainTextResponse:
     lines: list[str] = []
 
     # -- Cost per agent --
+    # Prometheus requires each {metric, label_set} to appear at most once
+    # per scrape. `db.get_cost_summary(CostFilters(group_by="agent"))`
+    # returns one row per (agent_id, model), which produces duplicate label
+    # sets when the same agent uses multiple models. Aggregate by agent_id
+    # here before emitting (#71 finding 8).
     _add_header(lines, "tj_cost_usd_total", "gauge", "Running cost total per agent")
     cost_rows = db.get_cost_summary(CostFilters(group_by="agent"))
+    agent_totals: dict[str, dict[str, float]] = {}
     for row in cost_rows:
         agent = row.agent_id or "unknown"
-        lines.append(f'tj_cost_usd_total{{agent_id="{_escape(agent)}"}} {row.cost_usd}')
+        bucket = agent_totals.setdefault(
+            agent, {"cost_usd": 0.0, "input_tokens": 0, "output_tokens": 0},
+        )
+        bucket["cost_usd"] += float(row.cost_usd or 0.0)
+        bucket["input_tokens"] += int(row.input_tokens or 0)
+        bucket["output_tokens"] += int(row.output_tokens or 0)
+    for agent, totals in agent_totals.items():
+        lines.append(
+            f'tj_cost_usd_total{{agent_id="{_escape(agent)}"}} {totals["cost_usd"]}'
+        )
 
     # -- Tokens per agent and type --
     _add_header(lines, "tj_tokens_total", "counter", "Token usage by type")
-    for row in cost_rows:
-        agent = row.agent_id or "unknown"
-        lines.append(f'tj_tokens_total{{agent_id="{_escape(agent)}",type="input"}} {row.input_tokens}')
-        lines.append(f'tj_tokens_total{{agent_id="{_escape(agent)}",type="output"}} {row.output_tokens}')
+    for agent, totals in agent_totals.items():
+        lines.append(
+            f'tj_tokens_total{{agent_id="{_escape(agent)}",type="input"}} '
+            f'{totals["input_tokens"]}'
+        )
+        lines.append(
+            f'tj_tokens_total{{agent_id="{_escape(agent)}",type="output"}} '
+            f'{totals["output_tokens"]}'
+        )
 
     # -- Tool calls per agent --
     tool_rows = db.get_tool_calls(None, None, None)

@@ -32,6 +32,8 @@ def cmd_cost(ctx: click.Context, agent: str | None, since: str,
     if compare:
         if hasattr(db, "conn"):
             until_dt = utcnow()
+            from tokenjam.core.cost import override_since_for_compare
+            since_dt = override_since_for_compare(compare, since_dt, until_dt)
             try:
                 diff = compute_cost_diff(db, since_dt, until_dt, compare, agent_id=agent)
             except ValueError as exc:

@@ -444,7 +444,18 @@ def _onboard_claude_code(
                 elif result.sessions_seen > 0:
                     backfill_msg = "history already up to date"
             except Exception as exc:
-                backfill_msg = f"skipped ({exc})"
+                # Friendly message for the most common case: daemon holds
+                # the DB write lock (#71 finding 2). NOTE(review): any error
+                # mentioning "i/o error" also lands here, lock-related or not.
+                _err = str(exc).lower()
+                if "lock" in _err or "i/o error" in _err or "io error" in _err:
+                    backfill_msg = (
+                        "skipped — daemon holds the DB write lock. "
+                        "Stop the daemon (`tj stop`) and re-run "
+                        "`tj backfill claude-code`."
+                    )
+                else:
+                    backfill_msg = f"skipped ({exc})"
     except Exception:
         pass
 

@@ -69,6 +69,28 @@ def _dominant_plan(plan_mix: dict[str, int]) -> str:
     return max(known.items(), key=lambda kv: kv[1])[0]
 
 
+def _config_declared_plan(config) -> str | None:
+    """
+    Look up the plan tier currently declared in the user's config.
+
+    Reads `plan` from each `config.budgets` entry — i.e.
+    `[budget.<provider>].plan`, which `tj onboard --reconfigure` sets.
+    Providers are visited in sorted order so the result is
+    deterministic; the first truthy plan is returned as a str, or None
+    when budgets are missing/empty or no provider declares a plan.
+
+    Feeds the optimize renderer's divergence note (#71 finding 1); the
+    data-driven rendering itself is never overridden by this value.
+    """
+    provider_budgets = getattr(config, "budgets", None) or {}
+    declared = (
+        getattr(provider_budgets[name], "plan", None)
+        for name in sorted(provider_budgets)
+    )
+    first = next((plan for plan in declared if plan), None)
+    return None if first is None else str(first)
+
+
 @click.command("optimize")
 @click.option("--agent", default=None, help="Scope to a specific agent_id.")
 @click.option("--since", default="30d", help="Window for analysis (default 30d).")
@@ -120,6 +142,16 @@ def cmd_optimize(
 
     until_dt = utcnow()
 
+    # --compare last-7d / last-30d / last-week overrides --since so the
+    # analysis window matches the comparison period (#71 finding 5);
+    # otherwise `tj optimize` did 30d-vs-30d while `tj cost` did 7d-vs-7d.
+    # NOTE(review): `since` is re-derived in whole days for *every*
+    # --compare value; a sub-day window (if --since allows one) becomes "0d".
+    if compare:
+        from tokenjam.core.cost import override_since_for_compare
+        since_dt = override_since_for_compare(compare, since_dt, until_dt)
+        since = f"{(until_dt - since_dt).days}d"
+
     # Two paths depending on whether the daemon holds the DB lock.
     #
     # Local DB available (no daemon, or we got handed a real DuckDBBackend) →
@@ -207,6 +239,7 @@ def cmd_optimize(
 
     dominant = _dominant_plan(plan_mix)
     pricing_mode = _pricing_mode_for(dominant)
+    declared_plan = _config_declared_plan(config)
 
     # --export-config branch: write the snippet to disk and exit. Skips
     # the normal rendering path. The user reads the snippet file and
@@ -276,6 +309,7 @@ def cmd_optimize(
     _render_report(
         report, agent=agent, plan_mix=plan_mix,
         dominant_plan=dominant, pricing_mode=pricing_mode,
+        declared_plan=declared_plan,
     )
     if cost_diff is not None:
         from tokenjam.cli.cmd_cost import _render_diff
@@ -313,6 +347,7 @@ def _render_report(
     plan_mix: dict[str, int] | None = None,
     dominant_plan: str = "unknown",
     pricing_mode: str = "unknown",
+    declared_plan: str | None = None,
 ) -> None:
     w = report.window
     scope_tag = f", {agent}" if agent else ""
@@ -375,6 +410,23 @@ def _render_report(
                 f"resolve.[/dim]\n"
             )
 
+    # Surface a divergence note when the user has reconfigured to a new plan
+    # but historical sessions still reflect the previous plan. Honest framing:
+    # show the data as it was actually generated, but flag that future
+    # sessions will be costed differently (#71 finding 1).
+    if (
+        declared_plan
+        and declared_plan != dominant_plan
+        and declared_plan in _PLAN_LABEL_AND_FEE  # only flag subscription deltas
+    ):
+        label, _ = _PLAN_LABEL_AND_FEE[declared_plan]
+        console.print(
+            f"[dim]Note: your config declares "
+            f"[bold]{label}[/bold] but historical sessions ran under "
+            f"a different plan — rendering reflects what actually ran. "
+            f"New sessions will use the configured plan.[/dim]\n"
+        )
+
     if w.sessions == 0:
         console.print("[dim]No sessions in window.[/dim]")
         return
@@ -589,7 +641,7 @@ def _export_snippet(
             plan_tier=dominant_plan,
             agent_id=agent_id,
         )
-        ext = "json"
+        ext = "jsonc"
     else:
         # Click's Choice() already constrained this; defensive only.
         raise click.ClickException(f"Unknown export target: {target}")

@@ -51,11 +51,15 @@ def cmd_policy() -> None:
 
 
 @cmd_policy.command("list")
+@click.option("--json", "output_json_flag", is_flag=True,
+              help="Emit machine-readable JSON.")
 @click.pass_context
-def cmd_policy_list(ctx: click.Context) -> None:
+def cmd_policy_list(ctx: click.Context, output_json_flag: bool) -> None:
     """List existing alerts, drift, schema, and budget configuration."""
     config: TjConfig = ctx.obj["config"]
-    output_json: bool = ctx.obj.get("output_json", False)
+    # Honour either the root `tj --json policy list` form or the
+    # command-level `tj policy list --json` form (#71 finding 6).
+    output_json: bool = output_json_flag or ctx.obj.get("output_json", False)
 
     rows = _collect_rows(config)
 
@@ -226,8 +230,9 @@ def _drift_summary(drift: DriftConfig) -> str:
 
 
 def _capture_rows(capture: CaptureConfig) -> list[PolicyRow]:
-    if not any([capture.prompts, capture.completions, capture.tool_inputs, capture.tool_outputs]):
-        return []
+    # Always emit the row — capture is a policy choice even when all four
+    # toggles are off (the default). Suppressing it hid the section from
+    # users who'd explicitly verified their privacy settings (#71 finding 7).
     parts = [
         f"prompts={str(capture.prompts).lower()}",
         f"completions={str(capture.completions).lower()}",

@@ -213,6 +213,32 @@ def parse_compare_window(
     return prev_since, prev_until
 
 
+def override_since_for_compare(
+    compare: str, default_since: datetime, current_until: datetime,
+) -> datetime:
+    """
+    Pick the current-window start that a `--compare` keyword implies.
+
+    `last-7d` and `last-week` imply a 7-day window and `last-30d` a
+    30-day window, both measured back from `current_until`. Matching
+    ignores surrounding whitespace and letter case. Any other value —
+    `previous`, `last-month`, an explicit date range — implies no
+    length, so `default_since` is returned unchanged.
+
+    Why this exists (#71 finding 5): `--since` defaults differ per
+    command (30d for `tj optimize`, 7d for `tj cost`), so without an
+    override `--compare last-7d` rendered 30d-vs-30d in one command
+    and 7d-vs-7d in the other. Pinning these keywords to a fixed length
+    makes the comparison symmetric and identical everywhere.
+    """
+    # Keywords that carry their own window length, in days.
+    implied_days = {"last-7d": 7, "last-week": 7, "last-30d": 30}
+    days = implied_days.get(compare.strip().lower())
+    if days is None:
+        return default_since
+    return current_until - timedelta(days=days)
+
+
 def compute_window_totals(
     conn, since: datetime, until: datetime, agent_id: str | None = None,
 ) -> WindowTotals:

@@ -28,7 +28,9 @@
 
 
 def _comment_block(lines: list[str]) -> str:
-    return "\n".join(f"  // {line}" for line in lines)
+    # Render each entry as a `//` comment line, newline-joined. The 6-space
+    # indent lines up with the "routing_recommendations" body (#71 finding 4).
+    return "\n".join(f"      // {line}" for line in lines)
 
 
 def render_claude_code_snippet(