Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
14 changes: 9 additions & 5 deletions tests/unit/test_cmd_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,11 @@ def _invoke(runner, config, args):

# --- _collect_rows ---

def test_empty_config_yields_no_rows_apart_from_default_alerts():
def test_empty_config_yields_default_alerts_and_capture_rows():
# Default AlertsConfig has one stdout channel — so the alerts row always
# shows. defaults.budget unset, no provider budgets, no agents, no capture.
# shows. capture also shows even when all toggles are off (#71 fix —
# an explicit "off" is still a policy choice worth surfacing).
# defaults.budget unset, no provider budgets, no agents.
cfg = _empty_config()
rows = _collect_rows(cfg)
policies = [r.policy for r in rows]
Expand All @@ -62,7 +64,7 @@ def test_empty_config_yields_no_rows_apart_from_default_alerts():
assert "defaults.budget" not in policies
assert all(not p.startswith("budget.") for p in policies)
assert all(not p.startswith("agents.") for p in policies)
assert "capture" not in policies
assert "capture" in policies


def test_provider_budget_row_shows_usd_and_plan():
Expand Down Expand Up @@ -122,9 +124,11 @@ def test_agent_rows_emitted_only_for_overrides():
assert "file_delete" in sa_row.setting


def test_capture_row_only_when_any_flag_true():
def test_capture_row_always_shown():
# Capture is a policy choice even when all toggles are off (#71 fix).
cfg = _empty_config()
assert not any(r.policy == "capture" for r in _collect_rows(cfg))
row = next(r for r in _collect_rows(cfg) if r.policy == "capture")
assert "prompts=false" in row.setting
cfg.capture = CaptureConfig(prompts=True)
row = next(r for r in _collect_rows(cfg) if r.policy == "capture")
assert "prompts=true" in row.setting
Expand Down
30 changes: 25 additions & 5 deletions tokenjam/api/routes/metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,18 +20,38 @@ async def prometheus_metrics(request: Request) -> PlainTextResponse:
lines: list[str] = []

# -- Cost per agent --
# Prometheus requires each {metric, label_set} to appear at most once
# per scrape. `db.get_cost_summary(group_by="agent")` returns one row
# per (agent_id, model), which produces duplicate label sets when the
# same agent uses multiple models. Aggregate by agent_id here before
# emitting (#71 finding 8).
_add_header(lines, "tj_cost_usd_total", "gauge", "Running cost total per agent")
cost_rows = db.get_cost_summary(CostFilters(group_by="agent"))
agent_totals: dict[str, dict[str, float]] = {}
for row in cost_rows:
agent = row.agent_id or "unknown"
lines.append(f'tj_cost_usd_total{{agent_id="{_escape(agent)}"}} {row.cost_usd}')
bucket = agent_totals.setdefault(
agent, {"cost_usd": 0.0, "input_tokens": 0, "output_tokens": 0},
)
bucket["cost_usd"] += float(row.cost_usd or 0.0)
bucket["input_tokens"] += int(row.input_tokens or 0)
bucket["output_tokens"] += int(row.output_tokens or 0)
for agent, totals in agent_totals.items():
lines.append(
f'tj_cost_usd_total{{agent_id="{_escape(agent)}"}} {totals["cost_usd"]}'
)

# -- Tokens per agent and type --
_add_header(lines, "tj_tokens_total", "counter", "Token usage by type")
for row in cost_rows:
agent = row.agent_id or "unknown"
lines.append(f'tj_tokens_total{{agent_id="{_escape(agent)}",type="input"}} {row.input_tokens}')
lines.append(f'tj_tokens_total{{agent_id="{_escape(agent)}",type="output"}} {row.output_tokens}')
for agent, totals in agent_totals.items():
lines.append(
f'tj_tokens_total{{agent_id="{_escape(agent)}",type="input"}} '
f'{totals["input_tokens"]}'
)
lines.append(
f'tj_tokens_total{{agent_id="{_escape(agent)}",type="output"}} '
f'{totals["output_tokens"]}'
)

# -- Tool calls per agent --
tool_rows = db.get_tool_calls(None, None, None)
Expand Down
2 changes: 2 additions & 0 deletions tokenjam/cli/cmd_cost.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,8 @@ def cmd_cost(ctx: click.Context, agent: str | None, since: str,
if compare:
if hasattr(db, "conn"):
until_dt = utcnow()
from tokenjam.core.cost import override_since_for_compare
since_dt = override_since_for_compare(compare, since_dt, until_dt)
try:
diff = compute_cost_diff(db, since_dt, until_dt, compare, agent_id=agent)
except ValueError as exc:
Expand Down
13 changes: 12 additions & 1 deletion tokenjam/cli/cmd_onboard.py
Original file line number Diff line number Diff line change
Expand Up @@ -444,7 +444,18 @@ def _onboard_claude_code(
elif result.sessions_seen > 0:
backfill_msg = "history already up to date"
except Exception as exc:
backfill_msg = f"skipped ({exc})"
# Friendly message for the most common case: daemon holds
# the DB write lock. Backfill is a writer and can't share
# the lock; raw DuckDB IO error is unhelpful (#71 finding 2).
_err = str(exc).lower()
if "lock" in _err or "i/o error" in _err or "io error" in _err:
backfill_msg = (
"skipped — daemon holds the DB write lock. "
"Stop the daemon (`tj stop`) and re-run "
"`tj backfill claude-code`."
)
else:
backfill_msg = f"skipped ({exc})"
except Exception:
pass

Expand Down
54 changes: 53 additions & 1 deletion tokenjam/cli/cmd_optimize.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,28 @@ def _dominant_plan(plan_mix: dict[str, int]) -> str:
return max(known.items(), key=lambda kv: kv[1])[0]


def _config_declared_plan(config) -> str | None:
    """
    Return the user's currently-declared plan tier from config.

    Pulls from `[budget.<provider>].plan` — the field set by
    `tj onboard --reconfigure`. When multiple providers declare a plan
    we surface the first one in sorted provider order (deterministic).

    This is used by the optimize renderer to surface a note when the
    historical plan-tier mix disagrees with the user's currently-
    declared plan (#71 finding 1) — without overriding the data-driven
    rendering, which would be dishonest about what actually happened.

    Args:
        config: Object whose optional `budgets` attribute maps a provider
            name to a budget entry carrying an optional `plan` attribute.

    Returns:
        The first truthy `plan` value, converted with `str()`, or None
        when `budgets` is missing/empty or no provider declares a plan.
    """
    # A missing or None `budgets` attribute is treated as "no providers".
    budgets = getattr(config, "budgets", None) or {}
    # Sorting the provider names keeps the pick stable across runs,
    # independent of the mapping's insertion order.
    for provider in sorted(budgets):
        plan = getattr(budgets[provider], "plan", None)
        if plan:
            return str(plan)
    return None


@click.command("optimize")
@click.option("--agent", default=None, help="Scope to a specific agent_id.")
@click.option("--since", default="30d", help="Window for analysis (default 30d).")
Expand Down Expand Up @@ -120,6 +142,16 @@ def cmd_optimize(

until_dt = utcnow()

# If user passed --compare last-7d / last-30d / last-week, override
# --since so the analysis window matches the comparison period (#71
# finding 5). Without this, `tj optimize --compare last-7d` would do
# 30d-vs-30d (because --since defaults to 30d), while `tj cost` did
# 7d-vs-7d — same flag, two shapes.
if compare:
from tokenjam.core.cost import override_since_for_compare
since_dt = override_since_for_compare(compare, since_dt, until_dt)
since = f"{(until_dt - since_dt).days}d"

# Two paths depending on whether the daemon holds the DB lock.
#
# Local DB available (no daemon, or we got handed a real DuckDBBackend) →
Expand Down Expand Up @@ -207,6 +239,7 @@ def cmd_optimize(

dominant = _dominant_plan(plan_mix)
pricing_mode = _pricing_mode_for(dominant)
declared_plan = _config_declared_plan(config)

# --export-config branch: write the snippet to disk and exit. Skips
# the normal rendering path. The user reads the snippet file and
Expand Down Expand Up @@ -276,6 +309,7 @@ def cmd_optimize(
_render_report(
report, agent=agent, plan_mix=plan_mix,
dominant_plan=dominant, pricing_mode=pricing_mode,
declared_plan=declared_plan,
)
if cost_diff is not None:
from tokenjam.cli.cmd_cost import _render_diff
Expand Down Expand Up @@ -313,6 +347,7 @@ def _render_report(
plan_mix: dict[str, int] | None = None,
dominant_plan: str = "unknown",
pricing_mode: str = "unknown",
declared_plan: str | None = None,
) -> None:
w = report.window
scope_tag = f", {agent}" if agent else ""
Expand Down Expand Up @@ -375,6 +410,23 @@ def _render_report(
f"resolve.[/dim]\n"
)

# Surface a divergence note when the user has reconfigured to a new plan
# but historical sessions still reflect the previous plan. Honest framing:
# show the data as it was actually generated, but flag that future
# sessions will be costed differently (#71 finding 1).
if (
declared_plan
and declared_plan != dominant_plan
and declared_plan in _PLAN_LABEL_AND_FEE # only flag subscription deltas
):
label, _ = _PLAN_LABEL_AND_FEE[declared_plan]
console.print(
f"[dim]Note: your config declares "
f"[bold]{label}[/bold] but historical sessions ran under "
f"a different plan — rendering reflects what actually ran. "
f"New sessions will use the configured plan.[/dim]\n"
)

if w.sessions == 0:
console.print("[dim]No sessions in window.[/dim]")
return
Expand Down Expand Up @@ -589,7 +641,7 @@ def _export_snippet(
plan_tier=dominant_plan,
agent_id=agent_id,
)
ext = "json"
ext = "jsonc"
else:
# Click's Choice() already constrained this; defensive only.
raise click.ClickException(f"Unknown export target: {target}")
Expand Down
13 changes: 9 additions & 4 deletions tokenjam/cli/cmd_policy.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,11 +51,15 @@ def cmd_policy() -> None:


@cmd_policy.command("list")
@click.option("--json", "output_json_flag", is_flag=True,
help="Emit machine-readable JSON.")
@click.pass_context
def cmd_policy_list(ctx: click.Context) -> None:
def cmd_policy_list(ctx: click.Context, output_json_flag: bool) -> None:
"""List existing alerts, drift, schema, and budget configuration."""
config: TjConfig = ctx.obj["config"]
output_json: bool = ctx.obj.get("output_json", False)
# Honour either the root `tj --json policy list` form or the
# command-level `tj policy list --json` form (#71 finding 6).
output_json: bool = output_json_flag or ctx.obj.get("output_json", False)

rows = _collect_rows(config)

Expand Down Expand Up @@ -226,8 +230,9 @@ def _drift_summary(drift: DriftConfig) -> str:


def _capture_rows(capture: CaptureConfig) -> list[PolicyRow]:
if not any([capture.prompts, capture.completions, capture.tool_inputs, capture.tool_outputs]):
return []
# Always emit the row — capture is a policy choice even when all four
# toggles are off (the default). Suppressing it hid the section from
# users who'd explicitly verified their privacy settings (#71 finding 7).
parts = [
f"prompts={str(capture.prompts).lower()}",
f"completions={str(capture.completions).lower()}",
Expand Down
26 changes: 26 additions & 0 deletions tokenjam/core/cost.py
Original file line number Diff line number Diff line change
Expand Up @@ -213,6 +213,32 @@ def parse_compare_window(
return prev_since, prev_until


def override_since_for_compare(
    compare: str, default_since: datetime, current_until: datetime,
) -> datetime:
    """
    Resolve `--compare` keywords that imply a *specific* current-window
    length (`last-7d`, `last-30d`, `last-week`) to a `since` datetime that
    makes the comparison symmetric.

    Without this, `tj optimize --compare last-7d` would render a 30d-vs-30d
    comparison (because `--since` defaults to 30d) while
    `tj cost --compare last-7d` would render a 7d-vs-7d comparison (because
    `--since` defaults to 7d) — the same flag producing different shapes
    across commands (#71 finding 5). Forcing `last-Nd` to N days everywhere
    gives the user the comparison they asked for.

    Args:
        compare: The raw `--compare` value. Matching ignores case and
            surrounding whitespace.
        default_since: Start of the window to fall back to.
        current_until: End of the current window; the implied length is
            subtracted from this.

    Returns:
        `current_until` minus the implied number of days for a recognised
        keyword. `default_since` unchanged for keywords without an implied
        window length (`previous`, `last-month`), explicit date ranges, or
        an empty/None `compare`.
    """
    # Keywords that pin the current window to a fixed number of days.
    implied_days = {"last-7d": 7, "last-week": 7, "last-30d": 30}
    # `compare or ""` keeps an unset option from raising on `.strip()`.
    keyword = (compare or "").strip().lower()
    days = implied_days.get(keyword)
    if days is None:
        return default_since
    return current_until - timedelta(days=days)


def compute_window_totals(
conn, since: datetime, until: datetime, agent_id: str | None = None,
) -> WindowTotals:
Expand Down
4 changes: 3 additions & 1 deletion tokenjam/core/export/claude_code.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@


def _comment_block(lines: list[str]) -> str:
    """Render *lines* as `//` comment lines joined by newlines.

    Each entry becomes one `// <text>` line prefixed with a fixed indent;
    an empty list yields an empty string.
    """
    # 6-space indent so the comment block aligns with the surrounding
    # "routing_recommendations" object body (#71 finding 4).
    indent = " " * 6
    return "\n".join(f"{indent}// {line}" for line in lines)


def render_claude_code_snippet(
Expand Down
Loading