Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 10 additions & 0 deletions .cursor/skills/proof/examples/example_dag.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,9 @@
{
"title": "Build a tiny CLI todo app",
"converge": {
"on": "review",
"maxIterations": 2
},
"tasks": [
{
"id": "research-stack",
Expand Down Expand Up @@ -36,6 +40,12 @@
"depends_on": ["implement"],
"complexity": "LOW",
"subtask_prompt": "Write a short `README.md` in the cwd describing what `todo.mjs` does, the supported commands with examples, and where data is stored. Do not modify `todo.mjs`."
},
{
"id": "review",
"depends_on": ["tests", "docs"],
"complexity": "LOW",
"subtask_prompt": "Review the implementation and test/docs outputs for blockers or high-severity issues. Output exactly the sections `## Blockers`, `## High-severity findings`, and `## Medium-severity findings`."
}
]
}
21 changes: 20 additions & 1 deletion packages/proof/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,10 @@ Create a DAG JSON file:
```json
{
"title": "Build a tiny CLI todo app",
"converge": {
"on": "review",
"maxIterations": 2
},
"tasks": [
{
"id": "design",
Expand All @@ -33,6 +37,12 @@ Create a DAG JSON file:
"depends_on": ["design"],
"complexity": "MED",
"subtask_prompt": "Implement the todo CLI based on the design."
},
{
"id": "review",
"depends_on": ["implement"],
"complexity": "LOW",
"subtask_prompt": "Review the implementation for blockers and high-severity issues. Output `## Blockers` and `## High-severity findings` sections."
}
]
}
Expand Down Expand Up @@ -73,6 +83,11 @@ Optional task kinds add control gates:
- `kind: "oracle"` runs a shell command and records pass/fail evidence.
- `kind: "pause"` waits for a checkpoint sentinel so a human can inspect or approve before downstream work continues.

Optional top-level controls let DAG authors tune execution:

- `budget`: set DAG-wide soft ceilings like `maxIterations` and `maxTokensTotal`.
- `converge`: set `{ "on": "<task-id>", "maxIterations"?: <positive-int> }` so the DAG itself names the convergence task and can lower or raise the default loop ceiling. CLI flags `--converge-on` and `--max-iterations` still win when both are present.

## Project Skill

The canonical Cursor skill entrypoint lives at:
Expand Down Expand Up @@ -107,6 +122,7 @@ pnpm -F @flatbread/proof build
```bash
pnpm -F @flatbread/proof typecheck
pnpm -F @flatbread/proof build
pnpm -F @flatbread/proof test
pnpm -F @flatbread/proof models:list
pnpm exec proof --dry-check-cmds --dag .cursor/skills/proof/examples/example_dag.json
```
Expand All @@ -117,13 +133,16 @@ Proof also exposes helpers for tooling:

```ts
import {
DEFAULT_MAX_ITERATIONS,
computeRanks,
createModelResolver,
parseDAG,
resolveConvergenceConfig,
runDryCheck,
type DAG,
type DAGConverge,
type TaskState,
} from '@flatbread/proof';
```

The public API includes DAG parsing and rank computation, model resolution, canvas state types, convergence helpers, dry command checks, oracle and pause helpers, and self-hosting state utilities.
The public API includes DAG parsing and rank computation, convergence config resolution, model resolution, canvas state types, convergence helpers, dry command checks, oracle and pause helpers, and self-hosting state utilities.
1 change: 1 addition & 0 deletions packages/proof/package.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
"scripts": {
"build": "tsup",
"dev": "tsup --watch src",
"test": "tsx --test test/**/*.test.ts",
"typecheck": "tsc -p tsconfig.json --noEmit",
"models:list": "tsx src/list_models.ts"
},
Expand Down
33 changes: 33 additions & 0 deletions packages/proof/src/convergence_config.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
import type { DAG } from './dag.js';

/**
 * Caller-supplied convergence settings that take precedence over the DAG's
 * own `converge` block when passed to `resolveConvergenceConfig`.
 */
export interface CliConvergenceOverrides {
/** Convergence task id; when set it is used instead of `dag.converge.on`. */
convergeOn?: string;
/** Iteration ceiling; when set it is used instead of `dag.converge.maxIterations`. */
maxIterations?: number;
}

/** Effective convergence settings produced by `resolveConvergenceConfig`. */
export interface ResolvedConvergenceConfig {
/** Convergence task id; `undefined` when neither the overrides nor the DAG name one. */
on?: string;
/** Effective ceiling: the override, else the DAG value, else `DEFAULT_MAX_ITERATIONS`. */
maxIterations: number;
}

/**
 * Fallback convergence iteration ceiling, used when neither the caller's
 * override nor the DAG's `converge.maxIterations` is set. Matches the
 * historical `--max-iterations` default.
 */
export const DEFAULT_MAX_ITERATIONS = 3;

/**
 * Combine caller overrides with the DAG's `converge` block into the effective
 * convergence settings.
 *
 * Precedence for each field is: override, then DAG value, then (for
 * `maxIterations` only) `DEFAULT_MAX_ITERATIONS`.
 *
 * @param dag - DAG whose `converge` block and task ids are consulted.
 * @param overrides - Optional values that win over the DAG's own settings.
 * @returns The resolved target task id (possibly `undefined`) and ceiling.
 * @throws Error when a target is resolved but no task in `dag.tasks` has that id.
 */
export function resolveConvergenceConfig(
  dag: DAG,
  overrides: CliConvergenceOverrides = {}
): ResolvedConvergenceConfig {
  const target = overrides.convergeOn ?? dag.converge?.on;

  // Only a resolved target needs checking; "no convergence" is a valid outcome.
  if (target !== undefined) {
    const isKnownTask = dag.tasks.some(({ id }) => id === target);
    if (!isKnownTask) {
      throw new Error(
        `Convergence target "${target}" is not a task id in DAG "${dag.title}"`
      );
    }
  }

  const ceiling =
    overrides.maxIterations ??
    dag.converge?.maxIterations ??
    DEFAULT_MAX_ITERATIONS;

  return { on: target, maxIterations: ceiling };
}
43 changes: 42 additions & 1 deletion packages/proof/src/dag.ts
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ export interface DAG {
models?: ModelMapOverride;
framing?: string;
budget?: DAGBudget;
converge?: DAGConverge;
tasks: RawTask[];
}

Expand All @@ -77,6 +78,11 @@ export interface DAGBudget {
maxTokensTotal?: number;
}

/** Optional top-level `converge` block of a DAG. */
export interface DAGConverge {
/** Id of the convergence task; validated as a non-empty string naming a known task. */
on: string;
/** Optional iteration ceiling; validated as a positive safe integer when set. */
maxIterations?: number;
}

const COMPLEXITY_VALUES = new Set<Complexity>(['HIGH', 'MED', 'LOW']);
const COMPLEXITY_KEYS: readonly Complexity[] = ['HIGH', 'MED', 'LOW'] as const;
const TASK_KIND_VALUES = new Set<TaskKind>(['task', 'pause', 'oracle']);
Expand Down Expand Up @@ -162,8 +168,12 @@ export function parseDAG(raw: unknown): DAG {
obj.framing === undefined ? undefined : validateFraming(obj.framing);
const budget =
obj.budget === undefined ? undefined : validateBudget(obj.budget);
const converge =
obj.converge === undefined
? undefined
: validateConverge(obj.converge, ids);

return { title: obj.title, models, framing, budget, tasks };
return { title: obj.title, models, framing, budget, converge, tasks };
}

function validateFraming(raw: unknown): string {
Expand Down Expand Up @@ -199,6 +209,37 @@ function validateBudgetNumber(
}
}

/**
 * Assert that `raw` is a positive safe integer, narrowing it to `number`.
 *
 * @param raw - Untrusted value to check.
 * @param label - Field name used as the prefix of the error message.
 * @throws Error when `raw` is not a number, not a safe integer, or not above zero.
 */
function validatePositiveInteger(
  raw: unknown,
  label: string
): asserts raw is number {
  const isPositiveSafeInteger =
    typeof raw === 'number' && Number.isSafeInteger(raw) && raw > 0;
  if (isPositiveSafeInteger) {
    return;
  }
  throw new Error(`${label} must be a positive integer when set.`);
}

/**
 * Validate the raw `converge` value of a DAG and return it as a typed object.
 *
 * @param raw - Untrusted value read from the DAG's `converge` field.
 * @param taskIds - Ids of the DAG's tasks; `on` must be one of them.
 * @returns A `DAGConverge` holding `on` and, only when supplied, `maxIterations`.
 * @throws Error when `raw` is not a non-array object, when `on` is not a
 *   non-blank string naming a known task, or when `maxIterations` is set but
 *   is not a positive integer.
 */
function validateConverge(
  raw: unknown,
  taskIds: ReadonlySet<string>
): DAGConverge {
  const isPlainObject =
    typeof raw === 'object' && raw !== null && !Array.isArray(raw);
  if (!isPlainObject) {
    throw new Error('DAG.converge must be a JSON object when set.');
  }

  const fields = raw as Record<string, unknown>;
  const target = fields.on;
  if (typeof target !== 'string' || target.trim() === '') {
    throw new Error('DAG.converge.on must be a non-empty string.');
  }
  if (!taskIds.has(target)) {
    throw new Error(`DAG.converge.on references unknown task id: ${target}`);
  }

  // Leave `maxIterations` off the result entirely when the DAG omits it.
  const ceiling = fields.maxIterations;
  if (ceiling === undefined) {
    return { on: target };
  }
  validatePositiveInteger(ceiling, 'DAG.converge.maxIterations');
  return { on: target, maxIterations: ceiling };
}

function validateTask(raw: unknown, index: number): RawTask {
if (!raw || typeof raw !== 'object') {
throw new Error(`tasks[${index}] must be an object.`);
Expand Down
10 changes: 10 additions & 0 deletions packages/proof/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,22 @@ export type {
Complexity,
DAG,
DAGBudget,
DAGConverge,
ModelMap,
ModelMapOverride,
RawTask,
TaskKind,
} from './dag.js';

export {
DEFAULT_MAX_ITERATIONS,
resolveConvergenceConfig,
} from './convergence_config.js';
export type {
CliConvergenceOverrides,
ResolvedConvergenceConfig,
} from './convergence_config.js';

export { CanvasWriter, initialRunState } from './canvas_writer.js';
export type { RunState, TaskState, TaskStatus } from './canvas_writer.js';

Expand Down
73 changes: 40 additions & 33 deletions packages/proof/src/run_dag.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,17 @@
* through the same parser as regular tasks.
* --checkpoint-dir <path> Directory for `kind: 'pause'` sentinel files
* (default `.proof/` under --cwd).
* --converge-on <task-id> After the main DAG run, parse the named task's
* `resultText` for `## Blockers` /
* `## High-severity findings`. If non-empty,
* re-execute the entire upstream ancestor
* subtree with the convergence task's latest
* result appended as context, then re-execute
* the convergence task. Loop until clean or
* --max-iterations is reached.
* --max-iterations <N> Convergence iteration ceiling (default: 3).
* --converge-on <task-id> Override DAG `converge.on`. After the main DAG
* run, parse the named task's `resultText` for
* `## Blockers` / `## High-severity findings`.
* If non-empty, re-execute the entire upstream
* ancestor subtree with the convergence task's
* latest result appended as context, then
* re-execute the convergence task. Loop until
* clean or the effective max iteration ceiling
* is reached.
* --max-iterations <N> Override DAG `converge.maxIterations`
* (default fallback: 3).
* --state-path <path> Persist resumable runner state after each rank.
* Defaults to `.proof/run-state.json` when
* --restart-on-runner-change is enabled.
Expand Down Expand Up @@ -93,6 +95,7 @@ import type {
RawTask,
TaskKind,
} from './dag.js';
import { resolveConvergenceConfig } from './convergence_config.js';
import {
CanvasWriter,
initialRunState,
Expand Down Expand Up @@ -161,10 +164,10 @@ interface CliArgs {
dryCheckCmds: boolean;
/** Absolute dir for `kind: 'pause'` sentinel files. Defaults to `<cwd>/.proof`. */
checkpointDir: string;
/** When set, the runner re-executes ancestors after the named task to converge on a clean review. */
/** CLI override for DAG `converge.on`. */
convergeOn?: string;
/** Convergence iteration ceiling (default 3). */
maxIterations: number;
/** CLI override for DAG `converge.maxIterations`. */
maxIterations?: number;
/** Optional resumable state path. If omitted, no state file is written. */
statePath?: string;
/** Load prior `RunState` from this path before executing ranks. */
Expand Down Expand Up @@ -240,9 +243,8 @@ function parseArgs(argv: string[]): CliArgs {
DEFAULT_STREAM_IDLE_TIMEOUT_MS,
'--stream-idle-timeout-ms'
);
const maxIterations = parsePositiveInt(
const maxIterations = parseOptionalPositiveInt(
args['max-iterations'],
DEFAULT_MAX_ITERATIONS,
'--max-iterations'
);
const fullOutputRaw = args['full-output-dir'];
Expand Down Expand Up @@ -315,6 +317,14 @@ function parsePositiveInt(
return n;
}

/**
 * Parse an optional positive-integer CLI flag.
 *
 * @param raw - Raw flag value, or `undefined` when the flag was not given.
 * @param flag - Flag name forwarded to `parsePositiveInt`.
 * @returns `undefined` when the flag is absent; otherwise whatever
 *   `parsePositiveInt` returns for `raw`.
 */
function parseOptionalPositiveInt(
  raw: string | undefined,
  flag: string
): number | undefined {
  // NOTE(review): the `1` is a placeholder fallback argument; presumably it is
  // never used because `raw` is defined here — confirm against parsePositiveInt.
  return raw === undefined ? undefined : parsePositiveInt(raw, 1, flag);
}

interface ModelOverrideSources {
dagModels: ModelMapOverride | undefined;
fileModels: ModelMapOverride | undefined;
Expand Down Expand Up @@ -495,6 +505,10 @@ async function main(): Promise<void> {
const dagPath = resolveAgainstCwd(args.dag, args.cwd);
const raw = JSON.parse(await readFile(dagPath, 'utf8'));
const dag = parseDAG(raw);
const convergence = resolveConvergenceConfig(dag, {
convergeOn: args.convergeOn,
maxIterations: args.maxIterations,
});
const fileModels =
args.modelsFile === undefined
? undefined
Expand All @@ -509,12 +523,6 @@ async function main(): Promise<void> {
);
const ranks = computeRanks(dag);

if (args.convergeOn && !dag.tasks.some((t) => t.id === args.convergeOn)) {
throw new Error(
`--converge-on "${args.convergeOn}" is not a task id in DAG "${dag.title}"`
);
}

const fullOutputAbsoluteDir =
args.fullOutputDir !== undefined
? resolveAgainstCwd(args.fullOutputDir, args.cwd)
Expand Down Expand Up @@ -778,10 +786,10 @@ async function main(): Promise<void> {
}

await maybeRestartAfterRunnerChange('main ranks before convergence');
if (args.convergeOn) {
if (convergence.on) {
await runConvergenceLoop({
convergeOn: args.convergeOn,
maxIterations: args.maxIterations,
convergeOn: convergence.on,
maxIterations: convergence.maxIterations,
dag,
ranks,
stateById,
Expand Down Expand Up @@ -1135,8 +1143,6 @@ const WAIT_AFTER_STREAM_GRACE_MS = 15 * 1000;
const UPSTREAM_SNIPPET_CAP = 2000;
/** Raised listener ceiling to avoid false-positive AbortSignal warnings from SDK internals. */
const ABORT_SIGNAL_LISTENER_LIMIT = 100;
/** Default cap on `--converge-on` re-execution attempts after the initial run. */
const DEFAULT_MAX_ITERATIONS = 3;
/**
* Process exit code reserved for `dag.budget.maxTokensTotal` enforcement.
* Exit `0` is success, `1` is generic failure (existing behavior),
Expand Down Expand Up @@ -1349,11 +1355,12 @@ interface RunConvergenceLoopOptions {
findingsDir?: string;
/**
* DAG-level budget. `budget.maxIterations` adds a soft cap on top of the
* `--max-iterations` CLI flag — the loop aborts and marks the convergence
* task `BUDGET-EXCEEDED` when the next re-run would push the convergence
* task's iteration past this value. `budget.maxTokensTotal` is enforced
* after every rank's `Promise.all` (same as the main run loop) and halts
* the entire run via `BudgetExceededError`.
* active convergence ceiling (DAG `converge.maxIterations`, overridden by
* CLI `--max-iterations`) — the loop aborts and marks the convergence task
* `BUDGET-EXCEEDED` when the next re-run would push the convergence task's
* iteration past this value. `budget.maxTokensTotal` is enforced after
* every rank's `Promise.all` (same as the main run loop) and halts the
* entire run via `BudgetExceededError`.
*/
budget?: DAGBudget;
/** Called after each completed re-execution iteration; used by self-hosting restarts. */
Expand Down Expand Up @@ -1443,9 +1450,9 @@ async function runConvergenceLoop(
// convergence task's `iteration` counter advances by 1 per re-run; if
// the next re-run would push it past the budgeted ceiling, we abort
// here and surface that on the canvas via the new BUDGET-EXCEEDED
// status. The CLI `--max-iterations` flag is enforced by the loop
// header above; the budget is an additional, DAG-author-controlled
// ceiling that can be tighter than the runner default.
// status. The active convergence ceiling is enforced by the loop header
// above; the budget is an additional, DAG-author-controlled ceiling that
// can be tighter than the runner default.
if (budget?.maxIterations !== undefined && iter > budget.maxIterations) {
const now = Date.now();
convergeTs.status = 'BUDGET-EXCEEDED';
Expand Down
Loading
Loading