FlatbreadLabs · tonyketcham · May 12, 2026 · May 12, 2026 · May 12, 2026 · May 12, 2026
diff --git a/.cursor/skills/proof/SKILL.md b/.cursor/skills/proof/SKILL.md
@@ -256,8 +256,8 @@ set -a && source .env && set +a
 - Local runtime only — every subagent runs against `--cwd` (defaults to wherever you invoke the runner).
 - Sibling tasks in the same rank run in parallel; do not let them write the same files.
 - Inline MCP servers and sub-sub-agents are not configured by this runner.
-- A failed task automatically skips all downstream dependents (they are marked `ERROR` with a "Skipped: upstream task(s) … failed" message). This prevents wasted API calls on tasks whose inputs are missing.
-- Per-task streamed text is capped at `STREAM_CAP = 4000` chars to keep the canvas file modest. Upstream context passed to child tasks is capped at 2000 chars per parent, with section-aware truncation when the parent output contains multiple `##` sections.
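+- When any upstream task ends in **`ERROR`** or **`BUDGET-EXCEEDED`**, all of its downstream dependents are skipped automatically (marked `ERROR` with a `Skipped:` message), which avoids wasted API calls on tasks whose inputs are missing.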
+- Canvas-inlined streamed text stays bounded (**`CANVAS_DISPLAY_CAP = 4000`** tail per task plus the existing `[...truncated N earlier chars...]` banner). For `kind: 'task'`, child prompts, in-process convergence loops, findings sidecars, and artifact markdown use a separate **execution transcript**; resumed runs can reconstruct it when the same `--full-output-dir` is reused and `transcriptPath` points at the mirrored stream file. Pause/oracle tasks still use their bounded status/output text. Upstream excerpts default to the same **2000-char section-aware policy** as before, now with explicit counted banners when trimming. Set **`DAG.outputPolicy.upstream`** to **`"full"`** to stitch full parent transcripts (mind model context limits).
 - Timed-out tasks are marked `ERROR` instead of staying indefinitely in `RUNNING`.
 - SIGINT/SIGTERM/SIGHUP gracefully cancel all in-flight subagents and finalize the canvas before exiting.
 - Unexpected unhandled rejections from SDK internals are suppressed to prevent runner crashes; uncaught exceptions are logged and trigger a clean shutdown.

diff --git a/docs/proposals/proof-output-retention-judge.md b/docs/proposals/proof-output-retention-judge.md
diff --git a/docs/proposals/proof-output-retention-plan.md b/docs/proposals/proof-output-retention-plan.md
diff --git a/docs/proposals/proof-output-retention-review.md b/docs/proposals/proof-output-retention-review.md
diff --git a/packages/proof/README.md b/packages/proof/README.md
@@ -149,8 +149,16 @@ By default, every **full DAG run** writes per-task markdown transcripts to a tim
   _dag.json      # The original DAG definition
   _index.md      # Run summary: outcome, timings, and links to all transcripts
   <task-id>.md   # Full agent output for each task (kind: task, oracle, or pause)
+  <task-id>.stream.txt   # Append-only assistant transcript mirror (`kind: task` only)
 ```
 
+**Execution vs canvas:**
+
+- For `kind: "task"` only, stitched prompts, in-process convergence parsing (`--converge-on` / `DAG.loops`), `<task-id>.findings.json` payloads (`--findings-dir`), and `<task-id>.md` derive from an **execution-authoritative** transcript. Resumed runs can reconstruct that transcript when the same `--full-output-dir` is reused and `transcriptPath` points at `<task-id>.stream.txt`; otherwise the legacy bounded `resultText` remains the fallback.
+- The inlined canvas payload keeps only a **4000-character display tail** (`CANVAS_DISPLAY_CAP`) per task, plus an optional **`transcriptPath`** when `<task-id>.stream.txt` is mirrored.
+- Set `DAG.outputPolicy.upstream` to `"full"` to widen upstream excerpts to full parent transcripts, or to `"summarize"` (the default) to keep the 2000-char section-aware excerpt policy; trimmed excerpts carry visible counted banners.
+- Downstream nodes are skipped with `ERROR` when any upstream is `ERROR` or `BUDGET-EXCEEDED`.
+
 Paths resolve from `--cwd` (defaults to the process working directory). The live canvas still defaults under `~/.cursor/projects/<workspace-slug>/canvases/` when using `--canvas` without `--canvas-path`.
 
 Previously, transcripts only appeared when you passed `--full-output-dir`; now they land under `.flatbread/` by default. Use `--no-artifacts` for opt-out, or `--full-output-dir` to redirect elsewhere.

diff --git a/packages/proof/package.json b/packages/proof/package.json
@@ -6,8 +6,8 @@
   "scripts": {
     "build": "tsup",
     "dev": "tsup --watch src",
-    "test": "pnpm --dir ../.. exec ava packages/proof/src/__tests__/loops.test.ts",
-    "test:watch": "pnpm --dir ../.. exec ava --watch packages/proof/src/__tests__/loops.test.ts",
+    "test": "pnpm --dir ../.. exec ava \"packages/proof/src/__tests__/**/*.test.ts\"",
+    "test:watch": "pnpm --dir ../.. exec ava --watch \"packages/proof/src/__tests__/**/*.test.ts\"",
     "typecheck": "tsc -p tsconfig.json --noEmit",
     "models:list": "tsx src/list_models.ts",
     "cursor:fetch-cloud-agent": "node scripts/fetch-cloud-agent-conversation.mjs"

diff --git a/packages/proof/src/__tests__/output-retention-phase1.test.ts b/packages/proof/src/__tests__/output-retention-phase1.test.ts
@@ -0,0 +1,250 @@
+import { mkdtempSync, readFileSync, rmSync, writeFileSync } from 'node:fs';
+import { tmpdir } from 'node:os';
+import { dirname, join } from 'node:path';
+import { fileURLToPath } from 'node:url';
+import test from 'ava';
+
+import { parseDAG } from '../dag.js';
+import {
+  buildConvergenceContext,
+  extractConvergenceFindings,
+} from '../converge_loop.js';
+import {
+  TaskTranscriptStore,
+  taskStreamArtifactRelPath,
+} from '../task_transcript.js';
+import type { TaskState } from '../canvas_writer.js';
+import {
+  CANVAS_DISPLAY_CAP,
+  excerptUpstreamForPrompt,
+  parseUpstreamSections,
+  renderUpstreamSections,
+  summarizeUpstreamForPrompt,
+  UPSTREAM_SNIPPET_CAP,
+} from '../upstream_policy.js';
+import { renderCanvasSource, initialRunState } from '../canvas_writer.js';
+import { writeFindingsSidecar } from '../findings_sidecar.js';
+
+test('parseDAG accepts DAG.outputPolicy.upstream', (t) => {
+  const dag = parseDAG({
+    title: 'pol',
+    outputPolicy: { upstream: 'full' },
+    tasks: [
+      {
+        id: 'a',
+        depends_on: [],
+        complexity: 'LOW',
+        subtask_prompt: 'do',
+      },
+    ],
+  });
+  t.is(dag.outputPolicy?.upstream, 'full');
+});
+
+test('parseDAG rejects invalid outputPolicy upstream value', (t) => {
+  t.throws(
+    () =>
+      parseDAG({
+        title: 'bad',
+        outputPolicy: { upstream: 'everything' },
+        tasks: [
+          {
+            id: 'a',
+            depends_on: [],
+            complexity: 'LOW',
+            subtask_prompt: 'do',
+          },
+        ],
+      }),
+    { message: /upstream must be/ }
+  );
+});
+
+test('parseDAG rejects unknown outputPolicy keys', (t) => {
+  t.throws(
+    () =>
+      parseDAG({
+        title: 'bad-key',
+        outputPolicy: { upstram: 'full' },
+        tasks: [
+          {
+            id: 'a',
+            depends_on: [],
+            complexity: 'LOW',
+            subtask_prompt: 'do',
+          },
+        ],
+      }),
+    { message: /DAG\.outputPolicy\.upstram is not supported/ }
+  );
+});
+
+test('summarize upstream attaches counted excerpt banner instead of omitting rationale', (t) => {
+  const filler = 'y'.repeat(5000);
+  const { excerpt } = summarizeUpstreamForPrompt(filler, UPSTREAM_SNIPPET_CAP);
+  t.true(
+    excerpt.includes('[...upstream excerpt:') &&
+      excerpt.includes('parent output was 5000 chars')
+  );
+  t.false(/^[^\n]+\u2026$/u.test(excerpt.trim().split(/\n/).pop() ?? ''));
+});
+
+test('full upstream excerpt includes late marker past multi-kchar parents', (t) => {
+  const preamble = 'z'.repeat(2800);
+  const tailMarker = `${'x'.repeat(9100)}MARKER_LATE`;
+  const blob = `${preamble}\n## Section one\nstuff\n## Blockers\n${tailMarker}`;
+  const full = excerptUpstreamForPrompt(blob, 'full');
+  t.true(full.includes('MARKER_LATE'));
+});
+
+test('convergence extract sees late section beyond legacy STREAM cap window', (t) => {
+  const long = `${'p'.repeat(6000)}\n## Blockers\n- late blocker\n`;
+  const f = extractConvergenceFindings(long);
+  t.true(f.hasIssues);
+  t.true(f.blockerLines.some((l) => l.includes('late blocker')));
+});
+
+test('convergence extraContext carries late blockers under full upstream excerpt mode', (t) => {
+  const long = `${'p'.repeat(6000)}\n## Blockers\n- still broken\n`;
+  const ctx = buildConvergenceContext('reviewer', 2, long, 'full');
+  t.true(ctx.includes('## Blockers'));
+  t.true(ctx.includes('still broken'));
+});
+
+test('findings sidecar uses parseSource (full transcript) over bounded resultText', async (t) => {
+  const dir = mkdtempSync(join(tmpdir(), 'proof-sidecar-'));
+  try {
+    const ts: TaskState = {
+      id: 'task-a',
+      depends_on: [],
+      complexity: 'LOW',
+      subtask_prompt: 'x',
+      status: 'FINISHED',
+      model: 'gpt-5.4',
+      resultText: '## Blockers\n(none)',
+    };
+    const longTruth = `${'z'.repeat(5000)}\n## Blockers\n- deep blocker line\n`;
+    await writeFindingsSidecar(dir, ts, { parseSource: longTruth });
+    const raw = readFileSync(join(dir, 'task-a.findings.json'), 'utf8');
+    const parsed = JSON.parse(raw) as { sections: Record<string, string> };
+    t.true(parsed.sections.Blockers?.includes('deep blocker line'));
+  } finally {
+    rmSync(dir, { recursive: true, force: true });
+  }
+});
+
+test('upstream section parsing keeps canvas truncation banner before headings', (t) => {
+  const line = '[...truncated 9000 earlier chars...]';
+  const body = `${line}\n## Blockers\nhit\n`;
+  const sections = parseUpstreamSections(body);
+  t.true(sections.some((s) => s.heading === 'Upstream truncation notice'));
+  const rendered = renderUpstreamSections(sections);
+  t.true(rendered.includes(line));
+});
+
+test('upstream section parsing keeps freeform preamble before headings', (t) => {
+  const body = `Important preface before headings.\nStill preface.\n## Findings\nhit\n## Proposed contract\nkeep\n`;
+  const sections = parseUpstreamSections(body);
+  t.is(sections[0]?.heading, 'Upstream preamble');
+  const rendered = renderUpstreamSections(sections);
+  t.true(rendered.includes('Important preface before headings.'));
+});
+
+test('summarize upstream does not rewrite author-owned trailing ellipsis', (t) => {
+  const body = [
+    '## Summary',
+    'This sentence intentionally trails off…',
+    '',
+    '## Current contract',
+    'drop me '.repeat(500),
+    '',
+    '## Findings',
+    'keep this section',
+  ].join('\n');
+  const { excerpt } = summarizeUpstreamForPrompt(body, 500);
+  t.true(excerpt.includes('trails off…'));
+  t.false(excerpt.includes('[...truncated in excerpt body at char cap …]'));
+});
+
+test('task transcript mirror serializes overlapping flushes in append order', async (t) => {
+  const dir = mkdtempSync(join(tmpdir(), 'proof-stream-'));
+  const store = new TaskTranscriptStore();
+  try {
+    await store.beginMirroredAppend('task-a', dir);
+    store.append('task-a', 'a');
+    const first = store.flushStreamMirror('task-a');
+    store.append('task-a', 'b');
+    const second = store.flushStreamMirror('task-a');
+    await Promise.all([first, second]);
+    await store.flushStreamMirror('task-a');
+    const raw = readFileSync(
+      join(dir, taskStreamArtifactRelPath('task-a')),
+      'utf8'
+    );
+    t.is(raw, 'ab');
+  } finally {
+    rmSync(dir, { recursive: true, force: true });
+  }
+});
+
+test('task transcript store reads existing mirror files after resume', (t) => {
+  const dir = mkdtempSync(join(tmpdir(), 'proof-stream-resume-'));
+  const store = new TaskTranscriptStore();
+  try {
+    const rel = taskStreamArtifactRelPath('task-a');
+    writeFileSync(join(dir, rel), 'full transcript from prior process', 'utf8');
+    store.registerExistingMirror('task-a', dir, rel);
+    t.is(store.getJoined('task-a'), 'full transcript from prior process');
+  } finally {
+    rmSync(dir, { recursive: true, force: true });
+  }
+});
+
+test('canvas render growth stays bounded by display-sized tails versus megabyte dumps', (t) => {
+  const tasks = Array.from({ length: 5 }, (_, i) => ({
+    id: `t${i}`,
+    depends_on: [] as string[],
+    complexity: 'LOW' as const,
+    subtask_prompt: `${'prompt:'.repeat(200)}\n`,
+  }));
+  const dag = parseDAG({ title: 'canvas-env', tasks });
+  const fresh = (): ReturnType<typeof initialRunState> =>
+    initialRunState(dag, () => ({
+      id: 'gpt-5.4',
+    }));
+
+  const baselineLen = renderCanvasSource(fresh()).length;
+
+  const cappedState = fresh();
+  cappedState.tasks.forEach((st) => {
+    st.resultText = `[...truncated 800000 earlier chars...]\n${'a'.repeat(
+      CANVAS_DISPLAY_CAP
+    )}`;
+  });
+  const cappedLen = renderCanvasSource(cappedState).length;
+
+  const leakyState = fresh();
+  leakyState.tasks.forEach((st) => {
+    st.resultText = `[...truncated 800000 earlier chars...]\n${'b'.repeat(
+      12000
+    )}`;
+  });
+  const uncappedLen = renderCanvasSource(leakyState).length;
+
+  t.true(cappedLen < baselineLen + 5 * CANVAS_DISPLAY_CAP + 96000);
+
+  /** Longer fake transcripts should substantially grow the inlined JSON blob. */
+  t.true(
+    uncappedLen - cappedLen > 35000,
+    'expected materially larger stringify when payloads stay long'
+  );
+});
+
+test('runOne skips children when upstream is BUDGET-EXCEEDED (guard in run_dag)', (t) => {
+  const path = join(dirname(fileURLToPath(import.meta.url)), '../run_dag.ts');
+  const src = readFileSync(path, 'utf8');
+  const idx = src.indexOf('failedDeps = task.depends_on.filter');
+  t.not(idx, -1);
+  const snippet = src.slice(idx, idx + 450);
+  t.true(snippet.includes("'BUDGET-EXCEEDED'"));
+});
diff --git a/packages/proof/src/canvas_writer.ts b/packages/proof/src/canvas_writer.ts
@@ -49,6 +49,12 @@ export interface TaskState {
   startedAt?: number;
   finishedAt?: number;
   resultText?: string;
+  /**
+   * Relative path (under the run artifact directory) to the append-only stream
+   * mirror for this task's full assistant transcript. Canvas shows bounded
+   * `resultText`; this pointer is for locating the authoritative stream file.
+   */
+  transcriptPath?: string;
   errorMessage?: string;
   inputTokens?: number;
   outputTokens?: number;
@@ -201,7 +207,7 @@ export class CanvasWriter {
   }
 }
 
-function renderCanvasSource(state: RunState): string {
+export function renderCanvasSource(state: RunState): string {
   const stateLiteral = JSON.stringify(state, null, 2);
   return `${HEADER}\n\nconst STATE: RunState = ${stateLiteral};\n\n${BODY}\n`;
 }
@@ -258,6 +264,11 @@ interface TaskState {
   startedAt?: number;
   finishedAt?: number;
   resultText?: string;
+  /**
+   * Relative path (artifact dir) for the authoritative stream transcript.
+   * Canvas shows bounded resultText strings; transcriptPath reveals the mirror file path.
+   */
+  transcriptPath?: string;
   errorMessage?: string;
   inputTokens?: number;
   outputTokens?: number;
@@ -793,6 +804,12 @@ function TaskList({
                       {t.resultText}
                       {t.status === 'RUNNING' ? '\u2588' : ''}
                     </pre>
+                    {t.transcriptPath ? (
+                      <Text size="small" tone="tertiary">
+                        Full transcript file (relative to artifact dir):{' '}
+                        {t.transcriptPath}
+                      </Text>
+                    ) : null}
                   </Stack>
                 ) : t.status === 'RUNNING' ? (
                   <Text size="small" tone="tertiary" italic>

diff --git a/packages/proof/src/converge_loop.ts b/packages/proof/src/converge_loop.ts
@@ -2,7 +2,7 @@
  * --converge-on <task-id> + --max-iterations <N> loop helpers.
  *
  * The convergence task is expected to be a `flatbread-adversarial-reviewer`
- * style node — its `resultText` follows the schema:
+ * style node — its output (the execution transcript; canvas `resultText` holds only a bounded tail of it) follows the schema:
  *
  *   ## Blockers
  *   …
@@ -27,6 +27,10 @@ import {
   type DAG,
   type ResolvedConvergenceLoop,
 } from './dag.js';
+import {
+  type UpstreamPolicyMode,
+  excerptUpstreamForPrompt,
+} from './upstream_policy.js';
 
 export interface ConvergenceFindings {
   hasIssues: boolean;
@@ -151,18 +155,29 @@ export function resolveLoopReexecuteIds(
 }
 
 /**
- * Renders the convergence task's `resultText` into the standard "extra
+ * Renders the convergence task's reviewer transcript into the standard "extra
  * upstream context" preamble we stitch into ancestor prompts on re-run. The
  * iteration index lets re-runs distinguish their feedback from any future
- * iterations.
+ * iterations. The body is excerpted via the same upstream policy as child
+ * `buildUpstreamContext` — never silently truncated mid-review.
  */
 export function buildConvergenceContext(
   convergeTaskId: string,
   iteration: number,
-  resultText: string | undefined
+  reviewerTranscript: string | undefined,
+  upstreamMode: UpstreamPolicyMode = 'summarize'
 ): string {
-  const trimmed = (resultText ?? '').trim();
-  const body = trimmed === '' ? '(empty result text)' : trimmed;
+  const trimmed = (reviewerTranscript ?? '').trim();
+  if (trimmed === '') {
+    return [
+      `Convergence feedback from "${convergeTaskId}" (iteration ${
+        iteration - 1
+      }):`,
+      '',
+      '(empty result text)',
+    ].join('\n');
+  }
+  const body = excerptUpstreamForPrompt(trimmed, upstreamMode);
   return [
     `Convergence feedback from "${convergeTaskId}" (iteration ${
       iteration - 1