Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions src/analyst/analyst.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -493,6 +493,83 @@ describe('diffFindings policy', () => {
})
})

describe('RegistryRunOpts.priorFindings forwarding', () => {
  /** One slot per `analyze` call: the `ctx.priorFindings` value that call received. */
  type SeenLog = Array<ReadonlyArray<AnalystFinding> | undefined>

  /**
   * Builds a stub analyst that emits no findings and records, per call,
   * whatever `ctx.priorFindings` the registry handed to it.
   */
  function makeRecordingAnalyst(id: string): Analyst & { seen: SeenLog } {
    const seen: SeenLog = []
    const analyze = async (_input: unknown, ctx: import('./types').AnalystContext) => {
      seen.push(ctx.priorFindings)
      return []
    }
    return {
      id,
      description: `recording ${id}`,
      inputKind: 'custom',
      cost: { kind: 'deterministic' },
      version: '1.0.0',
      seen,
      analyze,
    } as never
  }

  /** Minimal prior finding: `claim` doubles as the label the assertions compare on. */
  function priorFinding(claim: string, analystId = 'a'): AnalystFinding {
    return makeFinding({
      analyst_id: analystId,
      area: 'x',
      claim,
      severity: 'low',
      confidence: 0.5,
      evidence_refs: [],
    })
  }

  /** Fresh registry with the given analysts registered in argument order. */
  function registryWith(...analysts: Analyst[]): AnalystRegistry {
    const registry = new AnalystRegistry()
    for (const analyst of analysts) registry.register(analyst)
    return registry
  }

  /** Claims of the prior findings delivered on the analyst's first call, if any. */
  function firstSeenClaims(analyst: { seen: SeenLog }): string[] | undefined {
    return analyst.seen[0]?.map((f) => f.claim)
  }

  const inputs: AnalystRunInputs = { custom: { a: 1, b: 2 } }

  it('array form: each analyst sees only its own prior findings', async () => {
    const analystA = makeRecordingAnalyst('a')
    const analystB = makeRecordingAnalyst('b')
    const registry = registryWith(analystA, analystB)
    const history = [priorFinding('one', 'a'), priorFinding('two', 'a'), priorFinding('three', 'b')]

    await registry.run('run-1', inputs, { priorFindings: history })

    expect(firstSeenClaims(analystA)).toEqual(['one', 'two'])
    expect(firstSeenClaims(analystB)).toEqual(['three'])
  })

  it('array form: analyst with no matching prior gets undefined (not empty array)', async () => {
    const analystA = makeRecordingAnalyst('a')
    const registry = registryWith(analystA)

    await registry.run('run-1', inputs, { priorFindings: [priorFinding('other', 'b')] })

    expect(analystA.seen[0]).toBeUndefined()
  })

  it('record form: wildcard "*" findings reach every analyst', async () => {
    const analystA = makeRecordingAnalyst('a')
    const analystB = makeRecordingAnalyst('b')
    const registry = registryWith(analystA, analystB)
    const byAnalyst = {
      a: [priorFinding('a-only', 'a')],
      '*': [priorFinding('everyone', 'failure-mode')],
    }

    await registry.run('run-1', inputs, { priorFindings: byAnalyst })

    // Own entry comes first, wildcard entries are appended after it.
    expect(firstSeenClaims(analystA)).toEqual(['a-only', 'everyone'])
    expect(firstSeenClaims(analystB)).toEqual(['everyone'])
  })

  it('no priorFindings option: ctx.priorFindings is undefined', async () => {
    const analystA = makeRecordingAnalyst('a')
    const registry = registryWith(analystA)

    await registry.run('run-1', inputs)

    expect(analystA.seen[0]).toBeUndefined()
  })
})

describe('ChatClient signal racing', () => {
it('mock transport rejects on abort even if handler is slow', async () => {
const controller = new AbortController()
Expand Down
45 changes: 45 additions & 0 deletions src/analyst/kind-factory.ts
Original file line number Diff line number Diff line change
Expand Up @@ -113,9 +113,11 @@ export function createTraceAnalystKind(
const tools = spec.buildTools(store)
const maxDepth = spec.recursion?.maxDepth ?? 0
const maxParallel = spec.recursion?.maxParallelSubagents ?? 2
const priorContext = renderPriorFindings(ctx.priorFindings)

const actorDescription =
spec.actorDescription.trim() +
priorContext +
'\n\n' +
RAW_FINDING_SCHEMA_PROMPT +
'\n\nReturn the array in the `findings` output field. Use `final(...)` ' +
Expand Down Expand Up @@ -228,3 +230,46 @@ function evidenceKindFromUri(uri: string): 'span' | 'artifact' | 'metric' | 'eve
if (uri.startsWith('finding://')) return 'finding'
return 'artifact'
}

/**
 * Render a compact prior-findings block that is appended to the actor's
 * description. Each finding becomes exactly one line so the actor can
 * scan dozens cheaply. The block itself instructs the actor to
 * (a) reuse a prior `finding_id` (passed as `id_basis`) when new work
 * matches a listed row, keeping cross-run diffs stable, and (b) raise
 * `confidence` / consider a higher `severity` when a prior finding has
 * reappeared without remediation evidence.
 *
 * The returned block starts with a blank-line separator (`\n\n`) so it
 * can be concatenated directly after a trimmed description without the
 * header running into the preceding text.
 *
 * Exported for tests + for consumers that build their own actor
 * prompts (e.g. specialized analysts living outside the default kinds).
 *
 * @param prior Prior-run findings to render; `undefined` or empty means
 *   there is no cross-run context.
 * @returns The empty string when there are no prior findings (the prompt
 *   stays unchanged); otherwise `\n\n` followed by the header, at most
 *   40 finding rows, and an overflow note when rows were dropped.
 */
export function renderPriorFindings(prior: AnalystContext['priorFindings']): string {
  if (!prior || prior.length === 0) return ''
  const MAX_ROWS = 40 // cap the row count so the block stays bounded; older history is summarized externally
  const rows = prior.slice(0, MAX_ROWS).map((f) => {
    const subject = f.subject ? ` [${f.subject}]` : ''
    // Collapse whitespace runs (including newlines) so a multi-line claim
    // cannot break the one-row-per-finding layout.
    const claim = f.claim.replace(/\s+/g, ' ').trim()
    return ` - id=${f.finding_id} ${f.severity}${subject} ${truncateForContext(claim, 160)}`
  })
  const lines = [
    'PRIOR FINDINGS (from a previous run on related data):',
    'When the work you do now matches a row below, REUSE the `finding_id` (pass it as `id_basis`) so the cross-run diff stays stable.',
    'A finding that reappears with no remediation evidence SHOULD raise its `confidence` and may justify a higher `severity`.',
    ...rows,
  ]
  if (prior.length > MAX_ROWS) {
    // Blank line first so the overflow note is visually set apart from the rows.
    lines.push('', ` ... +${prior.length - MAX_ROWS} more prior findings (older history truncated)`)
  }
  // The separator is prepended explicitly: putting '' entries into an array
  // that is later passed through `.filter(Boolean)` silently drops them.
  return `\n\n${lines.join('\n')}`
}

/**
 * Shorten `s` to at most `max` UTF-16 code units, marking the cut with a
 * trailing `…`.
 *
 * @param s Text to shorten.
 * @param max Maximum length of the result, in UTF-16 code units
 *   (the same unit as `String.prototype.length`).
 * @returns `s` unchanged when it already fits; the empty string when
 *   `max` is zero or negative; otherwise a prefix of `s` with trailing
 *   whitespace removed, followed by `…`.
 */
function truncateForContext(s: string, max: number): string {
  if (s.length <= max) return s
  // A non-positive budget cannot even hold the ellipsis; without this guard
  // `slice(0, max - 1)` would take a negative end index and keep most of `s`.
  if (max <= 0) return ''
  let end = max - 1 // reserve one code unit for the ellipsis
  if (end > 0) {
    const last = s.charCodeAt(end - 1)
    // Do not cut between the two halves of a surrogate pair: drop a dangling
    // high surrogate instead of emitting an unpaired code unit.
    if (last >= 0xd800 && last <= 0xdbff) end -= 1
  }
  return `${s.slice(0, end).trimEnd()}…`
}
61 changes: 60 additions & 1 deletion src/analyst/kinds/kinds.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ import { describe, expect, it, vi } from 'vitest'
import { parseRawFinding, RawAnalystFindingSchema } from '../finding-signature'
import { createTraceAnalystKind, type TraceAnalystKindSpec } from '../kind-factory'
import { buildTraceToolsForGroup } from '../tool-groups'
import { computeFindingId } from '../types'
import { computeFindingId, makeFinding } from '../types'
import {
DEFAULT_TRACE_ANALYST_KINDS,
FAILURE_MODE_KIND_SPEC,
Expand Down Expand Up @@ -204,6 +204,65 @@ describe('createTraceAnalystKind wires the spec into the Analyst contract', () =
expect(analyst.version).toBe('1.0.0+mipro-2026-05-18')
})

describe('renderPriorFindings (cross-run retrieval context)', () => {
  /** Resolves the function under test through a dynamic import, once per test. */
  const loadRenderer = async () => (await import('../kind-factory')).renderPriorFindings

  it('returns empty string when there are no prior findings', async () => {
    const render = await loadRenderer()
    // Both "absent" and "present but empty" must leave the prompt untouched.
    expect(render(undefined)).toBe('')
    expect(render([])).toBe('')
  })

  it('emits one compact line per prior finding with the stable finding_id', async () => {
    const render = await loadRenderer()
    const toolLoop = makeFinding({
      analyst_id: 'failure-mode',
      area: 'failure-mode',
      subject: 'tool:foo',
      claim: 'tool foo loops on identical args',
      severity: 'high',
      confidence: 0.9,
      evidence_refs: [],
    })
    const authRevoked = makeFinding({
      analyst_id: 'failure-mode',
      area: 'failure-mode',
      subject: 'auth',
      claim: 'auth revoked mid-run',
      severity: 'critical',
      confidence: 0.95,
      evidence_refs: [],
    })
    const prior = [toolLoop, authRevoked]

    const rendered = render(prior)

    expect(rendered).toMatch(/PRIOR FINDINGS/)
    expect(rendered).toMatch(/REUSE the `finding_id`/)
    const [first, second] = prior
    if (first === undefined || second === undefined) throw new Error('test setup invariant')
    const expectedFragments = [
      `id=${first.finding_id}`,
      `id=${second.finding_id}`,
      '[tool:foo]',
      '[auth]',
    ]
    for (const fragment of expectedFragments) {
      expect(rendered).toContain(fragment)
    }
  })

  it('truncates over 40 prior findings + reports the overflow count', async () => {
    const render = await loadRenderer()
    // 60 findings against a 40-row cap leaves 20 to be reported as overflow.
    const many = new Array(60).fill(0).map((_, index) =>
      makeFinding({
        analyst_id: 'failure-mode',
        area: 'failure-mode',
        subject: `mode-${index}`,
        claim: `finding ${index}`,
        severity: 'medium',
        confidence: 0.7,
        evidence_refs: [],
      }),
    )

    const rendered = render(many)

    expect(rendered).toContain('+20 more prior findings')
  })
})

it('finding_id is stable across runs for the same kind + area + claim + subject', () => {
const a = computeFindingId({
analyst_id: 'failure-mode',
Expand Down
38 changes: 38 additions & 0 deletions src/analyst/registry.ts
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,15 @@ export interface RegistryRunOpts {
signal?: AbortSignal
/** Tags echoed into AnalystContext.tags — useful for tracking environment/version in findings. */
tags?: Record<string, string>
/**
* Prior-run findings made available as retrieval context to every
* analyst via `ctx.priorFindings`. The registry forwards the slice
* whose `analyst_id` matches each registered analyst so a kind sees
* only its own history. Pass `{ '*': findings }` to broadcast to
* every analyst (useful for cross-kind chaining where the improvement
* analyst consumes upstream failure findings).
*/
priorFindings?: ReadonlyArray<AnalystFinding> | Record<string, ReadonlyArray<AnalystFinding>>
}

export class AnalystRegistry {
Expand Down Expand Up @@ -187,6 +196,7 @@ export class AnalystRegistry {
tags: runOpts.tags,
log: (msg, fields) => log(`[${analyst.id}] ${msg}`, { runId, correlationId, ...fields }),
signal: runOpts.signal,
priorFindings: selectPriorFindings(runOpts.priorFindings, analyst.id),
}

await hooks.onBeforeAnalyze?.({ analyst, ctx, runId })
Expand Down Expand Up @@ -337,3 +347,31 @@ function sumFindingCost(findings: AnalystFinding[]): number {
}
return sum
}

/**
 * Resolve the `priorFindings` slice an analyst sees.
 *
 * - Array form  → the analyst sees only findings whose `analyst_id`
 *                 matches its own id, so a kind never reads
 *                 another kind's history by accident.
 * - Record form → the analyst gets the entry keyed by its id, with
 *                 the `'*'` wildcard appended (in that order). Use
 *                 the wildcard for cross-kind chaining, e.g. when
 *                 `improvement` should see all upstream failure /
 *                 gap / poisoning findings.
 *
 * Record lookups consider own properties only, so an analyst id that
 * happens to match an inherited `Object.prototype` member (for example
 * `constructor` or `toString`) resolves to "no entry" instead of a
 * non-iterable value. An analyst whose id is literally `'*'` receives
 * the wildcard entry once, not twice.
 *
 * @param source The `priorFindings` run option, in array or record form.
 * @param analystId Id of the analyst whose slice is being resolved.
 * @returns The findings for this analyst, or `undefined` when `source`
 *   is absent or the resolved slice is empty (never an empty array).
 */
function selectPriorFindings(
  source: RegistryRunOpts['priorFindings'],
  analystId: string,
): ReadonlyArray<AnalystFinding> | undefined {
  if (!source) return undefined
  if (Array.isArray(source)) {
    const own = source.filter((f) => f.analyst_id === analystId)
    return own.length > 0 ? own : undefined
  }
  const record = source as Record<string, ReadonlyArray<AnalystFinding>>
  // Plain index access would also find inherited members; restrict to own keys.
  const entryFor = (key: string): ReadonlyArray<AnalystFinding> =>
    Object.prototype.hasOwnProperty.call(record, key) ? (record[key] ?? []) : []
  const own = entryFor(analystId)
  // When the analyst id itself is '*', `own` already is the wildcard entry.
  const wildcard = analystId === '*' ? [] : entryFor('*')
  const merged = [...own, ...wildcard]
  return merged.length > 0 ? merged : undefined
}
10 changes: 10 additions & 0 deletions src/analyst/types.ts
Original file line number Diff line number Diff line change
Expand Up @@ -134,6 +134,16 @@ export interface AnalystContext {
* analyst code.
*/
chat?: ChatClient
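/**
* Findings from a prior run the operator wants the analyst to see as
* retrieval context. Kinds that take advantage of cross-run memory
* (failure-mode "I saw this cluster last run", knowledge-gap "the wiki
* page I asked for is still missing") render these into the actor's
* working set. When the context is built by `AnalystRegistry.run`, the
* registry has already selected this analyst's slice (findings whose
* `analyst_id` matches, or the analyst's record entry plus any `'*'`
* broadcast entries). Callers that build a context by hand must do the
* filtering themselves: pass the slice that matches the analyst's id,
* or pass everything and let the kind filter. Empty / absent means no
* cross-run context.
*/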
priorFindings?: ReadonlyArray<AnalystFinding>
/** Free-form runtime tags (env, host, op). Findings can echo these into metadata. */
tags?: Record<string, string>
/** Logger callback — analysts SHOULD prefer this over console.* for testability. */
Expand Down
2 changes: 1 addition & 1 deletion src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -45,7 +45,7 @@ export type {
TraceAnalystGolden,
TraceAnalystKindSpec,
} from './analyst/kind-factory'
export { createTraceAnalystKind } from './analyst/kind-factory'
export { createTraceAnalystKind, renderPriorFindings } from './analyst/kind-factory'
export {
DEFAULT_TRACE_ANALYST_KINDS,
FAILURE_MODE_KIND_SPEC,
Expand Down
Loading