Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
94 changes: 94 additions & 0 deletions packages/software-factory/src/issue-loop.ts
Original file line number Diff line number Diff line change
Expand Up @@ -172,6 +172,59 @@ function issueSummaryLabel(issue: SchedulableIssue): string {
return summary ? `"${issue.id}" — "${summary}"` : `"${issue.id}"`;
}

/**
* Detect a "vacuous pass" — overall `passed: true` where every step is a
* `passed: true` with zero items checked. The validators are correct to
* return this when their input set is empty (no .gts → nothing to lint),
* but the aggregate is misleading for a non-bootstrap implementation
* issue: the workspace is empty, the agent produced nothing. Bootstrap
* issues legitimately produce only tracker cards (Issues / Projects /
* Knowledge Articles) and have no `.gts` / test / Spec — they are
* supposed to vacuously pass and are exempt.
*
* Per-step `details` shapes:
* - `lint` / `parse`: `filesChecked: number`
* - `evaluate`: `modulesChecked: number`
* - `instantiate`: `cardsChecked: number`
* - `test`: `passedCount + failedCount + skippedCount`
*/
function isVacuousValidationOfImplementation(
results: ValidationResults,
issue: SchedulableIssue,
): boolean {
if (!results.passed) return false;
let issueType = (issue as Record<string, unknown>).issueType;
if (issueType === 'bootstrap') return false;
if (results.steps.length === 0) return false;
return results.steps.every(isVacuousStep);
}

function isVacuousStep(step: ValidationStepResult): boolean {
if (!step.passed) return false;
let details = (step.details ?? {}) as Record<string, unknown>;
let asNumber = (v: unknown): number => (typeof v === 'number' ? v : 0);
switch (step.step) {
case 'lint':
case 'parse':
return asNumber(details.filesChecked) === 0;
case 'evaluate':
return asNumber(details.modulesChecked) === 0;
case 'instantiate':
return asNumber(details.cardsChecked) === 0;
case 'test':
return (
asNumber(details.passedCount) +
asNumber(details.failedCount) +
asNumber(details.skippedCount) ===
0
);
default:
// Unknown step shape — be permissive (don't fire on something we
// can't read) so this guard never blocks a legitimate pass.
return false;
}
}

function formatValidation(results: ValidationResults): string {
if (results.passed) {
let stepCount = results.steps.length;
Expand Down Expand Up @@ -394,6 +447,32 @@ export async function runIssueLoop(
let syncFailed = !preValidationSync.ok || !postValidationSync.ok;
let syncError = preValidationSync.error ?? postValidationSync.error;

// Vacuous-pass guard. Each validator step legitimately returns
// `passed: true` with zero items checked when its category of
// artifact doesn't exist yet (e.g. no .gts → "nothing to lint").
// But if every step is vacuous on a non-bootstrap issue, the
// workspace is empty: the agent produced no card definition, no
// tests, no Spec, no instances. Trusting the aggregated
// "passed: true" would let `signal_done` mark the issue done with
// nothing actually implemented. Override to failed so the next
// iteration re-prompts the agent with a "you wrote nothing"
// message; the existing max-iterations path then blocks the
// issue cleanly if the agent never recovers. Bootstrap issues
// legitimately produce only tracker cards and are exempt.
let vacuousValidationFailure = isVacuousValidationOfImplementation(
validationResults,
issue,
);
if (vacuousValidationFailure) {
log.warn(
` Vacuous validation pass detected for non-bootstrap issue ${issueSummaryLabel(issue)}: every validator step reported zero items checked. Treating as failed.`,
);
validationResults = {
passed: false,
steps: validationResults.steps,
};
}

// If either sync failed, the agent's writes didn't land on the
// realm. Validators will have seen an empty/stale realm so a
// "passed" result is vacuous. Surface the sync error into the
Expand All @@ -403,6 +482,17 @@ export async function runIssueLoop(
validationResults && !validationResults.passed
? validator.formatForContext(validationResults)
: undefined;
let vacuousNotice = vacuousValidationFailure
? [
'Validation failed: every validator step reported "nothing to validate".',
'The workspace contains no card definition, no test file, no Spec, and no sample instances —',
'you called `signal_done` without producing any of the artifacts the ticket requires.',
'Re-read the issue\'s "Files to create" / acceptance section and produce each listed file',
'with native `Write` before calling `signal_done` again. At minimum a non-bootstrap issue',
'must yield: a `.gts` card definition, a co-located `.test.gts`, a `Spec/<slug>.json`',
'referencing the card, and at least one sample instance under `<CardType>/<slug>.json`.',
].join('\n')
: undefined;
if (syncFailed) {
let syncNotice = [
'Workspace sync to the realm FAILED — your file writes are still only on local disk.',
Expand All @@ -415,6 +505,10 @@ export async function runIssueLoop(
validationContext = validationSummary
? `${syncNotice}\n\n${validationSummary}`
: syncNotice;
} else if (vacuousNotice) {
validationContext = validationSummary
? `${vacuousNotice}\n\n${validationSummary}`
: vacuousNotice;
} else {
validationContext = validationSummary;
}
Expand Down
229 changes: 219 additions & 10 deletions packages/software-factory/tests/issue-loop.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -228,15 +228,35 @@ function makeTool(name: string, result: unknown = { ok: true }): FactoryTool {
};
}

// Real validators populate `details` with the per-step counts the new
// vacuous-pass guard inspects (filesChecked / modulesChecked /
// cardsChecked / passedCount+failedCount+skippedCount). Stub passing
// results with non-zero counts so the guard treats them as "the
// validator actually exercised something."
function makePassingValidation(): ValidationResults {
return {
passed: true,
steps: [
{ step: 'parse', passed: true, errors: [] },
{ step: 'lint', passed: true, errors: [] },
{ step: 'evaluate', passed: true, errors: [] },
{ step: 'instantiate', passed: true, errors: [] },
{ step: 'test', passed: true, errors: [] },
{ step: 'parse', passed: true, errors: [], details: { filesChecked: 1 } },
{ step: 'lint', passed: true, errors: [], details: { filesChecked: 1 } },
{
step: 'evaluate',
passed: true,
errors: [],
details: { modulesChecked: 1 },
},
{
step: 'instantiate',
passed: true,
errors: [],
details: { cardsChecked: 1 },
},
{
step: 'test',
passed: true,
errors: [],
details: { passedCount: 1, failedCount: 0, skippedCount: 0 },
},
],
};
}
Expand All @@ -245,11 +265,63 @@ function makeFailingValidation(): ValidationResults {
return {
passed: false,
steps: [
{ step: 'parse', passed: true, errors: [] },
{ step: 'lint', passed: false, errors: [{ message: 'lint error' }] },
{ step: 'evaluate', passed: true, errors: [] },
{ step: 'instantiate', passed: true, errors: [] },
{ step: 'test', passed: false, errors: [{ message: 'test failure' }] },
{ step: 'parse', passed: true, errors: [], details: { filesChecked: 1 } },
{
step: 'lint',
passed: false,
errors: [{ message: 'lint error' }],
details: { filesChecked: 1 },
},
{
step: 'evaluate',
passed: true,
errors: [],
details: { modulesChecked: 1 },
},
{
step: 'instantiate',
passed: true,
errors: [],
details: { cardsChecked: 1 },
},
{
step: 'test',
passed: false,
errors: [{ message: 'test failure' }],
details: { passedCount: 0, failedCount: 1, skippedCount: 0 },
},
],
};
}

// All five validators "passed" but with zero items checked — what
// happens in production when the agent calls signal_done before
// writing any .gts / test / Spec / instance. Real validators return
// this shape (each step is `passed: true` with `filesChecked: 0` etc).
function makeVacuousPassingValidation(): ValidationResults {
return {
passed: true,
steps: [
{ step: 'parse', passed: true, errors: [], details: { filesChecked: 0 } },
{ step: 'lint', passed: true, errors: [], details: { filesChecked: 0 } },
{
step: 'evaluate',
passed: true,
errors: [],
details: { modulesChecked: 0 },
},
{
step: 'instantiate',
passed: true,
errors: [],
details: { cardsChecked: 0 },
},
{
step: 'test',
passed: true,
errors: [],
details: { passedCount: 0, failedCount: 0, skippedCount: 0 },
},
],
};
}
Expand Down Expand Up @@ -1153,3 +1225,140 @@ module('issue-loop > project completion', function () {
);
});
});

// ---------------------------------------------------------------------------
// Vacuous-pass guard (CS-11102)
// ---------------------------------------------------------------------------

module('issue-loop > vacuous validation guard', function () {
test('non-bootstrap issue: signal_done + all-vacuous validation does NOT mark done', async function (assert) {
let store = new MockIssueStore([
makeIssue({
id: 'sn-1',
status: 'backlog',
priority: 'high',
order: 1,
issueType: 'feature',
} as Parameters<typeof makeIssue>[0]),
]);

// Agent calls signal_done with zero card writes — exactly the
// production failure mode from CS-11102.
let agent = new MockLoopAgent(
[
{ toolCalls: [{ tool: 'signal_done', args: {} }] },
{ toolCalls: [{ tool: 'signal_done', args: {} }] },
{ toolCalls: [{ tool: 'signal_done', args: {} }] },
],
store,
);

let result = await runIssueLoop(
makeLoopConfig({
agent,
issueStore: store,
// Every iteration: real validators legitimately report zero
// items because the workspace is empty.
createValidator: () =>
new MockValidator([
makeVacuousPassingValidation(),
makeVacuousPassingValidation(),
makeVacuousPassingValidation(),
]),
maxIterationsPerIssue: 3,
}),
);

let doneUpdate = store.updateCalls.find(
(c) => c.issueId === 'sn-1' && c.updates.status === 'done',
);
assert.notOk(
doneUpdate,
'issue is NOT marked done despite signal_done + every-step-passed',
);
assert.notStrictEqual(
result.outcome,
'all_issues_done',
'outer loop does not declare all-done',
);
assert.strictEqual(
result.issueResults[0].exitReason,
'blocked',
'issue exits as blocked after exhausting iterations',
);
});

test('bootstrap issue: signal_done + all-vacuous validation DOES mark done (exempt from guard)', async function (assert) {
let store = new MockIssueStore([
makeIssue({
id: 'bootstrap-seed',
status: 'backlog',
priority: 'critical',
order: 0,
issueType: 'bootstrap',
} as Parameters<typeof makeIssue>[0]),
]);

let agent = new MockLoopAgent(
[{ toolCalls: [{ tool: 'signal_done', args: {} }] }],
store,
);

let result = await runIssueLoop(
makeLoopConfig({
agent,
issueStore: store,
createValidator: () =>
new MockValidator([makeVacuousPassingValidation()]),
}),
);

let doneUpdate = store.updateCalls.find(
(c) => c.issueId === 'bootstrap-seed' && c.updates.status === 'done',
);
assert.ok(
doneUpdate,
'bootstrap issue IS marked done — exempt from the vacuous-pass guard',
);
assert.strictEqual(result.issueResults[0].exitReason, 'done');
});

test('non-bootstrap issue: signal_done + non-vacuous pass still marks done', async function (assert) {
// Sanity check: ensure the guard does not regress the happy path.
let store = new MockIssueStore([
makeIssue({
id: 'sn-2',
status: 'backlog',
priority: 'high',
order: 1,
issueType: 'feature',
} as Parameters<typeof makeIssue>[0]),
]);

let agent = new MockLoopAgent(
[
{
toolCalls: [
{ tool: 'write_file', args: { path: 'card.gts', content: 'x' } },
{ tool: 'signal_done', args: {} },
],
},
],
store,
);

let result = await runIssueLoop(
makeLoopConfig({
agent,
issueStore: store,
createValidator: () => new MockValidator([makePassingValidation()]),
}),
);

assert.strictEqual(result.issueResults[0].exitReason, 'done');
let doneUpdate = store.updateCalls.find(
(c) => c.issueId === 'sn-2' && c.updates.status === 'done',
);
assert.ok(doneUpdate, 'real implementation issue still marks done');
});
});