Skip to content

Commit f440d4c

Browse files
committed
Add artifact-aware ContextBench five-lane scorer
1 parent 71467e8 commit f440d4c

1 file changed

Lines changed: 83 additions & 0 deletions

File tree

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,83 @@
1+
import { existsSync, mkdirSync, readFileSync, writeFileSync } from 'node:fs';
2+
import { join } from 'node:path';
3+
import { spawnSync } from 'node:child_process';
4+
5+
// Directory that receives this script's outputs; override with the ROOT env var.
const root = process.env.ROOT || '/tmp/contextbench-five-lane-score';
// Input selections file; override with SOURCE_SELECTIONS_PATH. The default is a relative path.
const sourceSelectionsPath = process.env.SOURCE_SELECTIONS_PATH || 'scripts/contextbench-five-lane-selections.json';
// Directory searched for per-artifact readiness files; defaults to a subdirectory of `root`.
const externalRoot = process.env.EXTERNAL_READINESS_ROOT || join(root, 'external-readiness');
// Where the selections, enriched with readiness data, are written before scoring.
const resolvedSelectionsPath = join(root, 'resolved-five-lane-selections.json');
9+
10+
/**
 * Read a UTF-8 file and parse its contents as JSON.
 *
 * @param {string} path - File to read.
 * @returns {*} The parsed JSON value.
 * @throws {Error} Whatever `readFileSync` throws when the file cannot be read.
 * @throws {SyntaxError} When the contents are not valid JSON; the message names the file
 *   and the original parser error is attached as `cause`.
 */
function readJson(path) {
  const text = readFileSync(path, 'utf8');
  try {
    return JSON.parse(text);
  } catch (cause) {
    // JSON.parse errors do not mention the file, which makes a bad artifact hard to find.
    throw new SyntaxError(`invalid JSON in ${path}: ${cause.message}`, { cause });
  }
}
13+
14+
/**
 * Locate, load, and validate the readiness artifact for one lane.
 *
 * The file is looked up under `<externalRoot>/<artifactName>`, first inside a `pack/`
 * subdirectory and then directly in the artifact directory; at each level the
 * artifact-named file is preferred over the lane-named one. The first existing
 * candidate wins.
 *
 * @param {string} lane - Lane identifier, used in file names and error messages.
 * @param {string} artifactName - Directory name of the readiness artifact.
 * @returns {object} The parsed readiness document.
 * @throws {Error} When no candidate file exists, when the document is not a JSON
 *   object, when `ready` or `toolCallable` is not exactly `true`, or when
 *   `candidateCount` does not coerce to a finite number greater than zero.
 */
function loadReadiness(lane, artifactName) {
  const candidates = [
    join(externalRoot, artifactName, 'pack', `${artifactName}-readiness.json`),
    join(externalRoot, artifactName, 'pack', `${lane}-readiness.json`),
    join(externalRoot, artifactName, `${artifactName}-readiness.json`),
    join(externalRoot, artifactName, `${lane}-readiness.json`),
  ];
  const path = candidates.find((candidate) => existsSync(candidate));
  if (!path) {
    throw new Error(`readiness artifact for ${lane} not found under ${join(externalRoot, artifactName)}`);
  }

  const readiness = readJson(path);
  // A file containing `null` or a bare primitive would otherwise fail with an
  // opaque TypeError on the property reads below.
  if (readiness === null || typeof readiness !== 'object') {
    throw new Error(`${lane} readiness artifact at ${path} is not a JSON object`);
  }
  if (readiness.ready !== true) throw new Error(`${lane} readiness artifact is not ready`);
  if (readiness.toolCallable !== true) throw new Error(`${lane} readiness artifact does not prove callable tool`);

  const candidateCount = Number(readiness.candidateCount);
  if (!Number.isFinite(candidateCount) || candidateCount <= 0) {
    throw new Error(`${lane} readiness artifact has no candidates`);
  }
  return readiness;
}
33+
34+
/**
 * Verify that every file a selection references appears among the readiness candidates.
 *
 * Paths are compared after converting backslashes to forward slashes. The check is
 * skipped when the selection sets `validateCandidateFiles` to `false`, or when the
 * readiness document lists no candidate with a non-empty `file`.
 *
 * @param {object} selection - Lane selection; reads `validateCandidateFiles`, `files`,
 *   `spans[].file`, and `lane_id` / `lane` (for the error message).
 * @param {object} readiness - Readiness document; reads `candidates[].file`.
 * @returns {void}
 * @throws {Error} When at least one selected file is not a candidate file.
 */
function assertSelectedFilesCameFromCandidates(selection, readiness) {
  if (selection.validateCandidateFiles === false) return;

  const toForwardSlashes = (file) => String(file).replaceAll('\\', '/');

  // Candidates without a usable `file` are dropped rather than recorded as '', so a
  // candidate list that carries no file information leaves the set empty and the
  // check is skipped instead of rejecting every selected file.
  const candidateFiles = new Set(
    (readiness.candidates || [])
      .map((candidate) => candidate?.file)
      .filter(Boolean)
      .map(toForwardSlashes),
  );
  if (candidateFiles.size === 0) return;

  const selectedFiles = new Set(
    [
      ...(selection.files || []),
      ...(selection.spans || []).map((span) => span?.file),
    ]
      .filter(Boolean)
      .map(toForwardSlashes),
  );

  const missing = [...selectedFiles].filter((file) => !candidateFiles.has(file));
  if (missing.length > 0) {
    throw new Error(`${selection.lane_id || selection.lane} selected files missing from readiness candidates: ${missing.join(', ')}`);
  }
}
47+
48+
// Load the source selections and attach readiness data to each lane that names a
// readiness artifact. Lanes without `readinessArtifact` pass through unchanged.
const selections = readJson(sourceSelectionsPath);
if (!Array.isArray(selections?.laneSelections)) {
  // Fail with the offending file named instead of a TypeError from `.map`.
  throw new Error(`${sourceSelectionsPath} must contain a laneSelections array`);
}
const resolved = {
  ...selections,
  laneSelections: selections.laneSelections.map((selection) => {
    const lane = selection.lane_id || selection.lane;
    if (!selection.readinessArtifact) return selection;

    // Both calls throw on failure, so one bad lane aborts the whole run.
    const readiness = loadReadiness(lane, selection.readinessArtifact);
    assertSelectedFilesCameFromCandidates(selection, readiness);

    return {
      ...selection,
      readiness: {
        // Copied from the readiness artifact.
        setupStatus: readiness.setupStatus,
        indexStatus: readiness.indexStatus,
        toolCallable: readiness.toolCallable,
        candidateCount: readiness.candidateCount,
        setupIndex: readiness.setupIndex,
        // Copied from the selection itself.
        sourceRun: selection.sourceRun,
        sourceJob: selection.sourceJob,
        sourceArtifact: selection.sourceArtifact,
        sourceDigest: selection.sourceDigest,
      },
    };
  }),
};
72+
73+
// Persist the resolved selections so the scorer script can read them from disk.
mkdirSync(root, { recursive: true });
writeFileSync(resolvedSelectionsPath, `${JSON.stringify(resolved, null, 2)}\n`);

// Run the scorer with the Node binary executing this script (process.execPath)
// rather than whichever `node` happens to be first on PATH. The resolved
// selections file is handed over through the SELECTIONS_PATH environment variable.
const result = spawnSync(process.execPath, ['scripts/contextbench-score-five-lane-selections.mjs'], {
  cwd: process.cwd(),
  env: { ...process.env, SELECTIONS_PATH: resolvedSelectionsPath },
  stdio: 'inherit',
  timeout: 60 * 60 * 1000, // one hour, in milliseconds
});
// `error` is set when the child could not be spawned or timed out.
if (result.error) throw result.error;
// `status` is not a number when the child was terminated by a signal; report failure.
process.exitCode = typeof result.status === 'number' ? result.status : 1;

0 commit comments

Comments
 (0)