Skip to content

Commit 0f36a30

Browse files
committed
Add single-lane ContextBench readiness script
1 parent d675cda commit 0f36a30

1 file changed

Lines changed: 340 additions & 0 deletions

File tree

Lines changed: 340 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,340 @@
1+
import { spawnSync } from 'node:child_process';
2+
import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from 'node:fs';
3+
import { basename, join, relative } from 'node:path';
4+
5+
// Lane and task selection, read from the environment.
// LANE_ID is mandatory; TARGET_TASK_ID and ROOT fall back to the defaults below.
const laneId = process.env.LANE_ID;
const targetTaskId = process.env.TARGET_TASK_ID || 'SWE-Bench-Pro__go__maintenance__bugfix__4df06349';
const root = process.env.ROOT || '/tmp/contextbench-single-lane';
// Every artifact this script writes goes under <root>/pack.
const outDir = join(root, 'pack');
const requiredLanes = ['raw-native', 'codebase-context', 'codebase-memory-mcp', 'grepai', 'codegraphcontext'];
if (!requiredLanes.includes(laneId)) throw new Error(`unsupported LANE_ID: ${laneId}`);
mkdirSync(outDir, { recursive: true });

// Fail with a readable message instead of the TypeError readFileSync raises for an undefined path.
if (!process.env.TASK_PAYLOADS) throw new Error('TASK_PAYLOADS must point to the task payloads JSON file');
const payloads = JSON.parse(readFileSync(process.env.TASK_PAYLOADS, 'utf8'));
if (!Array.isArray(payloads?.tasks)) throw new Error(`task payloads file ${process.env.TASK_PAYLOADS} has no "tasks" array`);
const task = payloads.tasks.find((candidate) => candidate?.instance_id === targetTaskId);
if (!task) throw new Error(`target task ${targetTaskId} missing from payloads`);
// Checkout directory of the target task; later code walks and searches it.
const repo = task.repo_checkout_path;
if (typeof repo !== 'string' || repo === '') throw new Error(`target task ${targetTaskId} has no repo_checkout_path`);
17+
18+
/**
 * Run a command synchronously and describe the outcome as a plain record.
 *
 * Failures never escape as exceptions: spawn errors reported by spawnSync
 * (for example a missing binary) and argument errors that spawnSync throws
 * synchronously (for example `cmd` being undefined because an environment
 * variable is unset) both end up in the `error` field with `status: null`.
 *
 * @param {string} cmd - Executable to run.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {{cwd?: string, env?: object, timeoutMs?: number}} [opts] - Working
 *   directory (defaults to process.cwd()), environment (defaults to
 *   process.env) and timeout in milliseconds (defaults to 300000).
 * @returns {{command: string, cwd: string, status: (number|null), signal: (string|null),
 *   error: (string|null), durationMs: number, stdout: string, stderr: string}}
 */
function run(cmd, args, opts = {}) {
  const cwd = opts.cwd || process.cwd();
  const started = Date.now();
  let r;
  try {
    r = spawnSync(cmd, args, {
      cwd,
      env: opts.env || process.env,
      encoding: 'utf8',
      timeout: opts.timeoutMs || 300000,
      maxBuffer: 128 * 1024 * 1024,
    });
  } catch (err) {
    // spawnSync validates its arguments eagerly and throws; fold that into the
    // same record shape used for every other failure.
    r = { status: null, signal: null, error: err, stdout: '', stderr: '' };
  }
  return {
    command: [cmd, ...args].join(' '),
    cwd,
    status: typeof r.status === 'number' ? r.status : null,
    signal: r.signal,
    error: r.error?.message || null,
    durationMs: Date.now() - started,
    stdout: r.stdout || '',
    stderr: r.stderr || '',
  };
}
38+
39+
/** Sum of the `durationMs` fields of the given command records (0 for an empty list). */
const durationOf = (commands) => {
  let total = 0;
  for (const { durationMs } of commands) total += durationMs;
  return total;
};
40+
41+
/**
 * Reduce free text to a short keyword query.
 *
 * Markdown/punctuation characters become spaces, then the first 24 words that
 * are at least four characters long and not in the stop list are joined with
 * single spaces. The stop-list comparison is case-insensitive; the kept words
 * retain their original casing.
 *
 * @param {*} text - Source text; falsy values yield an empty query.
 * @returns {string} Space-separated keywords.
 */
function queryOf(text) {
  const stop = new Set(['that', 'this', 'with', 'from', 'when', 'then', 'into', 'should', 'would', 'could', 'there', 'where', 'which', 'about', 'after', 'before', 'have', 'will', 'been', 'than', 'also', 'only', 'some', 'using', 'must']);
  const words = String(text || '')
    .replace(/[`*_#>\[\](){},.;:!?/\\]/g, ' ')
    .split(/\s+/);
  const kept = [];
  for (const word of words) {
    if (word.length < 4 || stop.has(word.toLowerCase())) continue;
    kept.push(word);
    if (kept.length === 24) break;
  }
  return kept.join(' ');
}
50+
51+
/**
 * Recursively list the files under `dir` as forward-slash relative paths.
 *
 * Entries named `.git`, `vendor` or `node_modules` are skipped at every level.
 * Anything that is not a directory is recorded as a file.
 *
 * @param {string} dir - Directory to read.
 * @param {string} [prefix] - Relative path of `dir` from the walk's starting point.
 * @param {string[]} [files] - Accumulator that is appended to and returned.
 * @returns {string[]} The accumulator, holding every collected relative path.
 */
function collectRepoFiles(dir, prefix = '', files = []) {
  const skipped = new Set(['.git', 'vendor', 'node_modules']);
  readdirSync(dir, { withFileTypes: true }).forEach((entry) => {
    if (skipped.has(entry.name)) return;
    const rel = !prefix ? entry.name : `${prefix}/${entry.name}`;
    if (entry.isDirectory()) {
      collectRepoFiles(join(dir, entry.name), rel, files);
    } else {
      files.push(rel.replaceAll('\\', '/'));
    }
  });
  return files;
}
61+
62+
// Snapshot of the checkout taken once at startup:
//   repoFiles    - every collected relative path,
//   repoFileSet  - the same paths for O(1) membership checks,
//   basenameMap  - file name -> list of relative paths that end in that name.
const repoFiles = collectRepoFiles(repo);
const repoFileSet = new Set(repoFiles);
const basenameMap = new Map();
repoFiles.forEach((file) => {
  const name = basename(file);
  if (!basenameMap.has(name)) basenameMap.set(name, []);
  basenameMap.get(name).push(file);
});
71+
72+
/**
 * Remove ANSI colour/style escape sequences (ESC `[` ... `m`) from text.
 *
 * @param {*} text - Value to clean; falsy values become the empty string.
 * @returns {string} The text without those escape sequences.
 */
function stripAnsi(text) {
  const raw = text ? String(text) : '';
  return raw.replace(/\u001b\[[0-9;]*m/g, '');
}
75+
76+
/**
 * Best-effort JSON extraction from tool output.
 *
 * Tries, in order: the whole trimmed text; the span from the first `{` to the
 * last `}`; the span from the first `[` to the last `]`. The first candidate
 * that parses wins. Parse failures are expected here and deliberately ignored.
 *
 * @param {*} s - Raw output (ANSI escapes are stripped first).
 * @returns {*} The parsed value, or null when nothing parses or the text is empty.
 */
function jsonish(s) {
  const text = stripAnsi(s).trim();
  if (text === '') return null;

  // Wrap JSON.parse so "parsed to null" and "did not parse" stay distinguishable.
  const attempt = (candidate) => {
    try {
      return { ok: true, value: JSON.parse(candidate) };
    } catch {
      return { ok: false };
    }
  };

  const whole = attempt(text);
  if (whole.ok) return whole.value;

  for (const [open, close] of [['{', '}'], ['[', ']']]) {
    const from = text.indexOf(open);
    const to = text.lastIndexOf(close);
    if (from < 0 || to <= from) continue;
    const part = attempt(text.slice(from, to + 1));
    if (part.ok) return part.value;
  }
  return null;
}
93+
94+
/**
 * Map a path reported by a tool onto a repo-relative, forward-slash path.
 *
 * Steps: strip ANSI escapes and a leading `file://`, convert backslashes,
 * make paths under the checkout relative to it, drop everything up to and
 * including the first directory after `/tmp/contextbench-checkouts/`, and
 * remove leading slashes or a leading `./`. Paths that are empty, contain
 * `://` or `..`, or start with `tmp/` are rejected. A surviving path is
 * accepted when it is a known repo file or exists on disk under the checkout;
 * otherwise it resolves by file name only when exactly one repo file has it.
 *
 * @param {*} file - Path-like value taken from tool output.
 * @returns {string} Repo-relative path, or '' when it cannot be resolved.
 */
function norm(file) {
  const toPosix = (p) => p.replaceAll('\\', '/');
  const checkoutMarker = '/tmp/contextbench-checkouts/';

  let candidate = toPosix(stripAnsi(file).replace(/^file:\/\//, '')).trim();
  if (candidate === '') return '';

  if (candidate.startsWith(toPosix(repo))) candidate = toPosix(relative(repo, candidate));

  const markerAt = candidate.indexOf(checkoutMarker);
  if (markerAt !== -1) {
    // The first segment after the marker is the checkout directory itself; discard it.
    const [, ...inside] = candidate.slice(markerAt + checkoutMarker.length).split('/');
    candidate = inside.join('/');
  }

  candidate = candidate.replace(/^\/+/, '').replace(/^\.\//, '');
  const rejected =
    candidate === '' || candidate.includes('://') || candidate.includes('..') || candidate.startsWith('tmp/');
  if (rejected) return '';

  if (repoFileSet.has(candidate) || existsSync(join(repo, candidate))) return candidate;

  // Last resort: resolve by file name when it is unambiguous within the repo.
  const sameName = basenameMap.get(basename(candidate));
  return sameName?.length === 1 ? sameName[0] : '';
}
112+
113+
/**
 * Append a location to `locs` if `file` resolves to a repo path via norm().
 *
 * Line numbers are coerced with Number(); a start that is missing, zero,
 * negative or non-numeric becomes 1, and the end is never smaller than the start.
 *
 * @param {object[]} locs - Accumulator that receives `{ file, start, end, source }`.
 * @param {*} file - Path-like value from tool output.
 * @param {*} [start] - First line (defaults to 1).
 * @param {*} [end] - Last line (defaults to `start`).
 * @param {string} [source] - Label recorded on the location (defaults to 'tool').
 * @returns {void}
 */
function add(locs, file, start = 1, end = start, source = 'tool') {
  const clean = norm(file);
  if (clean) {
    const first = Math.max(1, Number(start) || 1);
    const last = Math.max(first, Number(end) || first);
    locs.push({ file: clean, start: first, end: last, source });
  }
}
119+
120+
/**
 * Recursively scan a parsed JSON value for location-like objects.
 *
 * For every non-array object the first truthy value among a fixed list of
 * path-like keys, start-line keys and end-line keys is handed to add(), which
 * decides whether it is a usable location; all property values are then
 * visited in turn. Arrays are visited element by element; primitives and null
 * are ignored.
 *
 * @param {*} value - Parsed JSON value.
 * @param {object[]} locs - Accumulator passed through to add().
 * @param {string} source - Label recorded on each location.
 * @returns {void}
 */
function walk(value, locs, source) {
  if (value === null || typeof value !== 'object') return;

  if (Array.isArray(value)) {
    value.forEach((item) => walk(item, locs, source));
    return;
  }

  const file =
    value.file || value.path || value.file_path || value.relative_path || value.filename || value.source_path || value.uri;
  const start = value.start_line || value.startLine || value.line || value.line_number || value.start || 1;
  const end = value.end_line || value.endLine || value.end || value.line || 1;
  add(locs, file, start, end, source);

  Object.values(value).forEach((child) => walk(child, locs, source));
}
135+
136+
/**
 * Extract candidate locations from raw tool output and append them to `locs`.
 *
 * Three passes run over the ANSI-stripped text:
 *   1. if the text contains JSON (see jsonish), its objects are scanned by walk();
 *   2. lines shaped like `path.ext:LINE:` at the start of a line;
 *   3. any path-like token ending in a known extension, optionally followed by
 *      `:N`, `#LN` or ` line N`.
 * The same location may be added more than once; resolution and validation
 * of each path is left to add().
 *
 * @param {*} text - Raw stdout/stderr of a tool.
 * @param {object[]} locs - Accumulator passed through to walk()/add().
 * @param {string} source - Label recorded on each location.
 * @returns {void}
 */
function collect(text, locs, source) {
  const cleaned = stripAnsi(text);
  const parsed = jsonish(cleaned);
  if (parsed) walk(parsed, locs, source);

  const rgLine = /^(.+?\.(?:go|mod|sum|json|yml|yaml|md|ts|tsx|js|jsx|py|rs|java|c|cc|cpp|h|hpp|rb|php|cs|kt|swift|vue|svelte)):(\d+):/gm;
  for (const m of cleaned.matchAll(rgLine)) add(locs, m[1], m[2], m[2], source);

  // The negative lookahead forces the extension to end at a token boundary.
  // Without it the alternation stops at the first prefix that fits ("ts" in
  // "app.tsx", "c" in "main.cpp"), yielding a truncated path and no line number.
  const fileLine = /([A-Za-z0-9_.\/-]+\.(?:go|mod|sum|json|yml|yaml|md|ts|tsx|js|jsx|py|rs|java|c|cc|cpp|h|hpp|rb|php|cs|kt|swift|vue|svelte))(?![A-Za-z0-9_])(?::|#L|\s+line\s+)?(\d+)?/g;
  for (const m of cleaned.matchAll(fileLine)) add(locs, m[1], m[2] || 1, m[2] || 1, source);
}
146+
147+
/**
 * De-duplicate locations by `file:start:end`, keeping first occurrences in
 * their original order and stopping once `max` entries have been kept.
 *
 * The limit is checked after each insertion, so a non-empty input always
 * yields at least one entry.
 *
 * @param {object[]} locs - Locations with `file`, `start` and `end`.
 * @param {number} [max] - Maximum number of entries to keep (defaults to 120).
 * @returns {object[]} New array of unique locations.
 */
function uniq(locs, max = 120) {
  // A Map iterates in insertion order, so it serves as both the seen-set and the output.
  const kept = new Map();
  for (const loc of locs) {
    const key = `${loc.file}:${loc.start}:${loc.end}`;
    if (kept.has(key)) continue;
    kept.set(key, loc);
    if (kept.size >= max) break;
  }
  return [...kept.values()];
}
159+
160+
/**
 * Persist each command record as `<lane>-command-<n>.json` (n starts at 1)
 * in the output directory, with stdout and stderr each truncated to their
 * first 200000 characters.
 *
 * @param {string} lane - Lane id used as the file-name prefix.
 * @param {object[]} commands - Records as returned by run().
 * @returns {void}
 */
function writeCommands(lane, commands) {
  const cap = 200000;
  commands.forEach((command, index) => {
    const record = {
      ...command,
      stdout: command.stdout.slice(0, cap),
      stderr: command.stderr.slice(0, cap),
    };
    const target = join(outDir, `${lane}-command-${index + 1}.json`);
    writeFileSync(target, JSON.stringify(record, null, 2));
  });
}
165+
166+
/**
 * Write the lane's command logs and assemble its readiness summary.
 *
 * A lane is `ready` when setup and index are each either 'completed' or
 * 'not_required', at least one command exited with status 0, and at least one
 * unique candidate location was collected.
 *
 * @param {string} lane - Lane id.
 * @param {object[]} commands - Records as returned by run().
 * @param {object[]} locs - Collected locations (de-duplicated here).
 * @param {string} setupStatus - Setup outcome label.
 * @param {string} indexStatus - Index outcome label.
 * @param {object} setupIndex - Timing details, copied into the result unchanged.
 * @returns {object} Readiness summary for the lane.
 */
function laneResult(lane, commands, locs, setupStatus, indexStatus, setupIndex) {
  writeCommands(lane, commands);
  const candidates = uniq(locs);
  const passed = (status) => status === 'completed' || status === 'not_required';
  const toolCallable = commands.some(({ status }) => status === 0);
  // Only the small fields are kept here; full output lives in the per-command files.
  const summaries = commands.map(({ command, status, error, durationMs }) => ({ command, status, error, durationMs }));
  return {
    lane,
    ready: passed(setupStatus) && passed(indexStatus) && toolCallable && candidates.length > 0,
    setupStatus,
    indexStatus,
    toolCallable,
    candidateCount: candidates.length,
    setupIndex,
    commands: summaries,
    candidates,
  };
}
181+
182+
// Search queries shared by the lanes: one derived from the task's problem
// statement, followed by fixed phrases that are NOT derived from the payload.
const baseQuery = queryOf(task.problem_statement);
const queryVariants = [baseQuery].concat([
  'system metrics written on start prometheus writer insights',
  'Bearer token custom authorization header authentication',
  'Subsonic authorization header token parsing',
  'startup metrics insights prometheus initialization',
]);
190+
191+
/**
 * Lane "raw-native": run one case-insensitive `rg -n` search per fixed term in
 * the checkout (excluding .git, vendor and node_modules) and collect locations
 * from each command's stdout and stderr.
 *
 * @returns {object} Readiness summary; setup and index are 'not_required'.
 */
function runRawNative() {
  const lane = 'raw-native';
  const terms = ['metrics', 'prometheus', 'insights', 'startup', 'start', 'written', 'authorization', 'Bearer', 'token', 'header', 'authentication', 'subsonic'];
  const rgFlags = ['-n', '-i', '--glob', '!.git', '--glob', '!vendor/**', '--glob', '!node_modules/**'];
  const commands = [];
  const locs = [];
  for (const term of terms) {
    const result = run('rg', [...rgFlags, term, '.'], { cwd: repo, timeoutMs: 60000 });
    commands.push(result);
    collect(result.stdout, locs, lane);
    collect(result.stderr, locs, lane);
  }
  const timings = { setupDurationMs: 0, indexDurationMs: 0, queryDurationMs: durationOf(commands) };
  return laneResult(lane, commands, locs, 'not_required', 'not_required', timings);
}
202+
203+
/**
 * Lane "codebase-context": invoke `node dist/index.js` (resolved against the
 * current working directory) for a version check, a reindex, and one JSON
 * search per query variant, with CODEBASE_ROOT pointing at the checkout.
 *
 * @returns {object} Readiness summary; setup/index statuses follow the exit
 *   codes of the version check and the reindex.
 */
function runCodebaseContext() {
  const lane = 'codebase-context';
  const env = { ...process.env, CODEBASE_ROOT: repo, CODEBASE_CONTEXT_ASCII: '1' };
  const cli = (args, timeoutMs) => run('node', ['dist/index.js', ...args], { env, timeoutMs });

  const setup = cli(['--version'], 60000);
  const index = cli(['reindex'], 1200000);
  const commands = [setup, index];
  const locs = [];
  const searches = [];

  for (const q of queryVariants) {
    const search = cli(['search', '--query', q, '--intent', 'edit', '--limit', '40', '--json'], 300000);
    commands.push(search);
    searches.push(search);
    collect(search.stdout, locs, lane);
    collect(search.stderr, locs, lane);
  }

  const setupStatus = setup.status === 0 ? 'completed' : 'setup_failed';
  const indexStatus = index.status === 0 ? 'completed' : 'index_failed';
  const timings = { setupDurationMs: setup.durationMs, indexDurationMs: index.durationMs, queryDurationMs: durationOf(searches) };
  return laneResult(lane, commands, locs, setupStatus, indexStatus, timings);
}
221+
222+
/**
 * Lane "codebase-memory-mcp": drive the binary named by the CBM_BIN
 * environment variable through a version check, `cli index_repository`,
 * one `cli search_graph` per query variant and one `cli search_code` per
 * fixed term, collecting locations from every search's stdout and stderr.
 *
 * The project name passed to the searches is the `project` field of the JSON
 * printed by the index step, falling back to the checkout's directory name.
 *
 * @returns {object} Readiness summary; setup/index statuses follow the exit
 *   codes of the version check and the index step.
 * @throws {Error} When CBM_BIN is not set.
 */
function runCodebaseMemoryMcp() {
  const bin = process.env.CBM_BIN;
  // Without this check an unset CBM_BIN reaches spawnSync as `undefined` and
  // surfaces as an argument-type error that does not mention the variable.
  if (!bin) throw new Error('CBM_BIN must be set to the codebase-memory-mcp executable for the codebase-memory-mcp lane');

  const commands = [];
  const locs = [];
  const env = { ...process.env, CBM_CACHE_DIR: join(outDir, 'cbm-cache'), CBM_DIAGNOSTICS: '1' };
  const setup = run(bin, ['--version'], { env, timeoutMs: 60000 });
  commands.push(setup);
  const index = run(bin, ['cli', 'index_repository', JSON.stringify({ repo_path: repo })], { cwd: repo, env, timeoutMs: 2700000 });
  commands.push(index);
  const project = (jsonish(index.stdout) || jsonish(index.stderr) || {}).project || basename(repo);
  const searches = [];
  for (const q of queryVariants) {
    const graph = run(bin, ['cli', 'search_graph', JSON.stringify({ project, query: q, limit: 50 })], { cwd: repo, env, timeoutMs: 120000 });
    commands.push(graph);
    searches.push(graph);
    collect(graph.stdout, locs, 'codebase-memory-mcp');
    collect(graph.stderr, locs, 'codebase-memory-mcp');
  }
  for (const term of ['metrics', 'prometheus', 'authorization', 'Bearer', 'token']) {
    const code = run(bin, ['cli', 'search_code', JSON.stringify({ project, pattern: term, mode: 'compact', limit: 50 })], { cwd: repo, env, timeoutMs: 120000 });
    commands.push(code);
    searches.push(code);
    collect(code.stdout, locs, 'codebase-memory-mcp');
    collect(code.stderr, locs, 'codebase-memory-mcp');
  }
  return laneResult('codebase-memory-mcp', commands, locs, setup.status === 0 ? 'completed' : 'setup_failed', index.status === 0 ? 'completed' : 'index_failed', { setupDurationMs: setup.durationMs, indexDurationMs: index.durationMs, queryDurationMs: durationOf(searches) });
}
248+
249+
/**
 * Lane "grepai": check the version, initialise grepai in the checkout, start
 * the background watcher, poll `grepai status` (up to 12 times, 5 s apart)
 * until its output mentions a non-zero chunk or indexed-file count, run one
 * JSON search per query variant, and stop the watcher.
 *
 * The watcher is stopped in a `finally` block so it is not left running if
 * polling or searching throws.
 *
 * @returns {object} Readiness summary; setup requires both the version check
 *   and init to exit 0, index status follows the exit code of `watch --background`.
 */
function runGrepai() {
  const commands = [];
  const locs = [];
  const setup = run('grepai', ['version'], { timeoutMs: 60000 });
  commands.push(setup);
  const init = run('grepai', ['init', '--yes', '--provider', 'synthetic', '--backend', 'gob'], { cwd: repo, timeoutMs: 120000 });
  commands.push(init);
  const watch = run('grepai', ['watch', '--background'], { cwd: repo, timeoutMs: 120000 });
  commands.push(watch);
  const statusChecks = [];
  const searches = [];
  let stop;
  try {
    for (let i = 0; i < 12; i += 1) {
      const status = run('grepai', ['status', '--no-ui'], { cwd: repo, timeoutMs: 60000 });
      commands.push(status);
      statusChecks.push(status);
      if (/chunks?\D+[1-9]|indexed files?\D+[1-9]/i.test(`${status.stdout}\n${status.stderr}`)) break;
      // Pause between polls; the sleep is not recorded as a lane command.
      run('sleep', ['5'], { timeoutMs: 10000 });
    }
    for (const q of queryVariants) {
      const search = run('grepai', ['search', q, '--json', '--compact', '--limit', '40'], { cwd: repo, timeoutMs: 180000 });
      commands.push(search);
      searches.push(search);
      collect(search.stdout, locs, 'grepai');
      collect(search.stderr, locs, 'grepai');
    }
  } finally {
    // Always attempt teardown, including when the block above throws.
    stop = run('grepai', ['watch', '--stop'], { cwd: repo, timeoutMs: 60000 });
    commands.push(stop);
  }
  return laneResult('grepai', commands, locs, setup.status === 0 && init.status === 0 ? 'completed' : 'setup_failed', watch.status === 0 ? 'completed' : 'index_failed', { setupDurationMs: setup.durationMs + init.durationMs, indexDurationMs: watch.durationMs + durationOf(statusChecks), queryDurationMs: durationOf(searches), teardownDurationMs: stop.durationMs });
}
278+
279+
/**
 * Lane "codegraphcontext": pick the CLI name (`codegraphcontext` if its
 * `--help` exits 0, otherwise `cgc`), index the checkout, then run a fixed set
 * of `find pattern`, `analyze callers` and `analyze complexity` queries,
 * collecting locations from each query's stdout and stderr.
 *
 * When the first probe fails it is not recorded; the `cgc --help` run is the
 * setup command in that case.
 *
 * @returns {object} Readiness summary; setup/index statuses follow the exit
 *   codes of the `--help` run and the index step.
 */
function runCodeGraphContext() {
  const lane = 'codegraphcontext';
  const commands = [];
  const locs = [];
  const queries = [];

  const probe = run('codegraphcontext', ['--help'], { timeoutMs: 60000 });
  const probeOk = probe.status === 0;
  const cgcCommand = probeOk ? 'codegraphcontext' : 'cgc';
  const setup = probeOk ? probe : run(cgcCommand, ['--help'], { timeoutMs: 60000 });
  commands.push(setup);

  const index = run(cgcCommand, ['index', '.'], { cwd: repo, timeoutMs: 1200000 });
  commands.push(index);

  // Run one query in the checkout, log it, and harvest locations from its output.
  const query = (args) => {
    const result = run(cgcCommand, args, { cwd: repo, timeoutMs: 180000 });
    commands.push(result);
    queries.push(result);
    collect(result.stdout, locs, lane);
    collect(result.stderr, locs, lane);
  };

  for (const pattern of ['Metrics', 'Prometheus', 'Authorization', 'Bearer', 'Token', 'Subsonic', 'Header']) {
    query(['find', 'pattern', pattern]);
  }
  for (const symbol of ['main', 'init', 'WriteInitialMetrics', 'ServeHTTP', 'GetUser']) {
    query(['analyze', 'callers', symbol]);
  }
  query(['analyze', 'complexity', '--limit', '80']);

  const setupStatus = setup.status === 0 ? 'completed' : 'setup_failed';
  const indexStatus = index.status === 0 ? 'completed' : 'index_failed';
  const timings = { setupDurationMs: setup.durationMs, indexDurationMs: index.durationMs, queryDurationMs: durationOf(queries) };
  return laneResult(lane, commands, locs, setupStatus, indexStatus, timings);
}
310+
311+
// Dispatch to the selected lane. Any id not listed here runs the
// codegraphcontext lane.
const laneRunners = new Map([
  ['raw-native', runRawNative],
  ['codebase-context', runCodebaseContext],
  ['codebase-memory-mcp', runCodebaseMemoryMcp],
  ['grepai', runGrepai],
]);
const result = (laneRunners.get(laneId) ?? runCodeGraphContext)();

// Candidate pack: task identity, the queries used, and the lane's readiness summary.
const pack = {
  createdAt: new Date().toISOString(),
  targetTaskId,
  task: {
    instance_id: task.instance_id,
    repo: task.repo,
    base_commit: task.base_commit,
    problem_statement: task.problem_statement,
  },
  queryVariants,
  readiness: result,
};

const outputs = [
  ['candidate-pack', pack],
  ['readiness', result],
];
for (const [suffix, payload] of outputs) {
  writeFileSync(join(outDir, `${laneId}-${suffix}.json`), JSON.stringify(payload, null, 2));
}

// The pack is also printed between fixed marker lines on stdout.
[
  'CONTEXTBENCH_SINGLE_LANE_READINESS_JSON_START',
  JSON.stringify(pack, null, 2),
  'CONTEXTBENCH_SINGLE_LANE_READINESS_JSON_END',
].forEach((line) => console.log(line));

// A lane that is not ready makes the process exit with a non-zero code.
if (!result.ready) process.exitCode = 1;

0 commit comments

Comments
 (0)