Skip to content

Commit 7665d32

Browse files
committed
Add ContextBench ripgrep readiness pack generator
1 parent 05f1385 commit 7665d32

1 file changed

Lines changed: 170 additions & 0 deletions

File tree

Lines changed: 170 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,170 @@
1+
import { spawnSync } from 'node:child_process';
2+
import { mkdirSync, readFileSync, writeFileSync } from 'node:fs';
3+
import { join } from 'node:path';
4+
5+
// Benchmark task to probe; override with the TARGET_TASK_ID environment variable.
const targetTaskId = process.env.TARGET_TASK_ID || 'SWE-Bench-Pro__go__maintenance__bugfix__4df06349';
// Base directory for generated artifacts; override with the ROOT environment variable.
const root = process.env.ROOT || '/tmp/contextbench-ripgrep-readiness';
const outDir = join(root, 'pack');
const logsDir = join(root, 'logs');
mkdirSync(outDir, { recursive: true });
mkdirSync(logsDir, { recursive: true });

// Fail with an actionable message instead of the opaque ERR_INVALID_ARG_TYPE
// that readFileSync(undefined) raises when TASK_PAYLOADS is not set.
const payloadsPath = process.env.TASK_PAYLOADS;
if (!payloadsPath) throw new Error('TASK_PAYLOADS must be set to the path of the task payloads JSON file');
const payloads = JSON.parse(readFileSync(payloadsPath, 'utf8'));
if (!Array.isArray(payloads?.tasks)) throw new Error(`task payloads file ${payloadsPath} has no "tasks" array`);
const task = payloads.tasks.find((candidate) => candidate?.instance_id === targetTaskId);
if (!task) throw new Error(`target task ${targetTaskId} missing from payloads`);
const repo = task.repo_checkout_path;
// run() substitutes process.cwd() for an empty cwd, so a task without a
// checkout path would silently search the wrong directory and could report
// readiness based on unrelated files. Refuse to continue instead.
if (typeof repo !== 'string' || repo === '') {
  throw new Error(`target task ${targetTaskId} has no repo_checkout_path`);
}
16+
17+
/**
 * Run a command synchronously and return a plain, JSON-serializable record of
 * what happened.
 *
 * @param {string} cmd - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {{cwd?: string, env?: object, timeoutMs?: number}} [opts] - Optional
 *   overrides. `cwd` and `env` fall back to the current process; `timeoutMs`
 *   falls back to 300000.
 * @returns {object} Record with `command` (cmd and args joined by spaces, for
 *   display), `cwd`, `status` (numeric exit code, or null when spawnSync
 *   reported none), `signal`, `error` (message string or null), `durationMs`,
 *   `stdout` and `stderr` (empty string when nothing was captured).
 */
function run(cmd, args, opts = {}) {
  const startedAt = Date.now();
  const workingDir = opts.cwd || process.cwd();
  const spawnOptions = {
    cwd: workingDir,
    env: opts.env || process.env,
    encoding: 'utf8',
    timeout: opts.timeoutMs || 300000,
    // 128 MiB of captured output per stream before spawnSync gives up.
    maxBuffer: 128 * 1024 * 1024,
  };

  const { status, signal, error, stdout, stderr } = spawnSync(cmd, args, spawnOptions);

  // A non-numeric status is normalized to null so the record stays uniform.
  const exitCode = typeof status === 'number' ? status : null;

  return {
    command: [cmd, ...args].join(' '),
    cwd: workingDir,
    status: exitCode,
    signal,
    error: error?.message || null,
    durationMs: Date.now() - startedAt,
    stdout: stdout || '',
    stderr: stderr || '',
  };
}
37+
38+
/**
 * Remove ANSI color/style escape sequences (ESC `[` digits-and-semicolons `m`)
 * from a value.
 *
 * @param {*} text - Value to clean; any falsy value is treated as an empty string.
 * @returns {string} The stringified value without those escape sequences.
 */
function stripAnsi(text) {
  const raw = text ? String(text) : '';
  return raw.replace(/\u001b\[[0-9;]*m/g, '');
}
41+
42+
/**
 * Append one file/span candidate to `candidates`, built around a matched line.
 *
 * The path is normalized to forward slashes with a single leading `./`
 * removed. Empty paths, paths starting with `../`, and anything containing
 * `://` are dropped without being recorded.
 *
 * @param {object[]} candidates - Accumulator array; mutated in place.
 * @param {*} file - Path of the matching file as reported by the search tool.
 * @param {*} line - Line number of the match; anything non-numeric or below 1 becomes 1.
 * @param {string} source - Label identifying which query produced the match.
 * @param {string} [matchText] - Matched text; stored ANSI-stripped, trimmed and
 *   capped at 220 characters.
 * @returns {void}
 */
function addCandidate(candidates, file, line, source, matchText = '') {
  const normalized = String(file || '').replaceAll('\\', '/').replace(/^\.\//, '');
  const escapesRoot = normalized.startsWith('../');
  const looksLikeUrl = normalized.includes('://');
  if (normalized === '' || escapesRoot || looksLikeUrl) return;

  const lineNumber = Math.max(1, Number(line) || 1);
  const excerpt = stripAnsi(matchText).trim().slice(0, 220);

  candidates.push({
    file: normalized,
    // Span of 12 lines of context on each side, clamped at the top of the file.
    start: Math.max(1, lineNumber - 12),
    end: lineNumber + 12,
    line: lineNumber,
    source,
    matchText: excerpt,
  });
}
50+
51+
/**
 * Parse line-delimited JSON output (as requested from `rg --json`) and record
 * every `match` event as a candidate via addCandidate().
 *
 * Blank lines, lines that are not valid JSON, and events of any other type are
 * skipped. For each match event the path is read from `data.path.text`, the
 * line number from `data.line_number`, and the matched text from
 * `data.lines.text` (empty string when absent).
 *
 * @param {*} output - Raw stdout of the search command; falsy values are treated as empty.
 * @param {object[]} candidates - Accumulator array passed through to addCandidate().
 * @param {string} source - Label stored on every candidate produced by this call.
 * @returns {void}
 */
function parseRgJson(output, candidates, source) {
  for (const line of String(output || '').split(/\n+/)) {
    if (!line.trim()) continue;
    let event;
    try {
      event = JSON.parse(line);
    } catch {
      // Deliberately best-effort: anything that is not JSON is ignored.
      continue;
    }
    // JSON.parse('null') succeeds and yields null; optional chaining keeps
    // such a line from throwing, in line with the skip-what-we-cannot-use
    // handling above.
    if (event?.type !== 'match') continue;
    addCandidate(
      candidates,
      event.data?.path?.text,
      event.data?.line_number,
      source,
      event.data?.lines?.text || '',
    );
  }
}
70+
71+
/**
 * Deduplicate candidates by file and span, keeping the first occurrence of
 * each and preserving input order.
 *
 * Two candidates are duplicates when their `file`, `start` and `end` are all
 * equal; other fields are not compared.
 *
 * @param {Iterable<object>} candidates - Candidates with `file`, `start` and `end`.
 * @param {number} [max=160] - Maximum number of candidates to return; a value
 *   of 0 or less yields an empty array.
 * @returns {object[]} New array holding at most `max` of the original objects.
 */
function uniq(candidates, max = 160) {
  const seen = new Set();
  const out = [];
  for (const candidate of candidates) {
    // Check the cap before pushing so that max <= 0 returns nothing rather
    // than letting one element through.
    if (out.length >= max) break;
    const key = `${candidate.file}:${candidate.start}:${candidate.end}`;
    if (seen.has(key)) continue;
    seen.add(key);
    out.push(candidate);
  }
  return out;
}
83+
84+
// Lexical probes run against the task checkout. Each pattern is a
// case-insensitive alternation of keywords; `id` labels the candidates that
// the query produces.
const queries = [
  { id: 'metrics-startup', pattern: '(?i)prometheus|metrics|metric|insights|startup|start' },
  { id: 'auth-header-token', pattern: '(?i)authorization|bearer|token|header|subsonic|auth' },
  { id: 'write-collect-init', pattern: '(?i)collector|collect|write|init|server' },
];

const commands = [];
const candidates = [];
// The `rg --version` probe is always commands[0]; the per-query runs follow it.
const version = run('rg', ['--version'], { timeoutMs: 60000 });
commands.push(version);

for (const query of queries) {
  const result = run(
    'rg',
    [
      '--json',
      '-n',
      '--hidden',
      // ripgrep documents that when several --glob rules match a path, the
      // one given later on the command line wins. The `*.go` include therefore
      // comes first and the exclusions last; in the opposite order `*.go`
      // overrides the exclusions for .go files sitting directly inside
      // .git/, vendor/ or ui/.
      '--glob',
      '*.go',
      '--glob',
      '!.git/**',
      '--glob',
      '!vendor/**',
      '--glob',
      '!ui/**',
      query.pattern,
      '.',
    ],
    { cwd: repo, timeoutMs: 180000 },
  );
  commands.push(result);
  parseRgJson(result.stdout, candidates, `ripgrep-lexical:${query.id}`);
}
118+
119+
// A query run counts as clean when rg exited with 0 or 1; per the note
// recorded below, exit code 1 means the query simply had no matches.
const exitedCleanly = (command) => command.status === 0 || command.status === 1;

// Reduce a run() record to what the report keeps: metadata plus the first
// 1200 characters of each ANSI-stripped output stream.
const summarizeCommand = ({ command, status, signal, error, durationMs, stdout, stderr }) => ({
  command,
  status,
  signal,
  error,
  durationMs,
  stdoutExcerpt: stripAnsi(stdout).slice(0, 1200),
  stderrExcerpt: stripAnsi(stderr).slice(0, 1200),
});

const uniqueCandidates = uniq(candidates);
const setupStatus = version.status === 0 ? 'completed' : 'setup_failed';
// Everything after the leading `rg --version` probe is a query run.
const [, ...queryCommands] = commands;
const toolCallable = queryCommands.some(exitedCleanly);
const queryOk = queryCommands.every(exitedCleanly);

let totalQueryDurationMs = 0;
for (const { durationMs } of queryCommands) {
  totalQueryDurationMs += durationMs;
}

// Ready only when the version probe succeeded, every query run exited
// cleanly, and at least one candidate span was collected.
const hasCandidates = uniqueCandidates.length > 0;
const readiness = {
  lane: 'ripgrep-lexical',
  ready: setupStatus === 'completed' && toolCallable && queryOk && hasCandidates,
  setupStatus,
  indexStatus: 'not_required',
  toolCallable,
  candidateCount: uniqueCandidates.length,
  setupIndex: {
    setupDurationMs: version.durationMs,
    indexDurationMs: 0,
    queryDurationMs: totalQueryDurationMs,
  },
  notes: [
    'ripgrep-lexical is a no-index local lexical competitor; readiness requires rg callable plus non-empty real repo file/span candidates.',
    'rg exit code 1 means no matches for a query and is accepted only when at least one required query returns candidates.',
  ],
  commands: commands.map(summarizeCommand),
  candidates: uniqueCandidates,
};
151+
152+
// Task fields copied into the pack, in the order they are serialized.
const packedTaskFields = ['instance_id', 'repo', 'base_commit', 'problem_statement'];

// Full report: task summary, the queries that were run, and the readiness verdict.
const pack = {
  createdAt: new Date().toISOString(),
  targetTaskId,
  task: Object.fromEntries(packedTaskFields.map((field) => [field, task[field]])),
  queries,
  readiness,
};

// Serialize the pack once; the same text goes to disk and to stdout.
const packJson = JSON.stringify(pack, null, 2);
const readinessJson = JSON.stringify(readiness, null, 2);

writeFileSync(join(outDir, 'ripgrep-candidate-pack.json'), packJson);
writeFileSync(join(outDir, 'ripgrep-readiness.json'), readinessJson);

// The pack is printed between two fixed marker lines on stdout.
console.log('CONTEXTBENCH_RIPGREP_READY_JSON_START');
console.log(packJson);
console.log('CONTEXTBENCH_RIPGREP_READY_JSON_END');

// Report "not ready" through the exit code without cutting the process short.
if (!readiness.ready) {
  process.exitCode = 1;
}

0 commit comments

Comments
 (0)