Skip to content

Commit 8865e8d

Browse files
committed
Add focused GrepAI readiness pack script
1 parent eb65285 commit 8865e8d

1 file changed

Lines changed: 269 additions & 0 deletions

File tree

Lines changed: 269 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,269 @@
1+
import { spawnSync } from 'node:child_process';
2+
import { existsSync, mkdirSync, readFileSync, readdirSync, writeFileSync } from 'node:fs';
3+
import { basename, join, relative } from 'node:path';
4+
5+
// Task selection and output locations; each can be overridden through the environment.
const targetTaskId = process.env.TARGET_TASK_ID || 'SWE-Bench-Pro__go__maintenance__bugfix__4df06349';
const root = process.env.ROOT || '/tmp/contextbench-grepai-readiness';
const outDir = join(root, 'pack');
const logsDir = join(root, 'logs');
mkdirSync(outDir, { recursive: true });
mkdirSync(logsDir, { recursive: true });

// Validate the inputs up front so a misconfigured run fails with an actionable
// message instead of an opaque TypeError from readFileSync/readdirSync later on.
const payloadsPath = process.env.TASK_PAYLOADS;
if (!payloadsPath) throw new Error('TASK_PAYLOADS must be set to the path of the task payloads JSON file');
const payloads = JSON.parse(readFileSync(payloadsPath, 'utf8'));
if (!Array.isArray(payloads?.tasks)) throw new Error(`${payloadsPath} does not contain a "tasks" array`);
const task = payloads.tasks.find((candidate) => candidate?.instance_id === targetTaskId);
if (!task) throw new Error(`target task ${targetTaskId} missing from payloads`);
const repo = task.repo_checkout_path;
if (typeof repo !== 'string' || !existsSync(repo)) {
  throw new Error(`repo_checkout_path for ${targetTaskId} is missing or does not exist: ${repo}`);
}
16+
17+
/**
 * Run a command synchronously and return a plain record describing the outcome.
 *
 * @param {string} cmd - Executable to spawn.
 * @param {string[]} args - Arguments passed to the executable.
 * @param {{cwd?: string, env?: object, timeoutMs?: number}} [opts] - Optional
 *   overrides; falls back to the current working directory, the current
 *   environment and a 300000 ms timeout.
 * @returns {object} Record with `command`, `cwd`, `status` (number or null),
 *   `signal`, `error` (message or null), `durationMs`, `stdout` and `stderr`
 *   (both always strings).
 */
function run(cmd, args, opts = {}) {
  const beganAt = Date.now();
  const workingDir = opts.cwd || process.cwd();
  const result = spawnSync(cmd, args, {
    cwd: workingDir,
    env: opts.env || process.env,
    encoding: 'utf8',
    timeout: opts.timeoutMs || 300000,
    maxBuffer: 128 * 1024 * 1024,
  });

  // spawnSync reports a non-numeric status when the process did not exit
  // normally; normalise that to null.
  const exitCode = typeof result.status === 'number' ? result.status : null;
  const failure = result.error?.message || null;

  return {
    command: [cmd, ...args].join(' '),
    cwd: workingDir,
    status: exitCode,
    signal: result.signal,
    error: failure,
    durationMs: Date.now() - beganAt,
    stdout: result.stdout || '',
    stderr: result.stderr || '',
  };
}
37+
38+
/**
 * Recursively list the files under `dir` as forward-slash relative paths.
 *
 * Entries named `.git`, `vendor` or `node_modules` are skipped at every level.
 *
 * @param {string} dir - Directory to read.
 * @param {string} [prefix] - Relative path of `dir`, prepended to entry names.
 * @param {string[]} [files] - Accumulator; mutated and returned.
 * @returns {string[]} The accumulator containing every collected path.
 */
function collectRepoFiles(dir, prefix = '', files = []) {
  const skipped = new Set(['.git', 'vendor', 'node_modules']);
  const entries = readdirSync(dir, { withFileTypes: true });
  entries.forEach((entry) => {
    const { name } = entry;
    if (skipped.has(name)) return;
    const relPath = prefix ? `${prefix}/${name}` : name;
    if (entry.isDirectory()) {
      collectRepoFiles(join(dir, name), relPath, files);
    } else {
      files.push(relPath.replaceAll('\\', '/'));
    }
  });
  return files;
}
48+
49+
// Index the checkout once: a set for exact relative-path lookups and a
// basename -> [relative paths] map used to resolve bare file names.
const repoFiles = collectRepoFiles(repo);
const repoFileSet = new Set(repoFiles);
const basenameMap = new Map();
repoFiles.forEach((file) => {
  const name = basename(file);
  if (!basenameMap.has(name)) basenameMap.set(name, []);
  basenameMap.get(name).push(file);
});
57+
58+
/**
 * Remove ANSI CSI escape sequences from tool output.
 *
 * Covers every CSI sequence (ESC `[`, parameter bytes, intermediate bytes,
 * one final byte), not just SGR colour codes, so erase-line and cursor
 * sequences emitted by progress output do not leak into parsed paths or
 * excerpts.
 *
 * @param {*} text - Value to clean; falsy values are treated as ''.
 * @returns {string} The text with CSI sequences removed.
 */
function stripAnsi(text) {
  return String(text || '').replace(/\u001b\[[0-?]*[ -/]*[@-~]/g, '');
}
61+
62+
/**
 * Resolve a path reported by a tool to a repo-relative, forward-slash path.
 *
 * Resolution order: exact match in the collected repo files, then anything
 * that exists under the checkout, then a bare file name that is unique in the
 * repo. URLs (other than a leading `file://`) and paths containing a `..`
 * segment are rejected.
 *
 * @param {*} file - Raw path text, possibly absolute, `file://`-prefixed,
 *   backslash-separated or containing ANSI codes.
 * @returns {string} The repo-relative path, or '' when it cannot be resolved.
 */
function norm(file) {
  let f = stripAnsi(file).replace(/^file:\/\//, '').replaceAll('\\', '/').trim();
  if (!f) return '';
  const repoNorm = repo.replaceAll('\\', '/');
  if (f.startsWith(repoNorm)) f = relative(repo, f).replaceAll('\\', '/');
  f = f.replace(/^\/+/, '').replace(/^\.\//, '');
  if (!f || f.includes('://')) return '';
  // Reject traversal per path segment; a plain substring test would also
  // throw away legitimate names that merely contain two dots (e.g. a..b.go).
  if (f.split('/').includes('..')) return '';
  if (repoFileSet.has(f)) return f;
  if (existsSync(join(repo, f))) return f;
  // Last resort: accept a bare name only when it identifies exactly one file.
  const byName = basenameMap.get(basename(f));
  if (byName?.length === 1) return byName[0];
  return '';
}
75+
76+
/**
 * Append a location to `locs` when `file` resolves to a repo path.
 *
 * @param {object[]} locs - Accumulator; mutated in place.
 * @param {*} file - Raw path, resolved through `norm`.
 * @param {*} [start] - First line; coerced to a number and clamped to >= 1.
 * @param {*} [end] - Last line; coerced to a number and clamped to >= start.
 * @param {string} [source] - Label stored on the location.
 * @returns {void}
 */
function add(locs, file, start = 1, end = start, source = 'grepai-search') {
  const resolved = norm(file);
  if (resolved) {
    const firstLine = Math.max(1, Number(start) || 1);
    const lastLine = Math.max(firstLine, Number(end) || firstLine);
    locs.push({ file: resolved, start: firstLine, end: lastLine, source });
  }
}
82+
83+
/**
 * Best-effort JSON extraction from tool output.
 *
 * Tries the whole (ANSI-stripped, trimmed) text first, then the span from the
 * first `{` to the last `}`, then the span from the first `[` to the last `]`.
 *
 * @param {*} text - Raw output.
 * @returns {*} The first value that parses, or null when nothing does.
 */
function jsonish(text) {
  const body = stripAnsi(text).trim();
  if (body.length === 0) return null;

  const attempts = [body];
  for (const [opener, closer] of ['{}', '[]']) {
    const from = body.indexOf(opener);
    const to = body.lastIndexOf(closer);
    if (from >= 0 && to > from) attempts.push(body.slice(from, to + 1));
  }

  for (const attempt of attempts) {
    try {
      return JSON.parse(attempt);
    } catch {
      // Not valid JSON; fall through to the next, narrower attempt.
    }
  }
  return null;
}
96+
97+
/**
 * Pick the path-like property of a result object.
 *
 * Returns the first truthy value among the known path keys, checked in order;
 * when none is truthy the (possibly undefined) `uri` property is returned.
 *
 * @param {object} value - Object to inspect.
 * @returns {*} The selected property value.
 */
function pathFromObject(value) {
  const preferredKeys = ['file', 'path', 'file_path', 'filePath', 'relative_path', 'filename', 'source_path'];
  for (const key of preferredKeys) {
    if (value[key]) return value[key];
  }
  return value.uri;
}
100+
101+
/**
 * Depth-first traversal of a parsed JSON value that records a location for
 * every object visited; objects without a resolvable path are ignored by `add`.
 *
 * @param {*} value - Parsed JSON value; non-objects and null are ignored.
 * @param {object[]} locs - Accumulator passed through to `add`.
 * @param {string} source - Label stored on each location.
 * @returns {void}
 */
function walk(value, locs, source) {
  if (typeof value !== 'object' || value === null) return;

  const isList = Array.isArray(value);
  if (!isList) {
    const path = pathFromObject(value);
    const firstLine = value.start_line || value.startLine || value.line || value.start || 1;
    const lastLine = value.end_line || value.endLine || value.end || value.line || 1;
    add(locs, path, firstLine, lastLine, source);
  }

  const children = isList ? value : Object.values(value);
  children.forEach((child) => walk(child, locs, source));
}
110+
111+
/**
 * Extract repo locations from one stream of tool output.
 *
 * Three passes feed `add`: structured JSON (when the text parses), then
 * `path.ext:line[-line]` at the start of a line, then any path-like token with
 * a known extension anywhere in the text. Duplicates are expected.
 *
 * @param {*} text - Raw stdout/stderr text.
 * @param {object[]} locs - Accumulator; mutated through `add`.
 * @param {string} source - Label stored on each location.
 * @returns {void}
 */
function collect(text, locs, source) {
  const cleaned = stripAnsi(text);
  const parsed = jsonish(cleaned);
  if (parsed) walk(parsed, locs, source);

  const lineMatch = /^(.+?\.(?:go|mod|sum|json|yml|yaml|md|ts|tsx|js|jsx|py|rs|java|c|cc|cpp|h|hpp|rb|php|cs|kt|swift|vue|svelte)):(\d+)(?:-(\d+))?/gm;
  for (const m of cleaned.matchAll(lineMatch)) add(locs, m[1], m[2], m[3] || m[2], source);

  // The negative lookahead stops a short extension from matching the prefix of
  // a longer one: without it `foo.tsx:5` is captured as `foo.ts` (and
  // `foo.cpp` as `foo.c`), which drops the line number and can resolve to the
  // wrong file.
  const fileMatch = /([A-Za-z0-9_.\/-]+\.(?:go|mod|sum|json|yml|yaml|md|ts|tsx|js|jsx|py|rs|java|c|cc|cpp|h|hpp|rb|php|cs|kt|swift|vue|svelte))(?![A-Za-z0-9_])(?::|#L|\s+line\s+)?(\d+)?(?:-(\d+))?/g;
  for (const m of cleaned.matchAll(fileMatch)) add(locs, m[1], m[2] || 1, m[3] || m[2] || 1, source);
}
121+
122+
/**
 * De-duplicate locations by `file:start:end`, keeping first occurrences in
 * order and returning at most `max` of them.
 *
 * @param {Iterable<{file: string, start: number, end: number}>} locs - Locations.
 * @param {number} [max=200] - Upper bound on the result length; a value <= 0
 *   yields an empty array.
 * @returns {object[]} New array holding the retained location objects.
 */
function uniq(locs, max = 200) {
  const seen = new Set();
  const out = [];
  for (const loc of locs) {
    // Enforce the cap before pushing so a non-positive max returns nothing.
    if (out.length >= max) break;
    const key = `${loc.file}:${loc.start}:${loc.end}`;
    if (seen.has(key)) continue;
    seen.add(key);
    out.push(loc);
  }
  return out;
}
134+
135+
/**
 * Build a keyword query from free text.
 *
 * Markdown/punctuation characters are replaced with spaces, then the first 24
 * words that are at least four characters long and not in the stop list are
 * joined with single spaces. Original casing is kept.
 *
 * @param {*} text - Source text; falsy values are treated as ''.
 * @returns {string} The space-separated keywords (possibly '').
 */
function queryOf(text) {
  const stopWords = new Set(['that', 'this', 'with', 'from', 'when', 'then', 'into', 'should', 'would', 'could', 'there', 'where', 'which', 'about', 'after', 'before', 'have', 'will', 'been', 'than', 'also', 'only', 'some', 'using', 'must']);
  const punctuation = /[`*_#>\[\](){},.;:!?/\\]/g;
  const keywords = [];
  for (const word of String(text || '').replace(punctuation, ' ').split(/\s+/)) {
    if (keywords.length === 24) break;
    if (word.length < 4 || stopWords.has(word.toLowerCase())) continue;
    keywords.push(word);
  }
  return keywords.join(' ');
}
144+
145+
// Search queries: one derived from the task's problem statement plus fixed
// hand-written variants. Blank entries are dropped so an empty problem
// statement does not turn into a `grepai search ''` invocation.
const queryVariants = [
  queryOf(task.problem_statement),
  'system metrics written on start prometheus writer insights',
  'Bearer token custom authorization header authentication',
  'Subsonic authorization header token parsing',
  'startup metrics insights prometheus initialization',
].filter((query) => query.trim().length > 0);
152+
153+
// Child-process environment: inherit everything and default OLLAMA_HOST to the
// local daemon when it is unset or empty.
const env = { ...process.env };
env.OLLAMA_HOST ||= 'http://127.0.0.1:11434';

// Every executed command is recorded in `commands`; parsed locations go to `locs`.
const commands = [];
const locs = [];

// Setup phase: tool version, Ollama model listing, then index initialisation
// inside the checkout.
const setup = run('grepai', ['version'], { env, timeoutMs: 60000 });
const ollama = run('ollama', ['list'], { env, timeoutMs: 60000 });
const init = run(
  'grepai',
  ['init', '--yes', '--provider', 'ollama', '--model', 'nomic-embed-text', '--backend', 'gob'],
  { cwd: repo, env, timeoutMs: 120000 },
);
commands.push(setup, ollama, init);
162+
163+
// Bash driver for the indexing phase. It starts `grepai watch --no-ui` in the
// background, polls `grepai status` (up to 180 attempts, 2 s apart) and the
// watch log for signs that files/chunks were indexed, then stops the watcher
// (INT first, TERM after up to 30 s) and exits 2 when readiness was never seen.
//
// The log paths are handed over through the environment rather than spliced
// into the script text: inside bash double quotes a `$` or backtick in the
// path would otherwise be expanded by the shell.
const indexScript = `
set -euo pipefail
LOG="$GREPAI_WATCH_LOG"
STATUSLOG="$GREPAI_STATUS_LOG"
: > "$LOG"
: > "$STATUSLOG"
grepai watch --no-ui > "$LOG" 2>&1 &
pid=$!
ready=0
for i in $(seq 1 180); do
  status="$(grepai status --no-ui 2>&1 || true)"
  printf -- '--- status attempt %s ---\n%s\n' "$i" "$status" >> "$STATUSLOG"
  # Here-string instead of a pipe: with pipefail, grep -q closing the pipe early
  # can make the writer fail and turn a real match into a false negative.
  if grep -Eiq 'Files indexed:[[:space:]]*[1-9][0-9]*|Total chunks:[[:space:]]*[1-9][0-9]*|[1-9][0-9]* chunks created|[1-9][0-9]* files indexed' <<< "$status"; then
    ready=1
    break
  fi
  if grep -Eiq 'Initial scan complete|[1-9][0-9]* files indexed|[1-9][0-9]* chunks created' "$LOG"; then
    ready=1
    break
  fi
  if ! kill -0 "$pid" 2>/dev/null; then
    echo 'grepai watch exited before textual readiness; search will be used as the functional readiness proof'
    tail -200 "$LOG" || true
    exit 2
  fi
  sleep 2
done
kill -INT "$pid" 2>/dev/null || true
for i in $(seq 1 30); do
  if ! kill -0 "$pid" 2>/dev/null; then break; fi
  sleep 1
done
kill -TERM "$pid" 2>/dev/null || true
wait "$pid" || true
grepai status --no-ui || true
tail -200 "$STATUSLOG" || true
tail -200 "$LOG" || true
if [ "$ready" -ne 1 ]; then exit 2; fi
`;
const indexEnv = {
  ...env,
  GREPAI_WATCH_LOG: join(logsDir, 'grepai-watch-no-ui.log'),
  GREPAI_STATUS_LOG: join(logsDir, 'grepai-status-loop.log'),
};
const index = run('bash', ['-lc', indexScript], { cwd: repo, env: indexEnv, timeoutMs: 600000 });
commands.push(index);
204+
205+
// Query phase: run every variant through `grepai search` inside the checkout
// and harvest locations from both output streams of each invocation.
const searches = [];
for (const query of queryVariants) {
  const result = run('grepai', ['search', query, '--json', '--compact', '--limit', '50'], {
    cwd: repo,
    env,
    timeoutMs: 240000,
  });
  commands.push(result);
  searches.push(result);
  for (const stream of [result.stdout, result.stderr]) {
    collect(stream, locs, 'grepai-search');
  }
}
213+
214+
// Readiness verdict. The lane is ready when all three setup commands exited 0,
// at least one search exited 0, and the searches yielded at least one repo
// location. A failed index step is tolerated when the searches prove the
// index is usable.
const candidates = uniq(locs);
const setupCommands = [setup, ollama, init];
const setupOk = setupCommands.every(({ status }) => status === 0);
const setupStatus = setupOk ? 'completed' : 'setup_failed';
const searchSucceeded = searches.some(({ status }) => status === 0);
const hasCandidates = candidates.length > 0;
const indexFunctional = index.status === 0 || (searchSucceeded && hasCandidates);

let totalQueryMs = 0;
for (const { durationMs } of searches) totalQueryMs += durationMs;
const setupIndex = {
  setupDurationMs: setupCommands.reduce((total, { durationMs }) => total + durationMs, 0),
  indexDurationMs: index.durationMs,
  queryDurationMs: totalQueryMs,
};

const notes = [
  'Readiness requires setup, callable grepai search, and non-empty candidate spans/files from real repo queries.',
];
const acceptedDespiteIndexFailure = indexFunctional && index.status !== 0;
if (acceptedDespiteIndexFailure) {
  notes.push(`grepai watch exited with status ${index.status}; readiness accepts the lane because subsequent grepai search returned ${candidates.length} repo locations from the persisted index.`);
}

const readiness = {
  lane: 'grepai',
  ready: setupOk && indexFunctional && searchSucceeded && hasCandidates,
  setupStatus,
  indexStatus: indexFunctional ? 'completed' : 'index_failed',
  toolCallable: searchSucceeded,
  candidateCount: candidates.length,
  setupIndex,
  notes,
  // Keep the report small: only the first 1200 characters of each stream.
  commands: commands.map(({ command, status, signal, error, durationMs, stdout, stderr }) => ({
    command,
    status,
    signal,
    error,
    durationMs,
    stdoutExcerpt: stripAnsi(stdout).slice(0, 1200),
    stderrExcerpt: stripAnsi(stderr).slice(0, 1200),
  })),
  candidates,
};
250+
251+
// Final artefacts: the full pack (task summary, queries, readiness) and the
// readiness report on its own, written to the pack directory; the pack is also
// echoed to stdout between sentinel lines.
const taskSummaryKeys = ['instance_id', 'repo', 'base_commit', 'problem_statement'];
const pack = {
  createdAt: new Date().toISOString(),
  targetTaskId,
  task: Object.fromEntries(taskSummaryKeys.map((key) => [key, task[key]])),
  queryVariants,
  readiness,
};

const pretty = (value) => JSON.stringify(value, null, 2);
const packJson = pretty(pack);
writeFileSync(join(outDir, 'grepai-candidate-pack.json'), packJson);
writeFileSync(join(outDir, 'grepai-readiness.json'), pretty(readiness));

console.log('CONTEXTBENCH_GREPAI_READY_JSON_START');
console.log(packJson);
console.log('CONTEXTBENCH_GREPAI_READY_JSON_END');

// Signal a not-ready lane through the exit code without aborting pending output.
if (!readiness.ready) {
  process.exitCode = 1;
}

0 commit comments

Comments
 (0)