# Preserve unavailable benchmark metrics as null (#7)

# Workflow identity, triggers and token permissions for the ContextBench
# five-lane scoring run.
name: ContextBench Five Lane Score

on:
  # Run on pushes to master, but only when the workflow itself or one of the
  # listed scoring scripts / fixtures changes.
  push:
    branches:
      - master
    paths:
      - .github/workflows/contextbench-five-lane-score.yml
      - scripts/contextbench-score-five-lane-selections.mjs
      - scripts/contextbench-score-five-lane-artifact-selections.mjs
      - scripts/contextbench-build-publishable-report.mjs
      - scripts/contextbench-print-publishable-report.mjs
      - scripts/contextbench-five-lane-selections.json
      - tests/fixtures/contextbench-benchmark-protocol.json
      - tests/fixtures/contextbench-lanes.json
      - tests/fixtures/contextbench-task-manifest.json
  # Also allow manual runs from the Actions UI / API.
  workflow_dispatch:

# Read-only token: repository contents for checkout, plus Actions read access
# (presumably needed to fetch artifacts produced by other workflow runs).
permissions:
  contents: read
  actions: read
jobs:
  # Materializes one ContextBench task, pulls readiness artifacts from two
  # earlier runs, scores the lanes, and publishes a report as an artifact.
  five-lane-score:
    runs-on: ubuntu-latest
    timeout-minutes: 45
    env:
      # Working directory for logs, payloads and reports; uploaded at the end.
      ROOT: /tmp/contextbench-five-lane-score
      TASK_PAYLOADS: /tmp/contextbench-five-lane-score/task-payloads.json
      CHECKOUT_ROOT: /tmp/contextbench-five-lane-score-checkouts
      OFFICIAL_CONTEXTBENCH: /tmp/contextbench-five-lane-score/ContextBench-official
      # The single task instance this workflow scores.
      TARGET_TASK_ID: SWE-Bench-Pro__go__maintenance__bugfix__4df06349
      SOURCE_SELECTIONS_PATH: scripts/contextbench-five-lane-selections.json
      EXTERNAL_READINESS_ROOT: /tmp/contextbench-five-lane-score/external-readiness
      REQUIRED_LANES: raw-native,codebase-context,codebase-memory-mcp,grepai,ripgrep-lexical
    steps:
      - uses: actions/checkout@v4

      # v2 of this action targets the deprecated Node 16 action runtime; v4 is
      # the maintained line.
      # NOTE(review): v4 may reject a `packageManager` pin in package.json
      # that conflicts with `version` below - confirm they agree.
      - uses: pnpm/action-setup@v4
        with:
          version: '10'

      - uses: actions/setup-node@v4
        with:
          node-version: '24'
          cache: pnpm

      - uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      # Installs JS and Python dependencies, clones the upstream ContextBench
      # repository, validates fixtures, then writes the full task payload list,
      # narrows it to TARGET_TASK_ID and materializes that task's checkout.
      # Command output goes to $ROOT/logs (uploaded by the final step), so a
      # failure here shows little in the console; inspect the artifact.
      - name: Install and materialize selected Go task
        shell: bash
        run: |
          set -euo pipefail
          mkdir -p "$ROOT" "$CHECKOUT_ROOT" "$ROOT/logs" "$EXTERNAL_READINESS_ROOT"
          pnpm install --frozen-lockfile > "$ROOT/logs/pnpm-install.log" 2>&1
          python -m pip install "tree-sitter==0.20.4" "tree-sitter-languages==1.10.2" datasets pyarrow > "$ROOT/logs/pip-install.log" 2>&1
          git clone --depth 1 https://github.com/EuniAI/ContextBench.git "$OFFICIAL_CONTEXTBENCH" > "$ROOT/logs/contextbench-clone.log" 2>&1
          node scripts/contextbench-runner.mjs --validate-fixtures > "$ROOT/logs/validate-fixtures.log" 2>&1
          # Up to three attempts, 5 s apart; the third failure fails the step.
          for attempt in 1 2 3; do
            node scripts/contextbench-select-slice.mjs --write-task-payloads --out "$TASK_PAYLOADS.all" --checkout-root "$CHECKOUT_ROOT" > "$ROOT/logs/write-payloads-$attempt.log" 2>&1 && break
            if [ "$attempt" = 3 ]; then exit 1; fi
            sleep 5
          done
          # Reduce the full payload file to the single target task.
          node - <<'NODE'
          const fs = require('node:fs');
          const payloadPath = process.env.TASK_PAYLOADS;
          const target = process.env.TARGET_TASK_ID;
          const payload = JSON.parse(fs.readFileSync(`${payloadPath}.all`, 'utf8'));
          const task = payload.tasks.find((candidate) => candidate.instance_id === target);
          if (!task) throw new Error(`target task ${target} not found`);
          fs.writeFileSync(payloadPath, `${JSON.stringify({ ...payload, task_count: 1, tasks: [task] }, null, 2)}\n`);
          NODE
          # Same retry policy for materializing the checkout.
          for attempt in 1 2 3; do
            node scripts/contextbench-select-slice.mjs --materialize-checkouts --payloads "$TASK_PAYLOADS" --max-tasks 1 > "$ROOT/logs/materialize-$attempt.log" 2>&1 && break
            if [ "$attempt" = 3 ]; then exit 1; fi
            sleep 5
          done

      # Readiness artifacts are fetched from fixed, earlier workflow runs.
      # Run IDs are quoted so they are passed as strings, not YAML numbers.
      # NOTE(review): these downloads will fail once the referenced runs'
      # artifacts expire - confirm retention or refresh the run IDs.
      - name: Download GrepAI readiness artifact
        uses: actions/download-artifact@v4
        with:
          name: contextbench-grepai-readiness
          run-id: '25643757046'
          github-token: ${{ github.token }}
          path: ${{ env.EXTERNAL_READINESS_ROOT }}/grepai

      - name: Download ripgrep readiness artifact
        uses: actions/download-artifact@v4
        with:
          name: contextbench-ripgrep-readiness
          run-id: '25644197513'
          github-token: ${{ github.token }}
          path: ${{ env.EXTERNAL_READINESS_ROOT }}/ripgrep

      # Takes no CLI arguments; presumably configured through the job-level
      # env vars above (verify against the script).
      - name: Score five ready lane selections
        shell: bash
        run: node scripts/contextbench-score-five-lane-artifact-selections.mjs

      # Reads $ROOT/summary.json plus the protocol, lane and task-manifest
      # fixtures; writes the publishable summary, its validation result and a
      # Markdown summary back into $ROOT.
      - name: Build publishable pilot report
        shell: bash
        run: |
          node scripts/contextbench-build-publishable-report.mjs \
            --summary "$ROOT/summary.json" \
            --protocol tests/fixtures/contextbench-benchmark-protocol.json \
            --lanes tests/fixtures/contextbench-lanes.json \
            --task-manifest tests/fixtures/contextbench-task-manifest.json \
            --out "$ROOT/publishable-summary.json" \
            --validation-out "$ROOT/publishable-validation.json" \
            --humanized-out "$ROOT/humanized-summary.md"

      - name: Print compact publishable pilot report
        shell: bash
        run: node scripts/contextbench-print-publishable-report.mjs "$ROOT/publishable-summary.json"

      # Runs even when earlier steps fail so the logs under $ROOT/logs are
      # always available. The path comes from env.ROOT rather than a second
      # hard-coded copy of the directory.
      - name: Upload five-lane score artifacts
        if: always()
        uses: actions/upload-artifact@v4
        with:
          name: contextbench-five-lane-score
          path: ${{ env.ROOT }}
          retention-days: 14