Preserve unavailable benchmark metrics as null #7

Workflow file for this run

.github/workflows/contextbench-five-lane-score.yml at de3d70b

	name: ContextBench Five Lane Score

	on:
	push:
	branches: [master]
	paths:
	- .github/workflows/contextbench-five-lane-score.yml
	- scripts/contextbench-score-five-lane-selections.mjs
	- scripts/contextbench-score-five-lane-artifact-selections.mjs
	- scripts/contextbench-build-publishable-report.mjs
	- scripts/contextbench-print-publishable-report.mjs
	- scripts/contextbench-five-lane-selections.json
	- tests/fixtures/contextbench-benchmark-protocol.json
	- tests/fixtures/contextbench-lanes.json
	- tests/fixtures/contextbench-task-manifest.json
	workflow_dispatch:

	permissions:
	contents: read
	actions: read

	jobs:
	five-lane-score:
	runs-on: ubuntu-latest
	timeout-minutes: 45
	env:
	ROOT: /tmp/contextbench-five-lane-score
	TASK_PAYLOADS: /tmp/contextbench-five-lane-score/task-payloads.json
	CHECKOUT_ROOT: /tmp/contextbench-five-lane-score-checkouts
	OFFICIAL_CONTEXTBENCH: /tmp/contextbench-five-lane-score/ContextBench-official
	TARGET_TASK_ID: SWE-Bench-Pro__go__maintenance__bugfix__4df06349
	SOURCE_SELECTIONS_PATH: scripts/contextbench-five-lane-selections.json
	EXTERNAL_READINESS_ROOT: /tmp/contextbench-five-lane-score/external-readiness
	REQUIRED_LANES: raw-native,codebase-context,codebase-memory-mcp,grepai,ripgrep-lexical
	steps:
	- uses: actions/checkout@v4
	- uses: pnpm/action-setup@v2
	with:
	version: 10
	- uses: actions/setup-node@v4
	with:
	node-version: '24'
	cache: pnpm
	- uses: actions/setup-python@v5
	with:
	python-version: '3.11'
	- name: Install and materialize selected Go task
	shell: bash
	run: \|
	set -euo pipefail
	mkdir -p "$ROOT" "$CHECKOUT_ROOT" "$ROOT/logs" "$EXTERNAL_READINESS_ROOT"
	pnpm install --frozen-lockfile > "$ROOT/logs/pnpm-install.log" 2>&1
	python -m pip install "tree-sitter==0.20.4" "tree-sitter-languages==1.10.2" datasets pyarrow > "$ROOT/logs/pip-install.log" 2>&1
	git clone --depth 1 https://github.com/EuniAI/ContextBench.git "$OFFICIAL_CONTEXTBENCH" > "$ROOT/logs/contextbench-clone.log" 2>&1
	node scripts/contextbench-runner.mjs --validate-fixtures > "$ROOT/logs/validate-fixtures.log" 2>&1
	for attempt in 1 2 3; do
	node scripts/contextbench-select-slice.mjs --write-task-payloads --out "$TASK_PAYLOADS.all" --checkout-root "$CHECKOUT_ROOT" > "$ROOT/logs/write-payloads-$attempt.log" 2>&1 && break
	if [ "$attempt" = 3 ]; then exit 1; fi
	sleep 5
	done
	node - <<'NODE'
	const fs = require('node:fs');
	const payloadPath = process.env.TASK_PAYLOADS;
	const target = process.env.TARGET_TASK_ID;
	const payload = JSON.parse(fs.readFileSync(`${payloadPath}.all`, 'utf8'));
	const task = payload.tasks.find((candidate) => candidate.instance_id === target);
	if (!task) throw new Error(`target task ${target} not found`);
	fs.writeFileSync(payloadPath, `${JSON.stringify({ ...payload, task_count: 1, tasks: [task] }, null, 2)}\n`);
	NODE
	for attempt in 1 2 3; do
	node scripts/contextbench-select-slice.mjs --materialize-checkouts --payloads "$TASK_PAYLOADS" --max-tasks 1 > "$ROOT/logs/materialize-$attempt.log" 2>&1 && break
	if [ "$attempt" = 3 ]; then exit 1; fi
	sleep 5
	done
	- name: Download GrepAI readiness artifact
	uses: actions/download-artifact@v4
	with:
	name: contextbench-grepai-readiness
	run-id: 25643757046
	github-token: ${{ github.token }}
	path: ${{ env.EXTERNAL_READINESS_ROOT }}/grepai
	- name: Download ripgrep readiness artifact
	uses: actions/download-artifact@v4
	with:
	name: contextbench-ripgrep-readiness
	run-id: 25644197513
	github-token: ${{ github.token }}
	path: ${{ env.EXTERNAL_READINESS_ROOT }}/ripgrep
	- name: Score five ready lane selections
	shell: bash
	run: node scripts/contextbench-score-five-lane-artifact-selections.mjs
	- name: Build publishable pilot report
	shell: bash
	run: \|
	node scripts/contextbench-build-publishable-report.mjs \
	--summary "$ROOT/summary.json" \
	--protocol tests/fixtures/contextbench-benchmark-protocol.json \
	--lanes tests/fixtures/contextbench-lanes.json \
	--task-manifest tests/fixtures/contextbench-task-manifest.json \
	--out "$ROOT/publishable-summary.json" \
	--validation-out "$ROOT/publishable-validation.json" \
	--humanized-out "$ROOT/humanized-summary.md"
	- name: Print compact publishable pilot report
	shell: bash
	run: node scripts/contextbench-print-publishable-report.mjs "$ROOT/publishable-summary.json"
	- name: Upload five-lane score artifacts
	if: always()
	uses: actions/upload-artifact@v4
	with:
	name: contextbench-five-lane-score
	path: /tmp/contextbench-five-lane-score
	retention-days: 14

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Preserve unavailable benchmark metrics as null #7

Workflow file

Preserve unavailable benchmark metrics as null #7

Uh oh!

Workflow file for this run