OpenHands · juanmichelini · Apr 6, 2026 · Apr 8, 2026 · Apr 8, 2026 · Apr 8, 2026
diff --git a/.github/actions/build-select-file/action.yml b/.github/actions/build-select-file/action.yml
@@ -2,10 +2,22 @@ name: Build selected instances file
 description: >
   Convert comma-separated instance IDs into a newline-separated file
   and export SELECT_FILE / N_LIMIT env vars for downstream build steps.
+  If no instance IDs are provided, the action is a no-op and the caller's
+  pre-existing SELECT_FILE env var (set at workflow level) is left untouched.
 
 inputs:
   instance-ids:
-    description: Comma-separated instance IDs (empty = no filtering)
+    # Note on semantics: an empty input here means "no override provided", so
+    # this action does nothing and the workflow-level SELECT_FILE default
+    # applies. This is distinct from passing `--select ''` to the underlying
+    # Python build script, which explicitly clears the curated default and
+    # builds the full dataset.
+    description: >
+      Comma-separated instance IDs to build. If empty, this action is a no-op
+      and downstream steps fall back to the workflow-level SELECT_FILE default.
+      To build the full dataset (no curated subset), do not rely on this input;
+      instead, override SELECT_FILE at the workflow level or call build_images.py
+      directly with `--select ''`.
     required: false
     default: ''
 
@@ -18,7 +30,7 @@ runs:
         set -euo pipefail
 
         if [ -z "${{ inputs.instance-ids }}" ]; then
-          echo "No instance IDs provided; skipping select file creation."
+          echo "No explicit instance IDs provided; using workflow default SELECT_FILE."
           exit 0
         fi
 

diff --git a/.github/workflows/build-swebenchmultimodal-images.yml b/.github/workflows/build-swebenchmultimodal-images.yml
@@ -83,14 +83,25 @@ on:
         type: string
 
 # Defaults for automatic runs; keep INSTANCE_IDS/SELECT_FILE initialized so set -euo pipefail won't fail on unset vars.
+#
+# NOTE (behavior change introduced alongside curated-subset inference defaults):
+# SELECT_FILE now defaults to `resolved_instances.txt`, so by default this
+# workflow only builds the curated 68-instance subset (the same subset used by
+# `swebenchmultimodal-infer`). This matches the inference default and avoids
+# burning CI time on instances we never evaluate.
+#
+# To build anything other than the curated subset, manually trigger the workflow and either:
+#   * provide explicit `instance-ids` to build exactly those instances, or
+#   * clear SELECT_FILE in your fork (e.g. set this env to '') to build the FULL dataset.
+# Without one of those overrides, only the 68 curated instances will be built.
 env:
   DATASET: princeton-nlp/SWE-bench_Multimodal
   SPLIT: dev
   MAX_WORKERS: '8'
   MAX_RETRIES: '2'
   N_LIMIT: '500'
   INSTANCE_IDS: ''
-  SELECT_FILE: ''
+  SELECT_FILE: ${{ github.workspace }}/benchmarks/swebenchmultimodal/resolved_instances.txt
 
 concurrency:
   group: build-swe-bench-multimodal-${{ inputs.sdk-commit || github.ref }}
@@ -172,6 +183,14 @@ jobs:
         with:
           instance-ids: ${{ env.INSTANCE_IDS }}
 
+      # Debug: Print SELECT_FILE value before building
+      - name: Debug SELECT_FILE
+        run: |
+          echo "SELECT_FILE=${SELECT_FILE}"
+          if [ -n "${SELECT_FILE}" ] && [ -f "${SELECT_FILE}" ]; then
+            echo "SELECT_FILE exists with $(wc -l < "${SELECT_FILE}") lines"
+          fi
+
       # Update SDK submodule to specific commit if provided
       # Must run BEFORE install dependencies so git submodule update works correctly
       - name: Update SDK submodule

diff --git a/benchmarks/swebenchmultimodal/README.md b/benchmarks/swebenchmultimodal/README.md
@@ -61,11 +61,10 @@ Pre-build all required docker images:
 uv run benchmarks/swebenchmultimodal/build_images.py \
   --dataset princeton-nlp/SWE-bench_Multimodal \
   --split test \
-  --select benchmarks/swebenchmultimodal/resolved_instances.txt \
   --image ghcr.io/openhands/eval-agent-server
 ```
 
-`build_images.py` does not yet inherit the inference default subset automatically, so pass `--select benchmarks/swebenchmultimodal/resolved_instances.txt` when you want image builds to match the default inference run.
+By default, `build_images.py` builds only the 68 curated instances listed in `benchmarks/swebenchmultimodal/resolved_instances.txt` (the same subset used for inference). To build images for the full dataset instead, pass `--select ''`.
 
 ## Configuration
 

diff --git a/tests/test_multimodal_phased_build.py b/tests/test_multimodal_phased_build.py
@@ -289,3 +289,18 @@ def test_defaults(self):
         assert args.force_build is False
         assert args.n_limit == 0
         assert args.select == BUILD_DEFAULTS["select"]
+
+    def test_select_empty_overrides_default(self):
+        from benchmarks.swebenchmultimodal.build_images import get_parser
+
+        parser = get_parser()
+        # Empty string overrides the default (builds full dataset)
+        args = parser.parse_args(["--select", ""])
+        assert args.select == ""
+
+    def test_select_custom_file(self):
+        from benchmarks.swebenchmultimodal.build_images import get_parser
+
+        parser = get_parser()
+        args = parser.parse_args(["--select", "/custom/path/instances.txt"])
+        assert args.select == "/custom/path/instances.txt"
diff --git a/tests/test_swebenchmultimodal.py b/tests/test_swebenchmultimodal.py
@@ -66,6 +66,22 @@ def test_infer_defaults_use_existing_resolved_instances_file():
     assert DEFAULT_RESOLVED_INSTANCES_FILE.is_file()
 
 
+def test_resolved_instances_file_is_non_empty():
+    """Guard against an accidentally truncated curated subset file.
+
+    The build/inference defaults silently fall back to this file, so an empty
+    file would result in zero instances being processed without a clear error.
+    """
+    instances = [
+        line.strip()
+        for line in DEFAULT_RESOLVED_INSTANCES_FILE.read_text().splitlines()
+        if line.strip()
+    ]
+    assert len(instances) > 0, (
+        f"Curated instance file {DEFAULT_RESOLVED_INSTANCES_FILE} is empty"
+    )
+
+
 def test_resolved_instances_file_matches_solveable_annotations():
     annotations_path = DEFAULT_RESOLVED_INSTANCES_FILE.with_name(
         "ambiguity_annotations.json"