Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion assets/lab/environments/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,7 @@ Verifiers defines a hierarchy of error types under `vf.Error`:
- `vf.ModelError` — errors from model interactions (e.g., `vf.EmptyModelResponseError`)
- `vf.OverlongPromptError` — prompt exceeds model context length
- `vf.ToolError` — tool-related errors (`vf.ToolParseError`, `vf.ToolCallError`)
- `vf.InfraError` — infrastructure errors (e.g., `vf.SandboxError`)
- `vf.InfraError` — infrastructure errors (e.g., `vf.SandboxError`, `vf.TunnelError`)

When a `vf.Error` is raised during a rollout, it is automatically caught and stored in `state["error"]`, triggering the built-in `has_error` stop condition at the next check. This allows rollouts to terminate gracefully rather than crashing.

Expand Down
2 changes: 1 addition & 1 deletion environments/AGENTS.md
Original file line number Diff line number Diff line change
Expand Up @@ -598,7 +598,7 @@ Verifiers defines a hierarchy of error types under `vf.Error`:
- `vf.ModelError` — errors from model interactions (e.g., `vf.EmptyModelResponseError`)
- `vf.OverlongPromptError` — prompt exceeds model context length
- `vf.ToolError` — tool-related errors (`vf.ToolParseError`, `vf.ToolCallError`)
- `vf.InfraError` — infrastructure errors (e.g., `vf.SandboxError`)
- `vf.InfraError` — infrastructure errors (e.g., `vf.SandboxError`, `vf.TunnelError`)

When a `vf.Error` is raised during a rollout, it is automatically caught and stored in `state["error"]`, triggering the built-in `has_error` stop condition at the next check. This allows rollouts to terminate gracefully rather than crashing.

Expand Down
44 changes: 44 additions & 0 deletions tests/test_eval_cli.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
import argparse
import importlib
import os
import sys
import tempfile
import time
from pathlib import Path
Expand Down Expand Up @@ -117,6 +119,48 @@ def test_cli_single_env_id(monkeypatch, run_cli):
assert configs[0].env_id == "env1"


def test_get_env_eval_defaults_for_package_module(tmp_path: Path, monkeypatch):
    """Eval defaults are read from pyproject.toml inside an installed package dir."""
    # Unique module name per run so sys.modules / importer caches cannot collide.
    module_name = f"pkg_env_{time.time_ns()}"
    env_id = module_name.replace("_", "-")

    pkg_root = tmp_path / module_name
    pkg_root.mkdir()
    (pkg_root / "__init__.py").write_text("", encoding="utf-8")
    toml_body = "[tool.verifiers.eval]\nnum_examples = 20\nrollouts_per_example = 6\n"
    (pkg_root / "pyproject.toml").write_text(toml_body, encoding="utf-8")

    monkeypatch.syspath_prepend(str(tmp_path))
    importlib.invalidate_caches()
    try:
        result = vf_eval.get_env_eval_defaults(env_id)
    finally:
        # Drop the synthetic package so later tests never observe it.
        sys.modules.pop(module_name, None)

    assert result == {"num_examples": 20, "rollouts_per_example": 6}


def test_get_env_eval_defaults_for_single_file_module(tmp_path: Path, monkeypatch):
    """Eval defaults resolve for a single-file module via its parent directory."""
    # Unique module name per run so sys.modules / importer caches cannot collide.
    module_name = f"single_file_env_{time.time_ns()}"
    env_id = module_name.replace("_", "-")

    module_src = "def load_environment():\n return None\n"
    (tmp_path / f"{module_name}.py").write_text(module_src, encoding="utf-8")
    toml_body = "[tool.verifiers.eval]\nnum_examples = 20\nrollouts_per_example = 6\n"
    (tmp_path / "pyproject.toml").write_text(toml_body, encoding="utf-8")

    monkeypatch.syspath_prepend(str(tmp_path))
    importlib.invalidate_caches()
    try:
        result = vf_eval.get_env_eval_defaults(env_id)
    finally:
        # Drop the synthetic module so later tests never observe it.
        sys.modules.pop(module_name, None)

    assert result == {"num_examples": 20, "rollouts_per_example": 6}


def test_cli_sampling_args_precedence_over_flags(monkeypatch, run_cli):
"""sampling_args JSON takes precedence over individual flags."""
captured = run_cli(
Expand Down
2 changes: 1 addition & 1 deletion verifiers/rubrics/experimental/hybrid_math_rubric.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from math_verify import parse, verify # type: ignore[unresolved-import]
from math_verify import parse, verify
from openai import AsyncOpenAI

import verifiers as vf
Expand Down
26 changes: 19 additions & 7 deletions verifiers/scripts/eval.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@

import argparse
import asyncio
import importlib.resources
import importlib.util
import json
import logging
from pathlib import Path
Expand Down Expand Up @@ -89,7 +89,7 @@


def get_env_eval_defaults(env_id: str) -> dict[str, Any]:
"""Get eval config defaults from environment package's pyproject.toml.
"""Get eval config defaults from the environment module's pyproject.toml.

Returns dict with 'num_examples' and 'rollouts_per_example' keys if found,
otherwise returns empty dict. All errors are silently handled.
Expand All @@ -98,12 +98,24 @@ def get_env_eval_defaults(env_id: str) -> dict[str, Any]:
module_name = env_id.replace("-", "_").split("/")[-1]

try:
# read pyproject.toml from installed package
package_ref = importlib.resources.files(module_name)
pyproject_file = package_ref / "pyproject.toml"
spec = importlib.util.find_spec(module_name)
if spec is None:
raise ModuleNotFoundError(module_name)

if spec.submodule_search_locations:
base_dir = Path(next(iter(spec.submodule_search_locations)))
elif spec.origin:
base_dir = Path(spec.origin).parent
else:
logger.debug(
f"Could not determine module path for {module_name}; skipping eval defaults"
)
return defaults

pyproject_file = base_dir / "pyproject.toml"

if not pyproject_file.is_file():
logger.debug(f"pyproject.toml not found in installed package {module_name}")
logger.debug(f"pyproject.toml not found for installed module {module_name}")
Copy link
Copy Markdown

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Skill docs missing for workflow change

Low Severity

This PR changes evaluation workflow behavior in verifiers/scripts/eval.py by altering how get_env_eval_defaults resolves pyproject.toml for environments, but no corresponding skills/*/SKILL.md updates are included. That violates the Skills Updates rule for user-facing workflow changes in verifiers/scripts/*.py.

Fix in Cursor Fix in Web

Triggered by project rule: BugBot Instructions

return defaults

with pyproject_file.open("rb") as f:
Expand All @@ -124,7 +136,7 @@ def get_env_eval_defaults(env_id: str) -> dict[str, Any]:
f"Loaded eval defaults from {module_name} pyproject.toml: {defaults}"
)
except ModuleNotFoundError:
logger.debug(f"Package {module_name} not installed")
logger.debug(f"Module {module_name} not installed")
except Exception as e:
logger.debug(
f"Could not load eval defaults from {module_name} pyproject.toml: {e}"
Expand Down
Loading