PrimeIntellect-ai · nph4rd · Feb 7, 2026 · Feb 8, 2026 · Feb 8, 2026 · Feb 8, 2026
diff --git a/pyproject.toml b/pyproject.toml
@@ -105,6 +105,9 @@ rl = [
 [tool.uv]
 preview = true
 
+[tool.uv.sources]
+textarena = { git = "https://github.com/nph4rd/TextArena.git", branch = "fix/kuhn-poker-phantom-ante" }
+
 [tool.uv.extra-build-dependencies]
 flash-attn = [{ requirement = "torch", match-runtime = true }]
 

diff --git a/verifiers/__init__.py b/verifiers/__init__.py
@@ -55,8 +55,12 @@
     "MCPEnv",
     "BrowserEnv",
     "OpenEnvEnv",
+    "Agent",
+    "Protocol",
+    "RoundRobinProtocol",
     "Environment",
     "MultiTurnEnv",
+    "MultiAgentEnv",
     "SingleTurnEnv",
     "PythonEnv",
     "SandboxEnv",
@@ -110,6 +114,10 @@
     "SingleTurnEnv": "verifiers.envs.singleturn_env:SingleTurnEnv",
     "StatefulToolEnv": "verifiers.envs.stateful_tool_env:StatefulToolEnv",
     "ToolEnv": "verifiers.envs.tool_env:ToolEnv",
+    "Agent": "verifiers.envs.agent:Agent",
+    "Protocol": "verifiers.envs.protocol:Protocol",
+    "RoundRobinProtocol": "verifiers.envs.protocol:RoundRobinProtocol",
+    "MultiAgentEnv": "verifiers.envs.multiagent_env:MultiAgentEnv",
     "EnvGroup": "verifiers.envs.env_group:EnvGroup",
     "JudgeRubric": "verifiers.rubrics.judge_rubric:JudgeRubric",
     "load_environment": "verifiers.utils.env_utils:load_environment",
@@ -173,8 +181,11 @@ def __getattr__(name: str):
         OpenAIChatCompletionsTokenClient,
     )
     from .clients.openai_completions_client import OpenAICompletionsClient  # noqa: F401
+    from .envs.agent import Agent  # noqa: F401
+    from .envs.protocol import Protocol, RoundRobinProtocol  # noqa: F401
     from .envs.env_group import EnvGroup  # noqa: F401
     from .envs.environment import Environment  # noqa: F401
+    from .envs.multiagent_env import MultiAgentEnv  # noqa: F401
     from .envs.experimental.cli_agent_env import CliAgentEnv  # noqa: F401
     from .envs.experimental.gym_env import GymEnv  # noqa: F401
     from .envs.experimental.harbor_env import HarborEnv  # noqa: F401

diff --git a/verifiers/envs/agent.py b/verifiers/envs/agent.py
@@ -0,0 +1,35 @@
+"""
+Agent: A participant in multi-agent environments.
+
+Contains agent metadata (id, system prompt, trainability).
+"""
+
+from dataclasses import dataclass
+
+
+@dataclass
+class Agent:
+    """
+    An agent in a multi-agent environment; identity is defined by ``id`` alone.
+
+    Fields:
+        id: Unique identifier for this agent (e.g., "player_0", "guesser")
+        system_prompt: The agent's specific instructions
+        is_trainable: Whether to compute gradients for this agent's actions
+    """
+
+    id: str
+    system_prompt: str = ""
+    is_trainable: bool = True
+
+    def __hash__(self) -> int:  # hashes only ``id``, consistent with __eq__
+        return hash(self.id)
+
+    def __eq__(self, other: object) -> bool:
+        if isinstance(other, Agent):
+            return self.id == other.id
+        return NotImplemented  # let Python try the other operand's __eq__
+
+    def __repr__(self) -> str:  # omits system_prompt; shows trainable/frozen
+        trainable_str = "trainable" if self.is_trainable else "frozen"
+        return f"Agent(id={self.id!r}, {trainable_str})"
diff --git a/verifiers/envs/env_group.py b/verifiers/envs/env_group.py
@@ -278,11 +278,19 @@ async def run_rollout(  # type: ignore[override]
         max_retries: int = 0,
         state_columns: list[str] | None = None,
         env_client: EnvClient | None = None,
+        actor_models: dict[str, str] | None = None,
     ) -> vf.RolloutOutput:
         env = self.get_env_for_task(input["task"])
         env_client = env_client or env.env_client or self.env_client
         return await env.run_rollout(
-            input, client, model, sampling_args, max_retries, state_columns, env_client
+            input,
+            client,
+            model,
+            sampling_args,
+            max_retries,
+            state_columns,
+            env_client,
+            actor_models=actor_models,
         )
 
     @final
@@ -295,6 +303,7 @@ async def run_group(  # type: ignore[override]
         max_retries: int = 0,
         state_columns: list[str] | None = None,
         env_client: EnvClient | None = None,
+        actor_models: dict[str, str] | None = None,
     ) -> list[vf.RolloutOutput]:
         env = self.get_env_for_task(group_inputs[0]["task"])
         env_client = env_client or env.env_client or self.env_client
@@ -306,6 +315,7 @@ async def run_group(  # type: ignore[override]
             max_retries,
             state_columns,
             env_client,
+            actor_models=actor_models,
         )
 
     @final

diff --git a/verifiers/envs/environment.py b/verifiers/envs/environment.py
@@ -36,7 +36,7 @@
 from verifiers.utils.eval_utils import filter_inputs
 from verifiers.utils.path_utils import is_valid_eval_results_path
 from verifiers.utils.thread_utils import scale_executors
-from verifiers.utils.worker_utils import get_free_port_pair
+from verifiers.utils.worker_utils import get_free_port_pair, release_reserved_ports
 from verifiers.workers.client.zmq_env_client import ZMQEnvClient
 from verifiers.workers.server.zmq_env_server import ZMQEnvServer
 
@@ -687,6 +687,7 @@ async def run_rollout(
         max_retries: int = 0,
         state_columns: list[str] | None = None,
         env_client: EnvClient | None = None,
+        actor_models: dict[str, str] | None = None,
     ) -> RolloutOutput:
         """Generate and, optionally, score a rollout."""
 
@@ -707,6 +708,7 @@ async def run_rollout(
                 sampling_args,
                 max_retries,
                 state_columns,
+                actor_models=actor_models,
             )
 
         resolved_client = resolve_client(client)
@@ -742,6 +744,7 @@ async def run_group(
         max_retries: int = 0,
         state_columns: list[str] | None = None,
         env_client: EnvClient | None = None,
+        actor_models: dict[str, str] | None = None,
         **kwargs,
     ) -> list[RolloutOutput]:
         """Generate and, optionally, score one group."""
@@ -763,6 +766,7 @@ async def run_group(
                 sampling_args,
                 max_retries,
                 state_columns,
+                actor_models=actor_models,
             )
 
         resolved_client = resolve_client(client)
@@ -1282,6 +1286,10 @@ async def start_server(
         """
         address = address or f"tcp://127.0.0.1:{get_free_port_pair()}"
         extra_env_kwargs = extra_env_kwargs or {}
+        # Release the port-reservation sockets so the subprocess can bind.
+        # With "spawn" context the subprocess doesn't inherit FDs, and on
+        # macOS ZMQ cannot bind over a held SO_REUSEADDR socket.
+        release_reserved_ports()
         # Use spawn to avoid inheriting file descriptors (e.g. sockets) from
         # the parent process, which has caused hangs when multiple env server
         # subprocesses share the same fds.