Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ RUN test -x /agent-server/dist/openhands-agent-server
FROM ${BASE_IMAGE} AS base-image-minimal
ARG USERNAME UID GID PORT


ARG OPENHANDS_BUILD_GIT_SHA=unknown
ARG OPENHANDS_BUILD_GIT_REF=unknown
ENV OPENHANDS_BUILD_GIT_SHA=${OPENHANDS_BUILD_GIT_SHA}
ENV OPENHANDS_BUILD_GIT_REF=${OPENHANDS_BUILD_GIT_REF}

# Install base packages and create user
RUN set -eux; \
# Install base packages (works for both Debian-based images)
Expand Down
4 changes: 4 additions & 0 deletions openhands-agent-server/openhands/agent_server/docker/build.py
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,10 @@ def build(opts: BuildOptions) -> list[str]:
opts.target,
"--build-arg",
f"BASE_IMAGE={opts.base_image}",
"--build-arg",
f"OPENHANDS_BUILD_GIT_SHA={opts.git_sha}",
"--build-arg",
f"OPENHANDS_BUILD_GIT_REF={opts.git_ref}",
]
if push:
args += ["--platform", ",".join(opts.platforms), "--push"]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
import os
import sys
import time
from importlib.metadata import version

from fastapi import APIRouter, Response
from pydantic import BaseModel
from pydantic import BaseModel, Field


server_details_router = APIRouter(prefix="", tags=["Server Details"])
Expand All @@ -11,11 +13,37 @@
_initialization_complete = False


def _package_version(dist_name: str) -> str:
try:
return version(dist_name)
except Exception:
return "unknown"


class ServerInfo(BaseModel):
    """Metadata describing the running agent-server instance.

    Version fields are resolved lazily (via ``default_factory``) so that a
    missing distribution yields ``"unknown"`` instead of failing at import
    time; build fields come from environment variables baked into the image.
    """

    # Runtime counters supplied by the caller.
    uptime: float
    idle_time: float

    title: str = "OpenHands Agent Server"

    # Installed-package versions, each degrading to "unknown" when absent.
    version: str = Field(
        default_factory=lambda: _package_version("openhands-agent-server")
    )
    sdk_version: str = Field(default_factory=lambda: _package_version("openhands-sdk"))
    tools_version: str = Field(
        default_factory=lambda: _package_version("openhands-tools")
    )
    workspace_version: str = Field(
        default_factory=lambda: _package_version("openhands-workspace")
    )

    # Build provenance injected at image-build time (see the Dockerfile ARGs);
    # "unknown" when the server runs outside a built image.
    build_git_sha: str = Field(
        default_factory=lambda: os.environ.get("OPENHANDS_BUILD_GIT_SHA", "unknown")
    )
    build_git_ref: str = Field(
        default_factory=lambda: os.environ.get("OPENHANDS_BUILD_GIT_REF", "unknown")
    )

    # Full interpreter banner, not just the version number.
    python_version: str = Field(default_factory=lambda: sys.version)

    # Convenience links to the API documentation endpoints.
    docs: str = "/docs"
    redoc: str = "/redoc"

Expand Down
8 changes: 7 additions & 1 deletion openhands-sdk/openhands/sdk/agent/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -447,7 +447,13 @@ def _extract_security_risk(
f"Failed to provide security_risk field in tool '{tool_name}'"
)

# When using weaker models without security analyzer
# When no security analyzer is configured, ignore any security_risk field
# from LLM and return UNKNOWN. This ensures that security_risk is only
# evaluated when a security analyzer is explicitly set.
if security_analyzer is None:
return risk.SecurityRisk.UNKNOWN

# When using non-LLM security analyzer without security risk field
# safely ignore missing security risk fields
if not requires_sr and raw is None:
return risk.SecurityRisk.UNKNOWN
Expand Down
15 changes: 15 additions & 0 deletions openhands-sdk/openhands/sdk/workspace/remote/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,21 @@ def _execute(self, generator: Generator[dict[str, Any], httpx.Response, Any]):
except StopIteration as e:
return e.value

def get_server_info(self) -> dict[str, Any]:
    """Return server metadata from the agent-server.

    This is useful for debugging version mismatches between the local SDK and
    the remote agent-server image.

    Returns:
        A JSON-serializable dict returned by GET /server_info.

    Raises:
        httpx.HTTPStatusError: If the server responds with an error status.
        TypeError: If the endpoint returns JSON that is not an object.
    """
    response = self.client.get("/server_info")
    response.raise_for_status()
    data = response.json()
    # Validate explicitly rather than with `assert`: assertions are stripped
    # under `python -O`, which would silently skip this check.
    if not isinstance(data, dict):
        raise TypeError(
            f"/server_info returned {type(data).__name__}, expected a JSON object"
        )
    return data

def execute_command(
self,
command: str,
Expand Down
27 changes: 16 additions & 11 deletions tests/sdk/agent/test_extract_security_risk.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,24 +62,29 @@ def agent_without_analyzer(mock_llm):
("agent_with_llm_analyzer", "MEDIUM", SecurityRisk.MEDIUM, False),
("agent_with_llm_analyzer", "HIGH", SecurityRisk.HIGH, False),
("agent_with_llm_analyzer", "UNKNOWN", SecurityRisk.UNKNOWN, False),
# Case 2: analyzer is not set, security risk is passed, extracted properly
# Case 2: Non-LLM analyzer set, security risk is passed, extracted properly
("agent_with_non_llm_analyzer", "LOW", SecurityRisk.LOW, False),
("agent_with_non_llm_analyzer", "MEDIUM", SecurityRisk.MEDIUM, False),
("agent_with_non_llm_analyzer", "HIGH", SecurityRisk.HIGH, False),
("agent_with_non_llm_analyzer", "UNKNOWN", SecurityRisk.UNKNOWN, False),
("agent_without_analyzer", "LOW", SecurityRisk.LOW, False),
("agent_without_analyzer", "MEDIUM", SecurityRisk.MEDIUM, False),
("agent_without_analyzer", "HIGH", SecurityRisk.HIGH, False),
# Case 3: No analyzer set, security risk is passed, should be ignored
# (return UNKNOWN)
("agent_without_analyzer", "LOW", SecurityRisk.UNKNOWN, False),
("agent_without_analyzer", "MEDIUM", SecurityRisk.UNKNOWN, False),
("agent_without_analyzer", "HIGH", SecurityRisk.UNKNOWN, False),
("agent_without_analyzer", "UNKNOWN", SecurityRisk.UNKNOWN, False),
# Case 3: LLM analyzer set, security risk not passed, ValueError raised
# Case 4: LLM analyzer set, security risk not passed, ValueError raised
("agent_with_llm_analyzer", None, None, True),
# Case 4: analyzer is not set, security risk is not passed, UNKNOWN returned
# Case 5: analyzer is not set, security risk is not passed, UNKNOWN returned
("agent_with_non_llm_analyzer", None, SecurityRisk.UNKNOWN, False),
("agent_without_analyzer", None, SecurityRisk.UNKNOWN, False),
# Case 5: invalid security risk value passed, ValueError raised
# Case 6: invalid security risk value passed
# - With LLM analyzer: ValueError raised for validation
# - With non-LLM analyzer: ValueError raised for invalid enum
# - Without analyzer: ignored, returns UNKNOWN (no validation attempted)
("agent_with_llm_analyzer", "INVALID", None, True),
("agent_with_non_llm_analyzer", "INVALID", None, True),
("agent_without_analyzer", "INVALID", None, True),
("agent_without_analyzer", "INVALID", SecurityRisk.UNKNOWN, False),
],
)
def test_extract_security_risk(
Expand Down Expand Up @@ -122,14 +127,14 @@ def test_extract_security_risk_arguments_mutation():
)
)

# Test with security_risk present
# Test with security_risk present but no analyzer (should be ignored)
arguments = {"param1": "value1", "security_risk": "LOW", "param2": "value2"}
original_args = arguments.copy()

result = agent._extract_security_risk(arguments, "test_tool", False, None)

# Verify result
assert result == SecurityRisk.LOW
# Verify result is UNKNOWN when no analyzer is set (security_risk is ignored)
assert result == SecurityRisk.UNKNOWN

# Verify security_risk was popped
assert "security_risk" not in arguments
Expand Down
27 changes: 24 additions & 3 deletions tests/sdk/agent/test_security_policy_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -279,24 +279,37 @@ def _tool_response(name: str, args_json: str) -> ModelResponse:


def test_security_risk_param_ignored_when_no_analyzer():
"""Security risk param is ignored when no analyzer is configured."""
"""Security risk param is ignored when no analyzer is configured.

This test reproduces the issue from #1957 where the LLM includes
security_risk in tool calls even when llm_security_analyzer=False
and no security analyzer is configured.

Expected behavior: security_risk should be UNKNOWN when no analyzer is set.
"""
from openhands.sdk.security.risk import SecurityRisk

llm = LLM(
usage_id="test-llm",
model="test-model",
api_key=SecretStr("test-key"),
base_url="http://test",
)
agent = Agent(llm=llm, tools=[])
# Set llm_security_analyzer=False in system_prompt_kwargs
agent = Agent(
llm=llm, tools=[], system_prompt_kwargs={"llm_security_analyzer": False}
)

events = []
convo = Conversation(agent=agent, callbacks=[events.append])

# Mock LLM response that includes security_risk=HIGH even though
# llm_security_analyzer=False (the LLM might do this if it's well-trained)
with patch(
"openhands.sdk.llm.llm.litellm_completion",
return_value=_tool_response(
"think",
'{"thought": "This is a test thought", "security_risk": "LOW"}',
'{"thought": "This is a test thought", "security_risk": "HIGH"}',
),
):
convo.send_message(
Expand All @@ -306,3 +319,11 @@ def test_security_risk_param_ignored_when_no_analyzer():

# No agent errors
assert not any(isinstance(e, AgentErrorEvent) for e in events)

# Find the ActionEvent
action_events = [e for e in events if isinstance(e, ActionEvent)]
assert len(action_events) == 1

# Verify that the security_risk is UNKNOWN (ignored) when no analyzer is set
# Even though the LLM provided "HIGH", it should be ignored
assert action_events[0].security_risk == SecurityRisk.UNKNOWN
Loading