Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 17 additions & 13 deletions app/nominal_code/agent/api/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@
ToolUseBlock,
)
from nominal_code.llm.provider import LLMProvider, ProviderError
from nominal_code.models import ProviderName
from nominal_code.models import ErrorType, InvocationError, ProviderName

MAX_RESPONSE_TOKENS: int = 16384
COMPACTION_TOKEN_THRESHOLD: int = 100_000
Expand Down Expand Up @@ -83,8 +83,9 @@ async def run_api_agent(

On the last turn (when ``max_turns`` is set), a warning is injected
instructing the model to call ``submit_review`` immediately. If
``max_turns`` is reached without ``submit_review``, the result has
``exhausted_without_review=True``.
``max_turns`` is reached, the result has ``max_turns_reached=True``
so callers can distinguish a budget exhaustion from a clean
completion (output may be empty or partial in that case).

Args:
prompt (str): The user's prompt to pass to the agent.
Expand Down Expand Up @@ -187,8 +188,7 @@ async def run_api_agent(
duration_ms: int = _now_ms() - start_time

return AgentResult(
output=output or "Done, no output.",
is_error=False,
output=output,
num_turns=turns,
duration_ms=duration_ms,
messages=tuple(messages),
Expand All @@ -215,7 +215,6 @@ async def run_api_agent(

return AgentResult(
output=json.dumps(block.input),
is_error=False,
num_turns=turns,
duration_ms=duration_ms,
messages=tuple(messages),
Expand Down Expand Up @@ -285,8 +284,7 @@ async def run_api_agent(
duration_ms = _now_ms() - start_time

return AgentResult(
output=output or "Max turns reached.",
is_error=False,
output=output,
num_turns=turns,
duration_ms=duration_ms,
messages=tuple(messages),
Expand All @@ -297,7 +295,7 @@ async def run_api_agent(
provider=provider_name,
num_api_calls=api_call_count,
),
exhausted_without_review=has_submit_review,
max_turns_reached=True,
sub_agent_costs=tuple(collected_sub_agent_costs),
)

Expand All @@ -307,8 +305,7 @@ async def run_api_agent(
logger.exception("LLM provider error")

return AgentResult(
output=f"API error: {exc}",
is_error=True,
output="",
num_turns=turns,
duration_ms=duration_ms,
cost=build_cost_summary(
Expand All @@ -317,6 +314,10 @@ async def run_api_agent(
provider=provider_name,
num_api_calls=api_call_count,
),
error=InvocationError(
type=ErrorType.PROVIDER_ERROR,
message=str(exc),
),
)

except Exception as exc:
Expand All @@ -325,8 +326,7 @@ async def run_api_agent(
logger.exception("Unexpected error in API runner")

return AgentResult(
output=f"Unexpected error: {exc}",
is_error=True,
output="",
num_turns=turns,
duration_ms=duration_ms,
cost=build_cost_summary(
Expand All @@ -335,6 +335,10 @@ async def run_api_agent(
provider=provider_name,
num_api_calls=api_call_count,
),
error=InvocationError(
type=ErrorType.RUNTIME_ERROR,
message=str(exc),
),
)


Expand Down
43 changes: 31 additions & 12 deletions app/nominal_code/agent/cli/runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@

from nominal_code.agent.result import AgentResult
from nominal_code.llm.cost import CostSummary
from nominal_code.models import ProviderName
from nominal_code.models import ErrorType, InvocationError, ProviderName

CONVERSATION_ID_INIT_SUBTYPE: str = "init"
MAX_TOOL_RESULT_LOG_LENGTH: int = 500
Expand Down Expand Up @@ -131,7 +131,7 @@ async def run_cli_agent(
returned_conversation_id = message.data.get("session_id", None)

if isinstance(message, ResultMessage):
output: str = message.result or "Done, no output."
output: str = message.result or ""
returned_conversation_id = message.session_id or returned_conversation_id

cli_cost: CostSummary | None = None
Expand All @@ -156,24 +156,43 @@ async def run_cli_agent(
model=options.model or "",
)

result = AgentResult(
output=output,
is_error=message.is_error,
num_turns=message.num_turns,
duration_ms=message.duration_ms,
conversation_id=returned_conversation_id,
cost=cli_cost,
)
# On error the SDK puts the failure description in
# ``output``; route it to ``error.message`` and leave
# ``output`` empty so the success-vs-failure separation is
# structural rather than convention.
if message.is_error:
result = AgentResult(
output="",
num_turns=message.num_turns,
duration_ms=message.duration_ms,
conversation_id=returned_conversation_id,
cost=cli_cost,
error=InvocationError(
type=ErrorType.RUNTIME_ERROR,
message=output,
),
)
else:
result = AgentResult(
output=output,
num_turns=message.num_turns,
duration_ms=message.duration_ms,
conversation_id=returned_conversation_id,
cost=cli_cost,
)

if result is not None:
return result

return AgentResult(
output="No result received from the agent.",
is_error=True,
output="",
num_turns=0,
duration_ms=0,
conversation_id=returned_conversation_id,
error=InvocationError(
type=ErrorType.RUNTIME_ERROR,
message="No result received from the agent.",
),
)


Expand Down
2 changes: 1 addition & 1 deletion app/nominal_code/agent/invoke.py
Original file line number Diff line number Diff line change
Expand Up @@ -185,7 +185,7 @@ def save_conversation(

if (
isinstance(agent_config, ApiAgentConfig)
and not result.is_error
and result.error is None
and result.messages
):
conversation_store.set_messages(
Expand Down
32 changes: 22 additions & 10 deletions app/nominal_code/agent/result.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import TYPE_CHECKING

from nominal_code.llm.messages import Message
from nominal_code.models import InvocationError

if TYPE_CHECKING:
from nominal_code.llm.cost import CostSummary
Expand All @@ -15,27 +16,38 @@ class AgentResult:
Result from an agent invocation.

Attributes:
output (str): The text output from the agent.
is_error (bool): Whether the invocation ended in error.
output (str): The text the model produced. Populated on success
(returned text response or serialized ``submit_review``
input). Empty when ``error is not None`` — there is no
model output to record on failure; the failure details
live on ``error``.
num_turns (int): Number of agentic turns taken.
duration_ms (int): Wall-clock duration in milliseconds.
conversation_id (str | None): Continuation token for the conversation.
Maps to a CLI conversation ID or a provider response ID.
messages (tuple[Message, ...]): Full message history from the API
runner. Empty for CLI runner.
conversation_id (str | None): Continuation token for the
conversation. Maps to a CLI conversation ID or a provider
response ID.
messages (tuple[Message, ...]): Full message history from the
API runner. Empty for CLI runner and for failures that
originate before any assistant turn completes.
cost (CostSummary | None): Cost information for the invocation.
exhausted_without_review (bool): True when max_turns was reached
without the model calling ``submit_review``.
max_turns_reached (bool): True when the agent loop stopped
because it hit the configured ``max_turns`` budget. Not an
error — the invocation completed normally — but consumers
(notably the reviewer's notes-based fallback) need this
signal because ``output`` may be empty or partial when the
cap fires mid-tool-use.
sub_agent_costs (tuple[CostSummary, ...]): Cost summaries from
sub-agents spawned via the Agent tool during this run.
error (InvocationError | None): ``None`` on success; otherwise
wraps the failure classification and message.
"""

output: str
is_error: bool
num_turns: int
duration_ms: int
conversation_id: str | None = None
messages: tuple[Message, ...] = ()
cost: CostSummary | None = None
exhausted_without_review: bool = False
max_turns_reached: bool = False
sub_agent_costs: tuple[CostSummary, ...] = ()
error: InvocationError | None = None
34 changes: 34 additions & 0 deletions app/nominal_code/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,40 @@ class FileStatus(StrEnum):
UNCHANGED = "unchanged"


class ErrorType(StrEnum):
"""
Classification of an agent / review failure.

Values:
PROVIDER_ERROR: LLM provider raised an error.
RUNTIME_ERROR: Unexpected exception escaped the agent loop
(tool dispatch failure, parsing crash, etc.).
PARSE_ERROR: Agent returned successfully but its output could
not be parsed as a structured review and JSON repair did
not recover it.
"""

PROVIDER_ERROR = "provider_error"
RUNTIME_ERROR = "runtime_error"
PARSE_ERROR = "parse_error"


@dataclass(frozen=True)
class InvocationError:
"""
Failure produced by an agent invocation or a review build.

Attributes:
type (ErrorType): Classification of the failure.
message (str): Underlying exception text or short description
of the failure, preserved verbatim so callers can route it
to logs / metrics.
"""

type: ErrorType
message: str = ""


@dataclass(frozen=True)
class ReviewFinding:
"""
Expand Down
25 changes: 24 additions & 1 deletion app/nominal_code/review/reviewer.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
from nominal_code.llm.registry import create_provider
from nominal_code.models import (
ChangedFile,
ErrorType,
InvocationError,
ReviewFinding,
)
from nominal_code.platforms.base import (
Expand Down Expand Up @@ -122,6 +124,8 @@ class ReviewResult:
reviewer agent — ``reviewer_base_system_prompt`` plus the
tag-wrapped ``coding_guidelines``. This is what the reviewer
LLM actually saw.
error (InvocationError | None): ``None`` on success; otherwise
wraps the failure classification and message.
"""

agent_review: AgentReview | None
Expand All @@ -139,6 +143,7 @@ class ReviewResult:
coding_guidelines: str = ""
notes: str = ""
reviewer_system_prompt: str = ""
error: InvocationError | None = None


@dataclass(frozen=True)
Expand Down Expand Up @@ -384,7 +389,7 @@ async def review(
sub_agent_configs=sub_agent_configs,
)

if result.exhausted_without_review:
if result.max_turns_reached:
logger.warning(
"Reviewer exhausted turns for %s#%d, "
"falling back to notes-based review",
Expand Down Expand Up @@ -455,8 +460,25 @@ async def review(
event.pr_number,
)

# IMPORTANT: ``raw_output`` here is what gets posted to the PR
# via ``post_review_result`` — keep it as the generic fallback
# comment regardless of why the agent failed. The structured
# ``error`` field below carries the underlying cause (e.g.
# "Gemini 503") for callers to consume internally; it must
# never leak into the PR comment.
fallback_comment: str = build_fallback_comment(raw_output=result.output)

# Agent-level failures (provider / runtime) flow through
# unchanged. If the agent succeeded, the failure is ours: parse
# + repair both came up empty.
error: InvocationError = result.error or InvocationError(
type=ErrorType.PARSE_ERROR,
message=(
"Reviewer output could not be parsed as structured "
"JSON, and JSON repair did not recover a valid review."
),
)

return ReviewResult(
agent_review=None,
valid_findings=[],
Expand All @@ -473,6 +495,7 @@ async def review(
coding_guidelines=effective_guidelines,
notes=captured_notes,
reviewer_system_prompt=combined_system_prompt,
error=error,
)

if scope is ReviewScope.CODEBASE:
Expand Down
Loading
Loading