Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
74 changes: 8 additions & 66 deletions src/foundation/live_turn.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,6 @@
_REFRESH_PER_SECOND = 8
_MAX_COMPLETED_DETAIL = 12
_TOGGLE_KEY = "?"
_SOFT_STALE_SECONDS = 15.0
_HARD_STALE_SECONDS = 60.0


class LivePhase(enum.Enum):
Expand Down Expand Up @@ -231,37 +229,13 @@ def _truncate(value: str, *, limit: int) -> str:
return value[: max(limit - 1, 1)] + "…"


def _stale_status(state: TurnLiveState, *, now: float) -> Text | None:
    """Return a staleness notice for the status line, or ``None``.

    No notice is produced when ``state.last_event_at`` is not positive or
    when the turn is in a waiting/terminal phase.  Otherwise the time since
    the last event (clamped at zero) is compared against the hard threshold
    first, then the soft one.
    """
    silent_phases = {
        LivePhase.WAITING_APPROVAL,
        LivePhase.WAITING_USER,
        LivePhase.COMPLETED,
        LivePhase.FAILED,
        LivePhase.CANCELLED,
    }
    if state.last_event_at <= 0 or state.phase in silent_phases:
        return None

    silence = max(now - state.last_event_at, 0.0)

    # The hard threshold is checked before the soft one so the stronger
    # warning wins once both apply.
    if silence >= _HARD_STALE_SECONDS:
        hard_message = (
            f"No live events for {_format_duration(silence)} · model may still be running · "
            "Ctrl-C to cancel"
        )
        return Text(hard_message, style="yellow")

    if silence >= _SOFT_STALE_SECONDS:
        soft_message = f"Still waiting on model · no events for {_format_duration(silence)}"
        return Text(soft_message, style="cyan")

    return None


def render_status_line(
state: TurnLiveState,
*,
elapsed_seconds: float,
now: float | None = None,
) -> RenderableType:
"""One-line status (collapsed mode)."""
now_value = time.monotonic() if now is None else now
if state.finished or state.phase in {
LivePhase.COMPLETED,
LivePhase.FAILED,
Expand All @@ -270,51 +244,19 @@ def render_status_line(
verb = state.final_status or "done"
style = "red" if state.phase is LivePhase.FAILED else "green"
return Text(f"✓ {verb} · {_format_duration(elapsed_seconds)}", style=style)
stale = _stale_status(state, now=now_value)
if stale is not None:
return stale
if state.awaiting_approval or state.phase is LivePhase.WAITING_APPROVAL:
text = Text("Waiting for approval", style="yellow")
if state.approval_summary:
text.append(f" · {state.approval_summary}", style="yellow")
return text
return Text("Approval needed", style="yellow")
if state.awaiting_input or state.phase is LivePhase.WAITING_USER:
text = Text("Waiting for your answer", style="yellow")
if state.question_summary:
text.append(f" · {state.question_summary}", style="yellow")
return text
return Text("Answer needed", style="yellow")
if state.current_action_id is not None or state.phase is LivePhase.RUNNING_TOOL:
action_elapsed = (
now_value - state.current_action_started_at
if state.current_action_started_at is not None
else 0.0
)
descriptor = state.current_action_tool or "tool"
return Text(
f"Running {descriptor} · iter {state.iteration} · {_format_duration(action_elapsed)}",
style="cyan",
)
return Text("Working", style="cyan")
if state.planning_started_at is not None or state.phase is LivePhase.PLANNING:
plan_elapsed = (
now_value - state.planning_started_at
if state.planning_started_at is not None
else elapsed_seconds
)
return Text(
f"planning iteration {state.iteration} · {_format_duration(plan_elapsed)}",
style="cyan",
)
return Text("Planning", style="cyan")
if state.phase is LivePhase.OBSERVING:
return Text(
f"Observing result · iter {state.iteration} · {_format_duration(elapsed_seconds)}",
style="cyan",
)
if state.iteration > 0:
return Text(
f"Thinking · iteration {state.iteration} · {_format_duration(elapsed_seconds)}",
style="cyan",
)
return Text("Starting turn", style="cyan")
return Text("Checking", style="cyan")
if state.phase is LivePhase.THINKING or state.iteration > 0:
return Text("Thinking", style="cyan")
return Text("Starting", style="cyan")


def render_detail_panel(state: TurnLiveState, *, elapsed_seconds: float) -> RenderableType:
Expand Down
1 change: 1 addition & 0 deletions src/foundation/models/orchestration.py
Original file line number Diff line number Diff line change
Expand Up @@ -104,6 +104,7 @@ class LoopStopReason(StrEnum):
ZERO_ACTION_PLAN = "zero_action_plan"
PENDING_APPROVAL = "pending_approval"
AWAITING_USER_INPUT = "awaiting_user_input"
BLOCKED = "blocked"
FATAL_EXECUTION_FAILURE = "fatal_execution_failure"
MAX_ITERATIONS = "max_iterations"
MAX_ACTIONS = "max_actions"
Expand Down
43 changes: 39 additions & 4 deletions src/foundation/services/orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,12 @@
"no such file or directory",
}
)
# Lower-case substrings searched for in the lower-cased error text of a
# BLOCKED execution result; any match marks that result as terminally blocked.
_TERMINAL_BLOCKED_ERROR_PATTERNS = frozenset(
{
"denied by the user",
"not approved",
}
)

# Heuristic intent markers used by the commit-approval runtime invariant.
# When the user's message contains "commit" as a whole word, or names any of
Expand Down Expand Up @@ -189,6 +195,7 @@ def _worst_verification_outcome(
LoopStopReason.AWAITING_USER_INPUT: (
"\n\n[Loop stopped: waiting for your answer to a question before continuing.]"
),
LoopStopReason.BLOCKED: ("\n\n[Loop stopped: an action was blocked before continuing.]"),
LoopStopReason.FATAL_EXECUTION_FAILURE: (
"\n\n[Loop stopped: a fatal execution failure occurred.]"
),
Expand Down Expand Up @@ -1003,12 +1010,13 @@ def _run_replan_loop(
iteration=iteration_index,
)
)
total_actions_executed += len(actions_to_execute)
attempted_actions = actions_to_execute[: len(execution_results)]
total_actions_executed += len(attempted_actions)
prev_last_step_id = last_step_id

# 6. Track mutations and verification
iter_changed, iter_code_change, iter_outcome, iter_verify_cmds = self._classify_results(
execution_results, actions_to_execute
execution_results, attempted_actions
)
had_code_changes = had_code_changes or iter_code_change
verification_outcome = _worst_verification_outcome(
Expand All @@ -1027,7 +1035,7 @@ def _run_replan_loop(
# tool calls (file writes, git mutations) so the planner sees
# them and won't re-issue.
for action, result in zip(
actions_to_execute,
attempted_actions,
execution_results,
strict=True,
):
Expand Down Expand Up @@ -1055,12 +1063,17 @@ def _run_replan_loop(
has_awaiting_input = any(
r.status is ExecutionStatus.AWAITING_INPUT for r in execution_results
)
has_terminal_blocked = any(
self._is_terminal_blocked_result(r) for r in execution_results
)
has_fatal = any(self._is_fatal_result(r) for r in execution_results)

if has_pending:
stop_reason = LoopStopReason.PENDING_APPROVAL
elif has_awaiting_input:
stop_reason = LoopStopReason.AWAITING_USER_INPUT
elif has_terminal_blocked:
stop_reason = LoopStopReason.BLOCKED
elif has_fatal:
stop_reason = LoopStopReason.FATAL_EXECUTION_FAILURE
elif total_actions_executed >= _MAX_TOTAL_ACTIONS:
Expand All @@ -1072,7 +1085,7 @@ def _run_replan_loop(
observation = self._build_observation(
iteration_index,
execution_results,
actions_to_execute,
attempted_actions,
iter_changed,
remaining_iterations=_MAX_LOOP_ITERATIONS - iteration_index,
remaining_actions=_MAX_TOTAL_ACTIONS - total_actions_executed,
Expand Down Expand Up @@ -1348,6 +1361,17 @@ def _execute_iteration_actions(
iteration=iteration,
)
prior_step_id = last_step_id
result = execution.execution_result
if (
result.status
in {
ExecutionStatus.PENDING_APPROVAL,
ExecutionStatus.AWAITING_INPUT,
}
or self._is_terminal_blocked_result(result)
or self._is_fatal_result(result)
):
break

return execution_results, decisions, evaluations, last_step_id

Expand Down Expand Up @@ -1532,6 +1556,15 @@ def _classify_results(

return changed_paths, had_code_changes, iter_outcome, verify_cmds

@staticmethod
def _is_terminal_blocked_result(result: ExecutionResult) -> bool:
    """Return True when *result* is BLOCKED with a terminal error message.

    A result qualifies only if its status is ``ExecutionStatus.BLOCKED``, it
    carries an error string, and that string (compared case-insensitively)
    contains one of ``_TERMINAL_BLOCKED_ERROR_PATTERNS``.
    """
    if result.status is not ExecutionStatus.BLOCKED or result.error is None:
        return False
    lowered_error = result.error.lower()
    for pattern in _TERMINAL_BLOCKED_ERROR_PATTERNS:
        if pattern in lowered_error:
            return True
    return False

@staticmethod
def _is_fatal_result(result: ExecutionResult) -> bool:
if result.status is not ExecutionStatus.FAILED:
Expand Down Expand Up @@ -1758,6 +1791,8 @@ def _session_status_for_result(
# Stopped to ask the user something we couldn't prompt for inline
# (non-interactive run, or the user dismissed the prompt).
return SessionStatus.COMPLETED_INCONCLUSIVE
if stop_reason is LoopStopReason.BLOCKED:
return SessionStatus.FAILED
if stop_reason is LoopStopReason.FATAL_EXECUTION_FAILURE:
return SessionStatus.FAILED
if stop_reason is LoopStopReason.ZERO_ACTION_PLAN:
Expand Down
12 changes: 4 additions & 8 deletions tests/test_live_turn.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,15 +219,13 @@ def test_render_status_line_shows_running_action():
current_action_started_at=0.0,
)
text = _render_to_text(render_status_line(state, elapsed_seconds=1.5))
assert "iter 2" in text
assert "foundation.git.commit" in text
assert text.strip() == "Working"


def test_render_status_line_shows_planning_when_no_action():
state = TurnLiveState(iteration=1, planning_started_at=0.0)
text = _render_to_text(render_status_line(state, elapsed_seconds=0.4))
assert "planning" in text
assert "iteration 1" in text
assert text.strip() == "Planning"


def test_render_status_line_shows_stale_without_mutating_phase():
Expand All @@ -240,8 +238,7 @@ def test_render_status_line_shows_stale_without_mutating_phase():

text = _render_to_text(render_status_line(state, elapsed_seconds=18.0, now=28.0))

assert "Still waiting on model" in text
assert "no events for 18.0s" in text
assert text.strip() == "Thinking"
assert state.phase is LivePhase.THINKING


Expand All @@ -254,8 +251,7 @@ def test_render_status_line_shows_hard_stale():

text = _render_to_text(render_status_line(state, elapsed_seconds=67.0, now=77.0))

assert "No live events for 1m07s" in text
assert "Ctrl-C to cancel" in text
assert text.strip() == "Thinking"


def test_render_status_line_shows_done_when_finished():
Expand Down
76 changes: 76 additions & 0 deletions tests/test_orchestrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -1234,6 +1234,82 @@ def test_orchestrator_blocks_risky_shell_commands_when_prompt_is_denied(
assert not (workspace_root / "denied.txt").exists()


def test_orchestrator_stops_after_denied_approval_without_replanning(
    tmp_path: Path,
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """A denied approval prompt stops the loop: no later action, no second plan."""
    # First plan: a shell action that needs approval, followed by a file write
    # that must never run once the shell action is denied.
    first_plan = {
        "assistant_message": "I can create that file after approval.",
        "actions": [
            {
                "id": "create_file",
                "kind": "shell",
                "summary": "Create a file in the workspace",
                "shell": {
                    "command": "touch",
                    "args": ["denied.txt"],
                },
            },
            {
                "id": "write_retry",
                "kind": "tool_call",
                "summary": "Write retry marker",
                "tool_call": {
                    "capability_id": "foundation.file.write",
                    "arguments": {
                        "path": "retry.txt",
                        "content": "should not happen\n",
                    },
                },
            },
        ],
    }
    # Second plan: only reachable if the orchestrator replans after the denial.
    second_plan = {
        "assistant_message": "Trying a different path.",
        "actions": [
            {
                "id": "write_second_plan",
                "kind": "tool_call",
                "summary": "Write second-plan marker",
                "tool_call": {
                    "capability_id": "foundation.file.write",
                    "arguments": {
                        "path": "second-plan.txt",
                        "content": "should not happen either\n",
                    },
                },
            }
        ],
    }
    provider = StubProvider(
        [
            _provider_response(first_plan),
            _provider_response(second_plan),
        ]
    )
    # The prompt callback always answers False, i.e. every approval is denied.
    denying_approvals = ApprovalService(
        mode=ApprovalMode.PROMPT,
        prompt_callback=lambda _request: False,
    )
    orchestrator, runtime, workspace_root = _orchestrator(
        tmp_path,
        monkeypatch,
        provider,
        approval_service=denying_approvals,
    )

    outcome = orchestrator.orchestrate(UserRequest(message="create denied.txt"))

    assert len(provider.calls) == 1
    assert runtime.calls == 0
    assert outcome.stop_reason is LoopStopReason.BLOCKED
    assert outcome.execution_results[0].status is ExecutionStatus.BLOCKED
    for forbidden_name in ("denied.txt", "retry.txt", "second-plan.txt"):
        assert not (workspace_root / forbidden_name).exists()


def test_orchestrator_persists_sessions_commands_and_summaries(
tmp_path: Path,
monkeypatch: pytest.MonkeyPatch,
Expand Down
Loading