Skip to content

Commit cefda44

Browse files
moonbox3CopilotCopilot
authored
Python: Emit TOOL_CALL_RESULT events when resuming after tool approval (#4758)
* Emit TOOL_CALL_RESULT events on approval resume (#4589)

  When a tool call is approved via the interrupt/resume flow, _resolve_approval_responses executes the tool and injects the result into the messages array, but no TOOL_CALL_RESULT SSE event was yielded to the client.

  Changes:
  - _resolve_approval_responses now returns the list of resolved function_result Content objects instead of None
  - run_agent_stream yields ToolCallResultEvent for each resolved approval result after RunStartedEvent is emitted
  - Add ToolCallResultEvent to ag_ui.core imports in _agent_run.py

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Apply pre-commit auto-fixes

* fix(ag-ui): address PR review feedback for #4589

  1. _resolve_approval_responses now returns only approved results (not rejections) so TOOL_CALL_RESULT events are emitted only for executed tools. Rejection results are still written into message history.
  2. Emit resolved TOOL_CALL_RESULT events in the no-updates fallback RUN_STARTED path so approval results are never lost.
  3. Rewrite tests to use real FunctionTool with func and approval_mode='always_require' via StubAgent default_options, verifying actual tool execution output in TOOL_CALL_RESULT content. Added test for rejection not emitting TOOL_CALL_RESULT.
  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Fix #4589: clean up approval resolution and add missing tests

  - Extract duplicated TOOL_CALL_RESULT emission block into _make_approval_tool_result_events helper to prevent drift
  - Remove dead rejection_results construction in _resolve_approval_responses; _replace_approval_contents_with_results already handles rejections inline
  - Pass only approved_results (not all_results) to clarify the contract
  - Add mixed approve/reject test validating the core splitting logic
  - Add zero-updates test covering the no-updates fallback emission path
  - Add direct unit test for _resolve_approval_responses return value

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

* Apply pre-commit auto-fixes

* Fix import sorting lint error in test_approval_result_event.py

  Add blank line between first-party and third-party import groups to satisfy ruff I001 rule.

  Fixes #4589

  Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

---------

Co-authored-by: Copilot <copilot@github.com>
Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 4afc088 commit cefda44

2 files changed

Lines changed: 492 additions & 17 deletions

File tree

python/packages/ag-ui/agent_framework_ag_ui/_agent_run.py

Lines changed: 42 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,7 @@
2121
TextMessageStartEvent,
2222
ToolCallArgsEvent,
2323
ToolCallEndEvent,
24+
ToolCallResultEvent,
2425
ToolCallStartEvent,
2526
)
2627
from agent_framework import (
@@ -369,6 +370,24 @@ def _handle_step_based_approval(messages: list[Any]) -> list[BaseEvent]:
369370
return events
370371

371372

373+
def _make_approval_tool_result_events(resolved_approval_results: list[Content]) -> list[ToolCallResultEvent]:
    """Turn resolved approval results into TOOL_CALL_RESULT events.

    Args:
        resolved_approval_results: Function-result contents produced while
            resolving approval responses. Entries whose ``call_id`` is falsy
            are skipped.

    Returns:
        One ``ToolCallResultEvent`` per entry that has a ``call_id``, in input
        order. Each event gets a freshly generated message id, the entry's
        ``call_id`` as ``tool_call_id``, role ``"tool"``, and the result as a
        string: ``None`` becomes ``""``, strings pass through unchanged, and
        any other value is serialized with ``json.dumps`` after
        ``make_json_safe``.
    """
    tool_events: list[ToolCallResultEvent] = []
    for item in resolved_approval_results:
        # Without a call id the event has no tool call to attach to.
        if not item.call_id:
            continue

        payload = item.result
        if payload is None:
            payload = ""

        # Event content is a string; strings pass through, everything else is JSON-encoded.
        if isinstance(payload, str):
            content_text = payload
        else:
            content_text = json.dumps(make_json_safe(payload))

        tool_events.append(
            ToolCallResultEvent(
                message_id=generate_event_id(),
                tool_call_id=item.call_id,
                content=content_text,
                role="tool",
            )
        )
    return tool_events
389+
390+
372391
def _evict_oldest_approvals(registry: dict[str, str], max_size: int = 10_000) -> None:
373392
"""Evict the oldest entries from the pending-approvals registry (LRU).
374393
@@ -391,7 +410,7 @@ async def _resolve_approval_responses(
391410
run_kwargs: dict[str, Any],
392411
pending_approvals: dict[str, str] | None = None,
393412
thread_id: str = "",
394-
) -> None:
413+
) -> list[Content]:
395414
"""Execute approved function calls and replace approval content with results.
396415
397416
This modifies the messages list in place, replacing function_approval_response
@@ -407,10 +426,16 @@ async def _resolve_approval_responses(
407426
When provided, every approval response is validated against this
408427
registry to prevent bypass, function name spoofing, and replay.
409428
thread_id: The conversation thread ID used to scope registry keys.
429+
430+
Returns:
431+
List of approved function_result Content objects only (empty if no
432+
approvals). Rejection results are written into the message history
433+
but are *not* included in the return value because they should not
434+
be emitted as TOOL_CALL_RESULT events.
410435
"""
411436
fcc_todo = _collect_approval_responses(messages)
412437
if not fcc_todo:
413-
return
438+
return []
414439

415440
approved_responses = [resp for resp in fcc_todo.values() if resp.approved]
416441
rejected_responses = [resp for resp in fcc_todo.values() if not resp.approved]
@@ -493,38 +518,32 @@ async def _resolve_approval_responses(
493518
logger.exception("Failed to execute approved tool calls; injecting error results: %s", e)
494519
approved_function_results = []
495520

496-
# Build normalized results for approved responses
497-
normalized_results: list[Content] = []
521+
# Build results for approved responses (used for TOOL_CALL_RESULT event emission)
522+
approved_results: list[Content] = []
498523
for idx, approval in enumerate(approved_responses):
499524
if (
500525
idx < len(approved_function_results)
501526
and getattr(approved_function_results[idx], "type", None) == "function_result"
502527
):
503-
normalized_results.append(approved_function_results[idx])
528+
approved_results.append(approved_function_results[idx])
504529
continue
505530
# Get call_id from function_call if present, otherwise use approval.id
506531
func_call = approval.function_call
507532
call_id = (func_call.call_id if func_call else None) or approval.id or ""
508-
normalized_results.append(
533+
approved_results.append(
509534
Content.from_function_result(call_id=call_id, result="Error: Tool call invocation failed.")
510535
)
511536

512-
# Build rejection results
513-
for rejection in rejected_responses:
514-
func_call = rejection.function_call
515-
call_id = (func_call.call_id if func_call else None) or rejection.id or ""
516-
normalized_results.append(
517-
Content.from_function_result(call_id=call_id, result="Error: Tool call invocation was rejected by user.")
518-
)
519-
520-
_replace_approval_contents_with_results(messages, fcc_todo, normalized_results) # type: ignore
537+
_replace_approval_contents_with_results(messages, fcc_todo, approved_results) # type: ignore
521538

522539
# Post-process: Convert user messages with function_result content to proper tool messages.
523540
# After _replace_approval_contents_with_results, approved tool calls have their results
524541
# placed in user messages. OpenAI requires tool results to be in role="tool" messages.
525542
# This transformation ensures the message history is valid for the LLM provider.
526543
_convert_approval_results_to_tool_messages(messages)
527544

545+
return approved_results
546+
528547

529548
def _convert_approval_results_to_tool_messages(messages: list[Message]) -> None:
530549
"""Convert function_result content in user messages to proper tool messages.
@@ -787,7 +806,9 @@ async def run_agent_stream(
787806
# Resolve approval responses (execute approved tools, replace approvals with results)
788807
# This must happen before running the agent so it sees the tool results
789808
tools_for_execution = tools if tools is not None else server_tools
790-
await _resolve_approval_responses(messages, tools_for_execution, agent, run_kwargs, pending_approvals, thread_id)
809+
resolved_approval_results = await _resolve_approval_responses(
810+
messages, tools_for_execution, agent, run_kwargs, pending_approvals, thread_id
811+
)
791812

792813
# Defense-in-depth: replace approval payloads in snapshot with actual tool results
793814
# so CopilotKit does not re-send stale approval content on subsequent turns.
@@ -851,6 +872,9 @@ async def run_agent_stream(
851872
yield StateSnapshotEvent(snapshot=flow.current_state)
852873
run_started_emitted = True
853874

875+
for event in _make_approval_tool_result_events(resolved_approval_results):
876+
yield event
877+
854878
# Feature #4: Detect tool-only messages (no text content)
855879
# Emit TextMessageStartEvent to create message context for tool calls
856880
if not flow.message_id and _has_only_tool_calls(update.contents):
@@ -905,7 +929,8 @@ async def run_agent_stream(
905929
if state_schema and flow.current_state:
906930
yield StateSnapshotEvent(snapshot=flow.current_state)
907931

908-
# Process structured output if response_format is set
932+
for event in _make_approval_tool_result_events(resolved_approval_results):
933+
yield event
909934
if response_format is not None and all_updates:
910935
from agent_framework import AgentResponse
911936
from pydantic import BaseModel

0 commit comments

Comments
 (0)