-
Notifications
You must be signed in to change notification settings - Fork 310
test(python): cover CLI harness log helpers #574
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,172 @@ | ||
| """Tests for shared subprocess helpers used by CLI harness providers.""" | ||
|
|
||
| from __future__ import annotations | ||
|
|
||
| import asyncio | ||
| from unittest.mock import AsyncMock, MagicMock, patch | ||
|
|
||
| import pytest | ||
|
|
||
| from agentfield.harness._cli import ( | ||
| estimate_cli_cost, | ||
| extract_final_text, | ||
| parse_jsonl, | ||
| run_cli, | ||
| strip_ansi, | ||
| ) | ||
|
|
||
|
|
||
| def test_strip_ansi_removes_colors(): | ||
| assert strip_ansi("\x1b[31mError\x1b[0m") == "Error" | ||
|
|
||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_run_cli_success(): | ||
| process = MagicMock() | ||
| process.communicate = AsyncMock(return_value=(b"OK", b"")) | ||
| process.returncode = 0 | ||
|
|
||
| create_process = AsyncMock(return_value=process) | ||
|
|
||
| with patch("asyncio.create_subprocess_exec", create_process): | ||
| stdout, stderr, returncode = await run_cli( | ||
| ["agentfield", "status"], | ||
| env={"AGENTFIELD_TEST": "1"}, | ||
| cwd=".", | ||
| timeout=1, | ||
| ) | ||
|
|
||
| assert stdout == "OK" | ||
| assert stderr == "" | ||
| assert returncode == 0 | ||
| create_process.assert_awaited_once() | ||
| _, kwargs = create_process.call_args | ||
| assert kwargs["env"]["AGENTFIELD_TEST"] == "1" | ||
| assert kwargs["cwd"] == "." | ||
| assert kwargs["stdout"] is asyncio.subprocess.PIPE | ||
| assert kwargs["stderr"] is asyncio.subprocess.PIPE | ||
|
|
||
|
|
||
| @pytest.mark.asyncio | ||
| async def test_run_cli_timeout(): | ||
| class HangingProcess: | ||
| returncode = None | ||
|
|
||
| def __init__(self) -> None: | ||
| self.killed = False | ||
| self.wait = AsyncMock(return_value=None) | ||
|
|
||
| async def communicate(self): | ||
| await asyncio.sleep(1) | ||
| return b"", b"" | ||
|
|
||
| def kill(self): | ||
| self.killed = True | ||
|
|
||
| process = HangingProcess() | ||
|
|
||
| with patch("asyncio.create_subprocess_exec", AsyncMock(return_value=process)): | ||
| with pytest.raises(TimeoutError, match="CLI command timed out"): | ||
| await run_cli(["agentfield", "hang"], timeout=0.01) | ||
|
|
||
| assert process.killed is True | ||
| process.wait.assert_awaited_once() | ||
|
|
||
|
|
||
| def test_parse_jsonl_skips_invalid(): | ||
| events = parse_jsonl('{"type":"a"}\nnot-json\n{"type":"b"}') | ||
|
|
||
| assert events == [{"type": "a"}, {"type": "b"}] | ||
|
|
||
|
|
||
| def test_extract_final_text_codex_style(): | ||
| events = [ | ||
| {"type": "item.completed", "item": {"type": "agent_message", "text": "first"}}, | ||
| { | ||
| "type": "item.completed", | ||
| "item": {"type": "agent_message", "text": "final answer"}, | ||
| }, | ||
| ] | ||
|
|
||
| assert extract_final_text(events) == "final answer" | ||
|
|
||
|
|
||
| @pytest.mark.parametrize( | ||
| ("events", "expected"), | ||
| [ | ||
| ([{"type": "result", "result": "result answer"}], "result answer"), | ||
| ([{"type": "result", "text": "text answer"}], "text answer"), | ||
| ([{"type": "turn.completed", "text": "turn answer"}], "turn answer"), | ||
| ([{"type": "message", "content": "message answer"}], "message answer"), | ||
| ([{"type": "assistant", "text": "assistant answer"}], "assistant answer"), | ||
| ], | ||
| ) | ||
| def test_extract_final_text_event_variants(events, expected): | ||
| assert extract_final_text(events) == expected | ||
|
|
||
|
|
||
| def test_extract_final_text_empty_events(): | ||
| assert extract_final_text([]) is None | ||
|
|
||
|
|
||
| def test_estimate_cli_cost_calls_litellm(): | ||
|
Member
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. 🟧 [HIGH] Source ( Suggested additions:
🤖 Reviewed by AgentField PR Review Harness |
||
| mock_litellm = MagicMock() | ||
| mock_litellm.completion_cost.return_value = 0.05 | ||
|
|
||
| with patch.dict("sys.modules", {"litellm": mock_litellm}): | ||
| cost = estimate_cli_cost( | ||
| model="openai/gpt-4o", | ||
| prompt="Summarize this run", | ||
| result_text="Done", | ||
| ) | ||
|
|
||
| assert cost == 0.05 | ||
| mock_litellm.completion_cost.assert_called_once_with( | ||
| model="openai/gpt-4o", | ||
| prompt="Summarize this run", | ||
| completion="Done", | ||
| ) | ||
|
|
||
|
|
||
| def test_estimate_cli_cost_returns_none_without_model(): | ||
| assert estimate_cli_cost(model="", prompt="prompt", result_text="Done") is None | ||
|
|
||
|
|
||
| def test_estimate_cli_cost_returns_none_when_litellm_missing(): | ||
| with patch.dict("sys.modules", {"litellm": None}): | ||
| cost = estimate_cli_cost( | ||
| model="openai/gpt-4o", | ||
| prompt="Summarize this run", | ||
| result_text="Done", | ||
| ) | ||
|
|
||
| assert cost is None | ||
|
|
||
|
|
||
| @pytest.mark.parametrize("raw_cost", [0, None]) | ||
| def test_estimate_cli_cost_returns_none_for_non_positive_cost(raw_cost): | ||
| mock_litellm = MagicMock() | ||
| mock_litellm.completion_cost.return_value = raw_cost | ||
|
|
||
| with patch.dict("sys.modules", {"litellm": mock_litellm}): | ||
| cost = estimate_cli_cost( | ||
| model="openai/gpt-4o", | ||
| prompt="Summarize this run", | ||
| result_text="Done", | ||
| ) | ||
|
|
||
| assert cost is None | ||
|
|
||
|
|
||
| def test_estimate_cli_cost_returns_none_when_litellm_raises(): | ||
| mock_litellm = MagicMock() | ||
| mock_litellm.completion_cost.side_effect = RuntimeError("pricing unavailable") | ||
|
|
||
| with patch.dict("sys.modules", {"litellm": mock_litellm}): | ||
| cost = estimate_cli_cost( | ||
| model="openai/gpt-4o", | ||
| prompt="Summarize this run", | ||
| result_text="Done", | ||
| ) | ||
|
|
||
| assert cost is None | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
🟧 [HIGH]
extract_final_textonly tests 1 of 4 event-type branchesextract_final_textinagentfield/harness/_cli.py:68-98handles four event types:item.completed(Codex) ← tested hereresult← untestedturn.completed← untestedmessage/assistant← untestedUntested branches in a "final answer extraction" path can silently return
Noneor wrong text in production for non-Codex CLI providers. Suggest adding a case per event type plus an empty-events case.🤖 Reviewed by AgentField PR Review Harness