Skip to content

Commit 0805ab9

Browse files
committed
fix cloud agent tracing bugs
1 parent c080f4b commit 0805ab9

File tree

4 files changed

+104
-19
lines changed

4 files changed

+104
-19
lines changed

examples/cloud_tracing_agent.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -85,9 +85,9 @@ def main():
8585
# 6. Get token usage stats
8686
stats = agent.get_token_stats()
8787
print("\n📊 Token Usage:")
88-
print(f" Total tokens: {stats['total_tokens']}")
89-
print(f" Prompt tokens: {stats['total_prompt_tokens']}")
90-
print(f" Completion tokens: {stats['total_completion_tokens']}")
88+
print(f" Total tokens: {stats.total_tokens}")
89+
print(f" Prompt tokens: {stats.total_prompt_tokens}")
90+
print(f" Completion tokens: {stats.total_completion_tokens}")
9191

9292
except Exception as e:
9393
print(f"\n❌ Error during execution: {e}")

sentience/agent.py

Lines changed: 46 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -237,7 +237,7 @@ def act( # noqa: C901
237237
self._track_tokens(goal, llm_response)
238238

239239
# Parse action from LLM response
240-
action_str = llm_response.content.strip()
240+
action_str = self._extract_action_from_response(llm_response.content)
241241

242242
# 4. EXECUTE: Parse and run action
243243
result_dict = self._execute_action(action_str, filtered_snap)
@@ -395,6 +395,34 @@ def _build_context(self, snap: Snapshot, goal: str) -> str:
395395

396396
return "\n".join(lines)
397397

398+
def _extract_action_from_response(self, response: str) -> str:
    """
    Extract the action command from a raw LLM response.

    Handles responses where the LLM wraps the command in Markdown code
    fences or surrounds it with explanation despite instructions.

    Args:
        response: Raw LLM response text

    Returns:
        The first CLICK(...), TYPE(...), PRESS(...) or FINISH() command found
        (matched case-insensitively). If none is found, the response with
        fence markers removed and surrounding whitespace stripped.
    """
    import re

    # Remove Markdown fence markers. A language tag is consumed only when it
    # is followed by a line break (```python<newline>...); otherwise an inline
    # fence such as ```CLICK(42)``` would lose "CLICK" as if it were a tag.
    response = re.sub(r"```(?:\w*\r?\n)?", "", response).strip()

    # Pattern matches: CLICK(123), TYPE(123, "text"), PRESS("key"), FINISH()
    action_pattern = (
        r"(CLICK\s*\(\s*\d+\s*\)"
        r'|TYPE\s*\(\s*\d+\s*,\s*["\'].*?["\']\s*\)'
        r'|PRESS\s*\(\s*["\'].*?["\']\s*\)'
        r"|FINISH\s*\(\s*\))"
    )

    match = re.search(action_pattern, response, re.IGNORECASE)
    if match:
        return match.group(1)

    # No recognisable action: hand back the cleaned text so the caller's
    # parser can report the failure.
    return response
425+
398426
def _query_llm(self, dom_context: str, goal: str) -> LLMResponse:
399427
"""
400428
Query LLM with standardized prompt template
@@ -418,23 +446,30 @@ def _query_llm(self, dom_context: str, goal: str) -> LLMResponse:
418446
- {{CLICKABLE}}: Element is clickable
419447
- {{color:X}}: Background color name
420448
421-
RESPONSE FORMAT:
422-
Return ONLY the function call, no explanation or markdown.
423-
424-
Available actions:
449+
CRITICAL RESPONSE FORMAT:
450+
You MUST respond with ONLY ONE of these exact action formats:
425451
- CLICK(id) - Click element by ID
426452
- TYPE(id, "text") - Type text into element
427453
- PRESS("key") - Press keyboard key (Enter, Escape, Tab, ArrowDown, etc)
428454
- FINISH() - Task complete
429455
430-
Examples:
431-
- CLICK(42)
432-
- TYPE(15, "magic mouse")
433-
- PRESS("Enter")
434-
- FINISH()
456+
DO NOT include any explanation, reasoning, or natural language.
457+
DO NOT use markdown formatting or code blocks.
458+
DO NOT say "The next step is..." or anything similar.
459+
460+
CORRECT Examples:
461+
CLICK(42)
462+
TYPE(15, "magic mouse")
463+
PRESS("Enter")
464+
FINISH()
465+
466+
INCORRECT Examples (DO NOT DO THIS):
467+
"The next step is to click..."
468+
"I will type..."
469+
```CLICK(42)```
435470
"""
436471

437-
user_prompt = "What is the next step to achieve the goal?"
472+
user_prompt = "Return the single action command:"
438473

439474
return self.llm.generate(system_prompt, user_prompt, temperature=0.0)
440475

sentience/tracing.py

Lines changed: 19 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
from abc import ABC, abstractmethod
1010
from dataclasses import dataclass, field
1111
from pathlib import Path
12-
from typing import Any, Dict, Optional, Union
12+
from typing import Any
1313

1414

1515
@dataclass
@@ -243,9 +243,24 @@ def emit_error(
243243
}
244244
self.emit("error", data, step_id=step_id)
245245

246-
def close(self) -> None:
247-
"""Close the underlying sink."""
248-
self.sink.close()
246+
def close(self, **kwargs) -> None:
    """
    Close the underlying sink.

    Args:
        **kwargs: Options forwarded to sink.close() (e.g., blocking=True for
            CloudTraceSink). Only options that the sink's close() can accept
            by name are forwarded; any others are dropped so that sinks
            whose close() takes no arguments keep working.
    """
    import inspect

    sink_close = self.sink.close

    if not kwargs:
        sink_close()
        return

    try:
        params = list(inspect.signature(sink_close).parameters.values())
    except (TypeError, ValueError):
        # The signature cannot be introspected (e.g. some C-implemented
        # callables); fall back to the argument-free call.
        sink_close()
        return

    # A **kwargs parameter on the sink accepts everything as-is.
    if any(p.kind is inspect.Parameter.VAR_KEYWORD for p in params):
        sink_close(**kwargs)
        return

    # Otherwise forward only the names that can be passed by keyword. This
    # includes ordinary parameters such as close(self, blocking=True), not
    # just keyword-only ones.
    by_keyword = (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )
    accepted = {p.name for p in params if p.kind in by_keyword}
    sink_close(**{name: value for name, value in kwargs.items() if name in accepted})
249264

250265
def __enter__(self):
251266
"""Context manager support."""

tests/test_agent.py

Lines changed: 36 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33
Tests LLM providers and SentienceAgent without requiring browser
44
"""
55

6-
from unittest.mock import MagicMock, Mock, patch
6+
from unittest.mock import Mock, patch
77

88
import pytest
99

@@ -432,3 +432,38 @@ def test_agent_action_parsing_variations():
432432
assert mock_click.call_count == 2
433433
assert mock_type.call_count == 1
434434
assert mock_press.call_count == 1
435+
436+
437+
def test_agent_extract_action_from_llm_response():
    """Check that action commands are recovered from LLM output containing extra text."""
    agent = SentienceAgent(create_mock_browser(), MockLLMProvider(), verbose=False)
    extract = agent._extract_action_from_response

    # Each entry is (raw LLM response, expected extracted action).
    expectations = [
        # Clean actions pass through unchanged
        ("CLICK(42)", "CLICK(42)"),
        ('TYPE(15, "test")', 'TYPE(15, "test")'),
        ('PRESS("Enter")', 'PRESS("Enter")'),
        ("FINISH()", "FINISH()"),
        # Natural-language prefix before the action (the bug case)
        ("The next step is to click the button. CLICK(42)", "CLICK(42)"),
        (
            'The next step is to type "Sentience AI agent SDK" into the search field. TYPE(15, "Sentience AI agent SDK")',
            'TYPE(15, "Sentience AI agent SDK")',
        ),
        # Markdown code blocks around the action
        ("```\nCLICK(42)\n```", "CLICK(42)"),
        ('```python\nTYPE(15, "test")\n```', 'TYPE(15, "test")'),
        # Explanation trailing the action
        ("CLICK(42) to submit the form", "CLICK(42)"),
    ]

    for raw_response, expected_action in expectations:
        assert extract(raw_response) == expected_action

0 commit comments

Comments
 (0)