sentience/action_executor.py (28 additions, 4 deletions)
@@ -6,11 +6,12 @@
"""

import re
from typing import Any
from typing import Any, Union

from .actions import click, click_async, press, press_async, type_text, type_text_async
from .browser import AsyncSentienceBrowser, SentienceBrowser
from .models import Snapshot
from .protocols import AsyncBrowserProtocol, BrowserProtocol


class ActionExecutor:
@@ -23,15 +24,38 @@ class ActionExecutor:
- Handle action parsing errors consistently
"""

def __init__(self, browser: SentienceBrowser | AsyncSentienceBrowser):
def __init__(
self,
browser: SentienceBrowser | AsyncSentienceBrowser | BrowserProtocol | AsyncBrowserProtocol,
):
"""
Initialize action executor.

Args:
browser: SentienceBrowser or AsyncSentienceBrowser instance
browser: SentienceBrowser, AsyncSentienceBrowser, or protocol-compatible instance
(for testing, can use mock objects that implement BrowserProtocol)
"""
self.browser = browser
self._is_async = isinstance(browser, AsyncSentienceBrowser)
# Check if browser is async - support both concrete types and protocols
# Check concrete types first (most reliable)
if isinstance(browser, AsyncSentienceBrowser):
self._is_async = True
elif isinstance(browser, SentienceBrowser):
self._is_async = False
else:
# For protocol-based browsers, check if methods are actually async
# This is more reliable than isinstance checks which can match both protocols
import inspect

start_method = getattr(browser, "start", None)
if start_method and inspect.iscoroutinefunction(start_method):
self._is_async = True
elif isinstance(browser, BrowserProtocol):
# If it implements BrowserProtocol and start is not async, it's sync
self._is_async = False
else:
# Default to sync for unknown types
self._is_async = False

def execute(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
"""
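The constructor's fallback path decides sync vs. async by inspecting the browser's start() method rather than by isinstance alone. A minimal standalone sketch of that heuristic, using hypothetical FakeSyncBrowser/FakeAsyncBrowser test doubles instead of the real protocol classes:

```python
# Sketch of the async-detection fallback added to ActionExecutor.__init__.
# FakeSyncBrowser / FakeAsyncBrowser are hypothetical test doubles, not part
# of the sentience package.
import inspect


class FakeSyncBrowser:
    def start(self) -> None:  # plain def -> treated as a sync browser
        pass


class FakeAsyncBrowser:
    async def start(self) -> None:  # async def -> treated as an async browser
        pass


def looks_async(browser: object) -> bool:
    """Mirror the fallback check: inspect start() when the object is neither
    SentienceBrowser nor AsyncSentienceBrowser."""
    start_method = getattr(browser, "start", None)
    return start_method is not None and inspect.iscoroutinefunction(start_method)


assert looks_async(FakeSyncBrowser()) is False
assert looks_async(FakeAsyncBrowser()) is True
```

Note that inspect.iscoroutinefunction only recognizes async def callables, which is why the concrete-type checks run first and the protocol fallback defaults to sync when in doubt.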
sentience/agent.py (100 additions, 15 deletions)
@@ -6,7 +6,7 @@
import asyncio
import hashlib
import time
from typing import TYPE_CHECKING, Any, Optional
from typing import TYPE_CHECKING, Any, Optional, Union

from .action_executor import ActionExecutor
from .agent_config import AgentConfig
@@ -25,13 +25,45 @@
SnapshotOptions,
TokenStats,
)
from .protocols import AsyncBrowserProtocol, BrowserProtocol
from .snapshot import snapshot, snapshot_async
from .trace_event_builder import TraceEventBuilder

if TYPE_CHECKING:
from .tracing import Tracer


def _safe_tracer_call(
tracer: Optional["Tracer"], method_name: str, verbose: bool, *args, **kwargs
) -> None:
"""
Safely call tracer method, catching and logging errors without breaking execution.

Args:
tracer: Tracer instance or None
method_name: Name of tracer method to call (e.g., "emit", "emit_error")
verbose: Whether to print error messages
*args: Positional arguments for the tracer method
**kwargs: Keyword arguments for the tracer method
"""
if not tracer:
return
try:
method = getattr(tracer, method_name)
if args and kwargs:
method(*args, **kwargs)
elif args:
method(*args)
elif kwargs:
method(**kwargs)
else:
method()
except Exception as tracer_error:
# Tracer errors should not break agent execution
if verbose:
print(f"⚠️ Tracer error (non-fatal): {tracer_error}")


class SentienceAgent(BaseAgent):
"""
High-level agent that combines Sentience SDK with any LLM provider.
@@ -58,7 +90,7 @@ class SentienceAgent:

def __init__(
self,
browser: SentienceBrowser,
browser: SentienceBrowser | BrowserProtocol,
llm: LLMProvider,
default_snapshot_limit: int = 50,
verbose: bool = True,
@@ -69,7 +101,8 @@ def __init__(
Initialize Sentience Agent

Args:
browser: SentienceBrowser instance
browser: SentienceBrowser instance or BrowserProtocol-compatible object
(for testing, can use mock objects that implement BrowserProtocol)
llm: LLM provider (OpenAIProvider, AnthropicProvider, etc.)
default_snapshot_limit: Default maximum elements to include in context (default: 50)
verbose: Print execution logs (default: True)
@@ -157,7 +190,10 @@ def act( # noqa: C901
# Emit step_start trace event if tracer is enabled
if self.tracer:
pre_url = self.browser.page.url if self.browser.page else None
self.tracer.emit_step_start(
_safe_tracer_call(
self.tracer,
"emit_step_start",
self.verbose,
step_id=step_id,
step_index=self._step_count,
goal=goal,
@@ -226,7 +262,10 @@ def act( # noqa: C901
if snap.screenshot_format:
snapshot_data["screenshot_format"] = snap.screenshot_format

self.tracer.emit(
_safe_tracer_call(
self.tracer,
"emit",
self.verbose,
"snapshot",
snapshot_data,
step_id=step_id,
@@ -252,7 +291,10 @@ def act( # noqa: C901

# Emit LLM query trace event if tracer is enabled
if self.tracer:
self.tracer.emit(
_safe_tracer_call(
self.tracer,
"emit",
self.verbose,
"llm_query",
{
"prompt_tokens": llm_response.prompt_tokens,
@@ -313,7 +355,10 @@ def act( # noqa: C901
for el in filtered_snap.elements[:50]
]

self.tracer.emit(
_safe_tracer_call(
self.tracer,
"emit",
self.verbose,
"action",
{
"action": result.action,
@@ -433,14 +478,28 @@ def act( # noqa: C901
verify_data=verify_data,
)

self.tracer.emit("step_end", step_end_data, step_id=step_id)
_safe_tracer_call(
self.tracer,
"emit",
self.verbose,
"step_end",
step_end_data,
step_id=step_id,
)

return result

except Exception as e:
# Emit error trace event if tracer is enabled
if self.tracer:
self.tracer.emit_error(step_id=step_id, error=str(e), attempt=attempt)
_safe_tracer_call(
self.tracer,
"emit_error",
self.verbose,
step_id=step_id,
error=str(e),
attempt=attempt,
)

if attempt < max_retries:
if self.verbose:
@@ -666,7 +725,10 @@ async def act( # noqa: C901
# Emit step_start trace event if tracer is enabled
if self.tracer:
pre_url = self.browser.page.url if self.browser.page else None
self.tracer.emit_step_start(
_safe_tracer_call(
self.tracer,
"emit_step_start",
self.verbose,
step_id=step_id,
step_index=self._step_count,
goal=goal,
@@ -738,7 +800,10 @@ async def act( # noqa: C901
if snap.screenshot_format:
snapshot_data["screenshot_format"] = snap.screenshot_format

self.tracer.emit(
_safe_tracer_call(
self.tracer,
"emit",
self.verbose,
"snapshot",
snapshot_data,
step_id=step_id,
@@ -764,7 +829,10 @@ async def act( # noqa: C901

# Emit LLM query trace event if tracer is enabled
if self.tracer:
self.tracer.emit(
_safe_tracer_call(
self.tracer,
"emit",
self.verbose,
"llm_query",
{
"prompt_tokens": llm_response.prompt_tokens,
@@ -825,7 +893,10 @@ async def act( # noqa: C901
for el in filtered_snap.elements[:50]
]

self.tracer.emit(
_safe_tracer_call(
self.tracer,
"emit",
self.verbose,
"action",
{
"action": result.action,
@@ -945,14 +1016,28 @@ async def act( # noqa: C901
verify_data=verify_data,
)

self.tracer.emit("step_end", step_end_data, step_id=step_id)
_safe_tracer_call(
self.tracer,
"emit",
self.verbose,
"step_end",
step_end_data,
step_id=step_id,
)

return result

except Exception as e:
# Emit error trace event if tracer is enabled
if self.tracer:
self.tracer.emit_error(step_id=step_id, error=str(e), attempt=attempt)
_safe_tracer_call(
self.tracer,
"emit_error",
self.verbose,
step_id=step_id,
error=str(e),
attempt=attempt,
)

if attempt < max_retries:
if self.verbose:
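Since every tracer call in both the sync and async act() paths now routes through _safe_tracer_call, a failing tracer backend can no longer abort a step. A usage sketch, assuming the package layout shown above; FlakyTracer is a hypothetical stand-in for the real Tracer in sentience.tracing:

```python
# Usage sketch: a tracer whose backend raises must not break the agent's step.
from sentience.agent import _safe_tracer_call


class FlakyTracer:
    def emit(self, event_type: str, data: dict, step_id: str | None = None) -> None:
        raise RuntimeError("trace backend unavailable")


# The RuntimeError is caught inside the helper; with verbose=True it prints
# "⚠️ Tracer error (non-fatal): trace backend unavailable" and returns normally.
_safe_tracer_call(FlakyTracer(), "emit", True, "step_end", {"success": True}, step_id="step-1")

# A tracer of None is skipped outright.
_safe_tracer_call(None, "emit", True, "step_end", {"success": True})
```

The args/kwargs branching inside the helper is behaviorally equivalent to a single method(*args, **kwargs) call, since unpacking empty tuples and dicts is a no-op; the explicit branches only spell the dispatch out.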
sentience/conversational_agent.py (10 additions, 3 deletions)
@@ -5,12 +5,13 @@

import json
import time
from typing import Any
from typing import Any, Union

from .agent import SentienceAgent
from .browser import SentienceBrowser
from .llm_provider import LLMProvider
from .models import ExtractionResult, Snapshot, SnapshotOptions, StepExecutionResult
from .protocols import BrowserProtocol
from .snapshot import snapshot


@@ -29,12 +30,18 @@ class ConversationalAgent:
The top result is from amazon.com selling the Apple Magic Mouse 2 for $79."
"""

def __init__(self, browser: SentienceBrowser, llm: LLMProvider, verbose: bool = True):
def __init__(
self,
browser: SentienceBrowser | BrowserProtocol,
llm: LLMProvider,
verbose: bool = True,
):
"""
Initialize conversational agent

Args:
browser: SentienceBrowser instance
browser: SentienceBrowser instance or BrowserProtocol-compatible object
(for testing, can use mock objects that implement BrowserProtocol)
llm: LLM provider (OpenAI, Anthropic, LocalLLM, etc.)
verbose: Print step-by-step execution logs (default: True)
"""
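The widened constructor signatures lean on structural typing: any object that exposes the protocol's methods is accepted, with no inheritance from SentienceBrowser required. A simplified sketch of the pattern, with a cut-down BrowserProtocol that declares only start() (the real protocol in sentience.protocols covers the full browser surface):

```python
# Simplified sketch of the protocol-based typing the new signatures allow.
# This local BrowserProtocol is an illustration, not the one in sentience.protocols.
from typing import Protocol, runtime_checkable


@runtime_checkable
class BrowserProtocol(Protocol):
    def start(self) -> None: ...


class RecordingBrowser:
    """Hypothetical test double: satisfies the protocol structurally."""

    def __init__(self) -> None:
        self.started = False

    def start(self) -> None:
        self.started = True


browser: BrowserProtocol = RecordingBrowser()  # accepted by static type checkers
assert isinstance(browser, BrowserProtocol)    # and by runtime isinstance checks
```

A test double like this can then stand in for a real browser when constructing ConversationalAgent, SentienceAgent, or ActionExecutor, which is the testing use case the updated docstrings call out.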
sentience/element_filter.py (1 addition, 1 deletion)
@@ -65,7 +65,7 @@ def filter_by_importance(
def filter_by_goal(
snapshot: Snapshot,
goal: str | None,
max_elements: int = 50,
max_elements: int = 100,
) -> list[Element]:
"""
Filter elements from snapshot based on goal context.