Skip to content

Commit aa00325

Browse files
committed
phase 2.2 and 2.3 done
1 parent 0316503 commit aa00325

File tree

7 files changed

+282
-203
lines changed

7 files changed

+282
-203
lines changed

sentience/agent.py

Lines changed: 47 additions & 195 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@
1313
from .agent_config import AgentConfig
1414
from .base_agent import BaseAgent, BaseAgentAsync
1515
from .browser import AsyncSentienceBrowser, SentienceBrowser
16+
from .element_filter import ElementFilter
1617
from .llm_provider import LLMProvider, LLMResponse
1718
from .models import (
1819
ActionHistory,
@@ -25,6 +26,7 @@
2526
TokenStats,
2627
)
2728
from .snapshot import snapshot, snapshot_async
29+
from .trace_event_builder import TraceEventBuilder
2830

2931
if TYPE_CHECKING:
3032
from .tracing import Tracer
@@ -100,9 +102,7 @@ def _compute_hash(self, text: str) -> str:
100102
"""Compute SHA256 hash of text."""
101103
return hashlib.sha256(text.encode("utf-8")).hexdigest()
102104

103-
def _get_element_bbox(
104-
self, element_id: int | None, snap: Snapshot
105-
) -> dict[str, float] | None:
105+
def _get_element_bbox(self, element_id: int | None, snap: Snapshot) -> dict[str, float] | None:
106106
"""Get bounding box for an element from snapshot."""
107107
if element_id is None:
108108
return None
@@ -200,17 +200,8 @@ def act( # noqa: C901
200200

201201
# Emit snapshot trace event if tracer is enabled
202202
if self.tracer:
203-
# Include ALL elements with full data for DOM tree display
204-
# Use snap.elements (all elements) not filtered_elements
205-
elements_data = [el.model_dump() for el in snap.elements]
206-
207203
# Build snapshot event data
208-
snapshot_data = {
209-
"url": snap.url,
210-
"element_count": len(snap.elements),
211-
"timestamp": snap.timestamp,
212-
"elements": elements_data, # Full element data for DOM tree
213-
}
204+
snapshot_data = TraceEventBuilder.build_snapshot_event(snap)
214205

215206
# Always include screenshot in trace event for studio viewer compatibility
216207
# CloudTraceSink will extract and upload screenshots separately, then remove
@@ -425,23 +416,18 @@ def act( # noqa: C901
425416
}
426417

427418
# Build complete step_end event
428-
step_end_data = {
429-
"v": 1,
430-
"step_id": step_id,
431-
"step_index": self._step_count,
432-
"goal": goal,
433-
"attempt": attempt,
434-
"pre": {
435-
"url": pre_url,
436-
"snapshot_digest": snapshot_digest,
437-
},
438-
"llm": llm_data,
439-
"exec": exec_data,
440-
"post": {
441-
"url": post_url,
442-
},
443-
"verify": verify_data,
444-
}
419+
step_end_data = TraceEventBuilder.build_step_end_event(
420+
step_id=step_id,
421+
step_index=self._step_count,
422+
goal=goal,
423+
attempt=attempt,
424+
pre_url=pre_url,
425+
post_url=post_url,
426+
snapshot_digest=snapshot_digest,
427+
llm_data=llm_data,
428+
exec_data=exec_data,
429+
verify_data=verify_data,
430+
)
445431

446432
self.tracer.emit("step_end", step_end_data, step_id=step_id)
447433

@@ -723,8 +709,8 @@ def filter_elements(self, snapshot: Snapshot, goal: str | None = None) -> list[E
723709
"""
724710
Filter elements from snapshot based on goal context.
725711
726-
This default implementation applies goal-based keyword matching to boost
727-
relevant elements and filters out irrelevant ones.
712+
This implementation uses ElementFilter to apply goal-based keyword matching
713+
to boost relevant elements and filters out irrelevant ones.
728714
729715
Args:
730716
snapshot: Current page snapshot
@@ -733,76 +719,7 @@ def filter_elements(self, snapshot: Snapshot, goal: str | None = None) -> list[E
733719
Returns:
734720
Filtered list of elements
735721
"""
736-
elements = snapshot.elements
737-
738-
# If no goal provided, return all elements (up to limit)
739-
if not goal:
740-
return elements[: self.default_snapshot_limit]
741-
742-
goal_lower = goal.lower()
743-
744-
# Extract keywords from goal
745-
keywords = self._extract_keywords(goal_lower)
746-
747-
# Boost elements matching goal keywords
748-
scored_elements = []
749-
for el in elements:
750-
score = el.importance
751-
752-
# Boost if element text matches goal
753-
if el.text and any(kw in el.text.lower() for kw in keywords):
754-
score += 0.3
755-
756-
# Boost if role matches goal intent
757-
if "click" in goal_lower and el.visual_cues.is_clickable:
758-
score += 0.2
759-
if "type" in goal_lower and el.role in ["textbox", "searchbox"]:
760-
score += 0.2
761-
if "search" in goal_lower:
762-
# Filter out non-interactive elements for search tasks
763-
if el.role in ["link", "img"] and not el.visual_cues.is_primary:
764-
score -= 0.5
765-
766-
scored_elements.append((score, el))
767-
768-
# Re-sort by boosted score
769-
scored_elements.sort(key=lambda x: x[0], reverse=True)
770-
elements = [el for _, el in scored_elements]
771-
772-
return elements[: self.default_snapshot_limit]
773-
774-
def _extract_keywords(self, text: str) -> list[str]:
775-
"""
776-
Extract meaningful keywords from goal text
777-
778-
Args:
779-
text: Text to extract keywords from
780-
781-
Returns:
782-
List of keywords
783-
"""
784-
stopwords = {
785-
"the",
786-
"a",
787-
"an",
788-
"and",
789-
"or",
790-
"but",
791-
"in",
792-
"on",
793-
"at",
794-
"to",
795-
"for",
796-
"of",
797-
"with",
798-
"by",
799-
"from",
800-
"as",
801-
"is",
802-
"was",
803-
}
804-
words = text.split()
805-
return [w for w in words if w not in stopwords and len(w) > 2]
722+
return ElementFilter.filter_by_goal(snapshot, goal, self.default_snapshot_limit)
806723

807724

808725
class SentienceAgentAsync(BaseAgentAsync):
@@ -874,9 +791,7 @@ def _compute_hash(self, text: str) -> str:
874791
"""Compute SHA256 hash of text."""
875792
return hashlib.sha256(text.encode("utf-8")).hexdigest()
876793

877-
def _get_element_bbox(
878-
self, element_id: int | None, snap: Snapshot
879-
) -> dict[str, float] | None:
794+
def _get_element_bbox(self, element_id: int | None, snap: Snapshot) -> dict[str, float] | None:
880795
"""Get bounding box for an element from snapshot."""
881796
if element_id is None:
882797
return None
@@ -974,17 +889,8 @@ async def act( # noqa: C901
974889

975890
# Emit snapshot trace event if tracer is enabled
976891
if self.tracer:
977-
# Include ALL elements with full data for DOM tree display
978-
# Use snap.elements (all elements) not filtered_elements
979-
elements_data = [el.model_dump() for el in snap.elements]
980-
981892
# Build snapshot event data
982-
snapshot_data = {
983-
"url": snap.url,
984-
"element_count": len(snap.elements),
985-
"timestamp": snap.timestamp,
986-
"elements": elements_data, # Full element data for DOM tree
987-
}
893+
snapshot_data = TraceEventBuilder.build_snapshot_event(snap)
988894

989895
# Always include screenshot in trace event for studio viewer compatibility
990896
# CloudTraceSink will extract and upload screenshots separately, then remove
@@ -1199,23 +1105,18 @@ async def act( # noqa: C901
11991105
}
12001106

12011107
# Build complete step_end event
1202-
step_end_data = {
1203-
"v": 1,
1204-
"step_id": step_id,
1205-
"step_index": self._step_count,
1206-
"goal": goal,
1207-
"attempt": attempt,
1208-
"pre": {
1209-
"url": pre_url,
1210-
"snapshot_digest": snapshot_digest,
1211-
},
1212-
"llm": llm_data,
1213-
"exec": exec_data,
1214-
"post": {
1215-
"url": post_url,
1216-
},
1217-
"verify": verify_data,
1218-
}
1108+
step_end_data = TraceEventBuilder.build_step_end_event(
1109+
step_id=step_id,
1110+
step_index=self._step_count,
1111+
goal=goal,
1112+
attempt=attempt,
1113+
pre_url=pre_url,
1114+
post_url=post_url,
1115+
snapshot_digest=snapshot_digest,
1116+
llm_data=llm_data,
1117+
exec_data=exec_data,
1118+
verify_data=verify_data,
1119+
)
12191120

12201121
self.tracer.emit("step_end", step_end_data, step_id=step_id)
12211122

@@ -1447,66 +1348,17 @@ def clear_history(self) -> None:
14471348
}
14481349

14491350
def filter_elements(self, snapshot: Snapshot, goal: str | None = None) -> list[Element]:
1450-
"""Filter elements from snapshot based on goal context (same as sync version)"""
1451-
elements = snapshot.elements
1452-
1453-
# If no goal provided, return all elements (up to limit)
1454-
if not goal:
1455-
return elements[: self.default_snapshot_limit]
1456-
1457-
goal_lower = goal.lower()
1458-
1459-
# Extract keywords from goal
1460-
keywords = self._extract_keywords(goal_lower)
1461-
1462-
# Boost elements matching goal keywords
1463-
scored_elements = []
1464-
for el in elements:
1465-
score = el.importance
1466-
1467-
# Boost if element text matches goal
1468-
if el.text and any(kw in el.text.lower() for kw in keywords):
1469-
score += 0.3
1470-
1471-
# Boost if role matches goal intent
1472-
if "click" in goal_lower and el.visual_cues.is_clickable:
1473-
score += 0.2
1474-
if "type" in goal_lower and el.role in ["textbox", "searchbox"]:
1475-
score += 0.2
1476-
if "search" in goal_lower:
1477-
# Filter out non-interactive elements for search tasks
1478-
if el.role in ["link", "img"] and not el.visual_cues.is_primary:
1479-
score -= 0.5
1480-
1481-
scored_elements.append((score, el))
1482-
1483-
# Re-sort by boosted score
1484-
scored_elements.sort(key=lambda x: x[0], reverse=True)
1485-
elements = [el for _, el in scored_elements]
1486-
1487-
return elements[: self.default_snapshot_limit]
1488-
1489-
def _extract_keywords(self, text: str) -> list[str]:
1490-
"""Extract meaningful keywords from goal text (same as sync version)"""
1491-
stopwords = {
1492-
"the",
1493-
"a",
1494-
"an",
1495-
"and",
1496-
"or",
1497-
"but",
1498-
"in",
1499-
"on",
1500-
"at",
1501-
"to",
1502-
"for",
1503-
"of",
1504-
"with",
1505-
"by",
1506-
"from",
1507-
"as",
1508-
"is",
1509-
"was",
1510-
}
1511-
words = text.split()
1512-
return [w for w in words if w not in stopwords and len(w) > 2]
1351+
"""
1352+
Filter elements from snapshot based on goal context.
1353+
1354+
This implementation uses ElementFilter to apply goal-based keyword matching
1355+
to boost relevant elements and filters out irrelevant ones.
1356+
1357+
Args:
1358+
snapshot: Current page snapshot
1359+
goal: User's goal (can inform filtering)
1360+
1361+
Returns:
1362+
Filtered list of elements
1363+
"""
1364+
return ElementFilter.filter_by_goal(snapshot, goal, self.default_snapshot_limit)

sentience/browser_evaluator.py

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@ class BrowserEvaluator:
2121

2222
@staticmethod
2323
def wait_for_extension(
24-
page: Union[Page, AsyncPage],
24+
page: Page | AsyncPage,
2525
timeout_ms: int = 5000,
2626
) -> None:
2727
"""
@@ -79,7 +79,7 @@ async def wait_for_extension_async(
7979
) from e
8080

8181
@staticmethod
82-
def _gather_diagnostics(page: Union[Page, AsyncPage]) -> dict[str, Any]:
82+
def _gather_diagnostics(page: Page | AsyncPage) -> dict[str, Any]:
8383
"""
8484
Gather diagnostics about extension state.
8585
@@ -297,4 +297,3 @@ async def verify_method_exists_async(
297297
return await page.evaluate(f"typeof window.sentience.{method_name} !== 'undefined'")
298298
except Exception:
299299
return False
300-

sentience/cloud_tracing.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,6 @@
1313
from concurrent.futures import ThreadPoolExecutor, as_completed
1414
from pathlib import Path
1515
from typing import Any, Optional, Protocol, Union
16-
from collections.abc import Callable
1716

1817
import requests
1918

0 commit comments

Comments
 (0)