Skip to content

Commit 3ff3783

Browse files
authored
Merge pull request #110 from SentienceAPI/importance_score
close gaps in diff_status and importance
2 parents f4d3151 + 06bfbb3 commit 3ff3783

File tree

7 files changed

+602
-25
lines changed

7 files changed

+602
-25
lines changed

sentience/agent.py

Lines changed: 65 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -27,6 +27,7 @@
2727
)
2828
from .protocols import AsyncBrowserProtocol, BrowserProtocol
2929
from .snapshot import snapshot, snapshot_async
30+
from .snapshot_diff import SnapshotDiff
3031
from .trace_event_builder import TraceEventBuilder
3132

3233
if TYPE_CHECKING:
@@ -135,6 +136,9 @@ def __init__(
135136
# Step counter for tracing
136137
self._step_count = 0
137138

139+
# Previous snapshot for diff detection
140+
self._previous_snapshot: Snapshot | None = None
141+
138142
def _compute_hash(self, text: str) -> str:
139143
"""Compute SHA256 hash of text."""
140144
return hashlib.sha256(text.encode("utf-8")).hexdigest()
@@ -235,13 +239,31 @@ def act( # noqa: C901
235239
if snap.status != "success":
236240
raise RuntimeError(f"Snapshot failed: {snap.error}")
237241

242+
# Compute diff_status by comparing with previous snapshot
243+
elements_with_diff = SnapshotDiff.compute_diff_status(snap, self._previous_snapshot)
244+
245+
# Create snapshot with diff_status populated
246+
snap_with_diff = Snapshot(
247+
status=snap.status,
248+
timestamp=snap.timestamp,
249+
url=snap.url,
250+
viewport=snap.viewport,
251+
elements=elements_with_diff,
252+
screenshot=snap.screenshot,
253+
screenshot_format=snap.screenshot_format,
254+
error=snap.error,
255+
)
256+
257+
# Update previous snapshot for next comparison
258+
self._previous_snapshot = snap
259+
238260
# Apply element filtering based on goal
239-
filtered_elements = self.filter_elements(snap, goal)
261+
filtered_elements = self.filter_elements(snap_with_diff, goal)
240262

241263
# Emit snapshot trace event if tracer is enabled
242264
if self.tracer:
243-
# Build snapshot event data
244-
snapshot_data = TraceEventBuilder.build_snapshot_event(snap)
265+
# Build snapshot event data (use snap_with_diff to include diff_status)
266+
snapshot_data = TraceEventBuilder.build_snapshot_event(snap_with_diff)
245267

246268
# Always include screenshot in trace event for studio viewer compatibility
247269
# CloudTraceSink will extract and upload screenshots separately, then remove
@@ -271,16 +293,16 @@ def act( # noqa: C901
271293
step_id=step_id,
272294
)
273295

274-
# Create filtered snapshot
296+
# Create filtered snapshot (use snap_with_diff to preserve metadata)
275297
filtered_snap = Snapshot(
276-
status=snap.status,
277-
timestamp=snap.timestamp,
278-
url=snap.url,
279-
viewport=snap.viewport,
298+
status=snap_with_diff.status,
299+
timestamp=snap_with_diff.timestamp,
300+
url=snap_with_diff.url,
301+
viewport=snap_with_diff.viewport,
280302
elements=filtered_elements,
281-
screenshot=snap.screenshot,
282-
screenshot_format=snap.screenshot_format,
283-
error=snap.error,
303+
screenshot=snap_with_diff.screenshot,
304+
screenshot_format=snap_with_diff.screenshot_format,
305+
error=snap_with_diff.error,
284306
)
285307

286308
# 2. GROUND: Format elements for LLM context
@@ -673,6 +695,9 @@ def __init__(
673695
# Step counter for tracing
674696
self._step_count = 0
675697

698+
# Previous snapshot for diff detection
699+
self._previous_snapshot: Snapshot | None = None
700+
676701
def _compute_hash(self, text: str) -> str:
677702
"""Compute SHA256 hash of text."""
678703
return hashlib.sha256(text.encode("utf-8")).hexdigest()
@@ -773,13 +798,31 @@ async def act( # noqa: C901
773798
if snap.status != "success":
774799
raise RuntimeError(f"Snapshot failed: {snap.error}")
775800

801+
# Compute diff_status by comparing with previous snapshot
802+
elements_with_diff = SnapshotDiff.compute_diff_status(snap, self._previous_snapshot)
803+
804+
# Create snapshot with diff_status populated
805+
snap_with_diff = Snapshot(
806+
status=snap.status,
807+
timestamp=snap.timestamp,
808+
url=snap.url,
809+
viewport=snap.viewport,
810+
elements=elements_with_diff,
811+
screenshot=snap.screenshot,
812+
screenshot_format=snap.screenshot_format,
813+
error=snap.error,
814+
)
815+
816+
# Update previous snapshot for next comparison
817+
self._previous_snapshot = snap
818+
776819
# Apply element filtering based on goal
777-
filtered_elements = self.filter_elements(snap, goal)
820+
filtered_elements = self.filter_elements(snap_with_diff, goal)
778821

779822
# Emit snapshot trace event if tracer is enabled
780823
if self.tracer:
781-
# Build snapshot event data
782-
snapshot_data = TraceEventBuilder.build_snapshot_event(snap)
824+
# Build snapshot event data (use snap_with_diff to include diff_status)
825+
snapshot_data = TraceEventBuilder.build_snapshot_event(snap_with_diff)
783826

784827
# Always include screenshot in trace event for studio viewer compatibility
785828
# CloudTraceSink will extract and upload screenshots separately, then remove
@@ -809,16 +852,16 @@ async def act( # noqa: C901
809852
step_id=step_id,
810853
)
811854

812-
# Create filtered snapshot
855+
# Create filtered snapshot (use snap_with_diff to preserve metadata)
813856
filtered_snap = Snapshot(
814-
status=snap.status,
815-
timestamp=snap.timestamp,
816-
url=snap.url,
817-
viewport=snap.viewport,
857+
status=snap_with_diff.status,
858+
timestamp=snap_with_diff.timestamp,
859+
url=snap_with_diff.url,
860+
viewport=snap_with_diff.viewport,
818861
elements=filtered_elements,
819-
screenshot=snap.screenshot,
820-
screenshot_format=snap.screenshot_format,
821-
error=snap.error,
862+
screenshot=snap_with_diff.screenshot,
863+
screenshot_format=snap_with_diff.screenshot_format,
864+
error=snap_with_diff.error,
822865
)
823866

824867
# 2. GROUND: Format elements for LLM context

sentience/models.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,6 +51,9 @@ class Element(BaseModel):
5151
ml_probability: float | None = None # Confidence score from ONNX model (0.0 - 1.0)
5252
ml_score: float | None = None # Raw logit score (optional, for debugging)
5353

54+
# Diff status for frontend Diff Overlay feature
55+
diff_status: Literal["ADDED", "REMOVED", "MODIFIED", "MOVED"] | None = None
56+
5457

5558
class Snapshot(BaseModel):
5659
"""Snapshot response from extension"""

sentience/snapshot_diff.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
"""
2+
Snapshot comparison utilities for diff_status detection.
3+
4+
Implements change detection logic for the Diff Overlay feature.
5+
"""
6+
7+
from typing import Literal
8+
9+
from .models import Element, Snapshot
10+
11+
12+
class SnapshotDiff:
    """
    Utility for comparing snapshots and computing diff_status for elements.

    Elements are matched between snapshots by their ``id``. The status
    assigned to each element is:

    - ADDED: id is present in current but not in previous
    - REMOVED: id was present in previous but not in current
    - MODIFIED: present in both and text, role, or visual cues changed
      (takes precedence over MOVED when the bounding box changed as well)
    - MOVED: present in both and only the bounding box changed
    - None: present in both with no change detected by the checks above
    """

    @staticmethod
    def _has_bbox_changed(el1: Element, el2: Element, threshold: float = 5.0) -> bool:
        """
        Check if element's bounding box has changed significantly.

        Args:
            el1: First element
            el2: Second element
            threshold: Per-field change threshold (default: 5.0); a difference
                must be strictly greater than this to count as a change

        Returns:
            True if x, y, width, or height differs by more than threshold
        """
        box_a, box_b = el1.bbox, el2.bbox
        return (
            abs(box_a.x - box_b.x) > threshold
            or abs(box_a.y - box_b.y) > threshold
            or abs(box_a.width - box_b.width) > threshold
            or abs(box_a.height - box_b.height) > threshold
        )

    @staticmethod
    def _has_content_changed(el1: Element, el2: Element) -> bool:
        """
        Check if element's content has changed.

        Args:
            el1: First element
            el2: Second element

        Returns:
            True if text, role, visual_cues.is_primary, or
            visual_cues.is_clickable differs between the two elements
        """
        return (
            el1.text != el2.text
            or el1.role != el2.role
            or el1.visual_cues.is_primary != el2.visual_cues.is_primary
            or el1.visual_cues.is_clickable != el2.visual_cues.is_clickable
        )

    @staticmethod
    def _with_diff_status(
        el: Element,
        status: Literal["ADDED", "REMOVED", "MODIFIED", "MOVED"] | None,
    ) -> Element:
        """Return a new Element built from el's dumped fields with diff_status set."""
        data = el.model_dump()
        data["diff_status"] = status
        return Element(**data)

    @staticmethod
    def compute_diff_status(
        current: Snapshot,
        previous: Snapshot | None,
    ) -> list[Element]:
        """
        Compare current snapshot with previous and set diff_status on elements.

        The input snapshots and their elements are not modified; every element
        in the result is a new Element instance.

        Args:
            current: Current snapshot
            previous: Previous snapshot (None if this is the first snapshot)

        Returns:
            Elements of current (in their original order) with diff_status set,
            followed by the elements that only exist in previous, marked
            REMOVED, in the order their ids first appear in previous.
        """
        # If no previous snapshot, all current elements are ADDED
        if previous is None:
            return [SnapshotDiff._with_diff_status(el, "ADDED") for el in current.elements]

        # Lookup by element ID. If an id repeats within previous, the last
        # element with that id wins (plain dict semantics).
        previous_by_id = {el.id: el for el in previous.elements}
        current_ids = {el.id for el in current.elements}

        result: list[Element] = []

        # Process current elements
        for el in current.elements:
            status: Literal["ADDED", "REMOVED", "MODIFIED", "MOVED"] | None
            if el.id not in previous_by_id:
                # Element is new
                status = "ADDED"
            else:
                prev_el = previous_by_id[el.id]
                if SnapshotDiff._has_content_changed(el, prev_el):
                    # Content changed (with or without a bbox change)
                    status = "MODIFIED"
                elif SnapshotDiff._has_bbox_changed(el, prev_el):
                    # Only position/size changed
                    status = "MOVED"
                else:
                    # No change - diff_status is explicitly None
                    status = None
            result.append(SnapshotDiff._with_diff_status(el, status))

        # Process removed elements (existed in previous but not in current).
        # Walk the insertion-ordered dict rather than a set difference so the
        # REMOVED entries come out in a stable order, one per unique id.
        for prev_id, prev_el in previous_by_id.items():
            if prev_id not in current_ids:
                result.append(SnapshotDiff._with_diff_status(prev_el, "REMOVED"))

        return result

sentience/trace_event_builder.py

Lines changed: 27 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,9 +35,34 @@ def build_snapshot_event(
3535
Returns:
3636
Dictionary with snapshot event data
3737
"""
38+
# Normalize importance values to importance_score (0-1 range) per snapshot
39+
# Min-max normalization: (value - min) / (max - min)
40+
importance_values = [el.importance for el in snapshot.elements]
41+
42+
if importance_values:
43+
min_importance = min(importance_values)
44+
max_importance = max(importance_values)
45+
importance_range = max_importance - min_importance
46+
else:
47+
min_importance = 0
48+
max_importance = 0
49+
importance_range = 0
50+
3851
# Include ALL elements with full data for DOM tree display
39-
# Use snap.elements (all elements) not filtered_elements
40-
elements_data = [el.model_dump() for el in snapshot.elements]
52+
# Add importance_score field normalized to [0, 1]
53+
elements_data = []
54+
for el in snapshot.elements:
55+
el_dict = el.model_dump()
56+
57+
# Compute normalized importance_score
58+
if importance_range > 0:
59+
importance_score = (el.importance - min_importance) / importance_range
60+
else:
61+
# If all elements have same importance, set to 0.5
62+
importance_score = 0.5
63+
64+
el_dict["importance_score"] = importance_score
65+
elements_data.append(el_dict)
4166

4267
return {
4368
"url": snapshot.url,

sentience/tracing.py

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,6 @@
44
Provides abstract interface and JSONL implementation for emitting trace events.
55
"""
66

7-
import json
87
import time
98
from abc import ABC, abstractmethod
109
from dataclasses import dataclass, field

0 commit comments

Comments
 (0)