Skip to content

Commit 6a4d4c6

Browse files
author
SentienceDEV
committed
visual overlay on elements
1 parent 0166146 commit 6a4d4c6

File tree

4 files changed

+152
-0
lines changed

4 files changed

+152
-0
lines changed

sentience/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@
4545
Viewport,
4646
WaitResult,
4747
)
48+
from .overlay import clear_overlay, show_overlay
4849
from .query import find, query
4950
from .read import read
5051
from .recorder import Recorder, Trace, TraceStep, record
@@ -93,6 +94,8 @@
9394
"generate",
9495
"read",
9596
"screenshot",
97+
"show_overlay",
98+
"clear_overlay",
9699
# Agent Layer (Phase 1 & 2)
97100
"BaseAgent",
98101
"LLMProvider",

sentience/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -117,6 +117,7 @@ class SnapshotOptions(BaseModel):
117117
save_trace: bool = False # Save raw_elements to JSON for benchmarking/training
118118
trace_path: str | None = None # Path to save trace (default: "trace_{timestamp}.json")
119119
goal: str | None = None # Optional goal/task description for the snapshot
120+
show_overlay: bool = False # Show visual overlay highlighting elements in browser
120121

121122
class Config:
122123
arbitrary_types_allowed = True

sentience/overlay.py

Lines changed: 115 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,115 @@
1+
"""
2+
Visual overlay utilities - show/clear element highlights in browser
3+
"""
4+
5+
from typing import Any
6+
7+
from .browser import SentienceBrowser
8+
from .models import Element, Snapshot
9+
10+
11+
def show_overlay(
    browser: SentienceBrowser,
    elements: list[Element] | list[dict[str, Any]] | Snapshot,
    target_element_id: int | None = None,
) -> None:
    """
    Display a visual overlay highlighting elements in the browser.

    The elements are normalised to plain dicts and handed to the extension's
    ``window.sentience.showOverlay`` function inside the page. If the extension
    API is not present, a warning is written to the browser console and nothing
    else happens. Useful for debugging, learning, and validating element
    detection.

    Args:
        browser: SentienceBrowser instance (must already be started)
        elements: Can be:
            - List of Element objects (from snapshot.elements)
            - List of raw element dicts (from snapshot result or API response)
            - Snapshot object (will use snapshot.elements)
            A list may mix Element objects and dicts; each item is converted
            on its own. An empty list is accepted and forwarded as-is, exactly
            like a Snapshot that contains no elements.
        target_element_id: Optional ID of element to highlight in red (default: None)

    Raises:
        RuntimeError: If the browser has no active page.
        ValueError: If ``elements`` is neither a Snapshot nor a list, or if a
            list item is neither a dict nor an object exposing ``model_dump``.

    Color Coding:
        - Red: Target element (when target_element_id is specified)
        - Blue: Primary elements (is_primary=true)
        - Green: Regular interactive elements

    Visual Indicators:
        - Border thickness and opacity scale with importance score
        - Semi-transparent fill for better visibility
        - Importance badges showing scores
        - Star icon for primary elements
        - Target emoji for the target element

    Auto-clear: Overlay automatically disappears after 5 seconds

    Example:
        # Show overlay from snapshot
        snap = snapshot(browser)
        show_overlay(browser, snap)

        # Show overlay with custom elements
        elements = [{"id": 1, "bbox": {"x": 100, "y": 100, "width": 200, "height": 50}, ...}]
        show_overlay(browser, elements)

        # Show overlay with target element highlighted in red
        show_overlay(browser, snap, target_element_id=42)

        # Clear overlay manually before 5 seconds
        clear_overlay(browser)
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call browser.start() first.")

    invalid_input = "elements must be a Snapshot, list of Element objects, or list of dicts"

    # Pick the sequence to normalise; a Snapshot contributes its own elements.
    if isinstance(elements, Snapshot):
        source_items = elements.elements
    elif isinstance(elements, list):
        source_items = elements
    else:
        raise ValueError(invalid_input)

    # Convert item by item instead of trusting the first entry, so mixed lists
    # work and nothing that is not a plain dict reaches page.evaluate().
    elements_list: list[dict[str, Any]] = []
    for item in source_items:
        if hasattr(item, "model_dump"):
            elements_list.append(item.model_dump())
        elif isinstance(item, dict):
            elements_list.append(item)
        else:
            raise ValueError(invalid_input)

    # Call extension API
    browser.page.evaluate(
        """
        (args) => {
            if (window.sentience && window.sentience.showOverlay) {
                window.sentience.showOverlay(args.elements, args.targetId);
            } else {
                console.warn('[Sentience SDK] showOverlay not available - is extension loaded?');
            }
        }
        """,
        {"elements": elements_list, "targetId": target_element_id},
    )
90+
91+
92+
def clear_overlay(browser: SentienceBrowser) -> None:
    """
    Remove the visual overlay immediately instead of waiting for the
    5-second auto-clear.

    Runs a small script in the page that calls
    ``window.sentience.clearOverlay`` when that function exists; when it does
    not, the script does nothing.

    Args:
        browser: SentienceBrowser instance

    Raises:
        RuntimeError: If the browser has no active page.

    Example:
        show_overlay(browser, snap)
        # ... inspect overlay ...
        clear_overlay(browser)  # Remove immediately
    """
    active_page = browser.page
    if not active_page:
        raise RuntimeError("Browser not started. Call browser.start() first.")

    # Guarded call: only invoke the extension hook if it has been injected.
    clear_script = """
        () => {
            if (window.sentience && window.sentience.clearOverlay) {
                window.sentience.clearOverlay();
            }
        }
        """
    active_page.evaluate(clear_script)

sentience/snapshot.py

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def snapshot(
4444
use_api: bool | None = None,
4545
save_trace: bool = False,
4646
trace_path: str | None = None,
47+
show_overlay: bool = False,
4748
) -> Snapshot:
4849
"""
4950
Take a snapshot of the current page
@@ -57,6 +58,7 @@ def snapshot(
5758
If None, uses API if api_key is set, otherwise uses local extension.
5859
save_trace: Whether to save raw_elements to JSON for benchmarking/training
5960
trace_path: Path to save trace file. If None, uses "trace_{timestamp}.json"
61+
show_overlay: Show visual overlay highlighting elements in browser
6062
6163
Returns:
6264
Snapshot object
@@ -69,6 +71,7 @@ def snapshot(
6971
use_api=use_api,
7072
save_trace=save_trace,
7173
trace_path=trace_path,
74+
show_overlay=show_overlay,
7275
)
7376

7477
# Determine if we should use server-side API
@@ -143,6 +146,21 @@ def _snapshot_via_extension(
143146
if options.save_trace:
144147
_save_trace_to_file(result.get("raw_elements", []), options.trace_path)
145148

149+
# Show visual overlay if requested
150+
if options.show_overlay:
151+
raw_elements = result.get("raw_elements", [])
152+
if raw_elements:
153+
browser.page.evaluate(
154+
"""
155+
(elements) => {
156+
if (window.sentience && window.sentience.showOverlay) {
157+
window.sentience.showOverlay(elements, null);
158+
}
159+
}
160+
""",
161+
raw_elements,
162+
)
163+
146164
# Validate and parse with Pydantic
147165
snapshot_obj = Snapshot(**result)
148166
return snapshot_obj
@@ -231,6 +249,21 @@ def _snapshot_via_api(
231249
"error": api_result.get("error"),
232250
}
233251

252+
# Show visual overlay if requested (use API-ranked elements)
253+
if options.show_overlay:
254+
elements = api_result.get("elements", [])
255+
if elements:
256+
browser.page.evaluate(
257+
"""
258+
(elements) => {
259+
if (window.sentience && window.sentience.showOverlay) {
260+
window.sentience.showOverlay(elements, null);
261+
}
262+
}
263+
""",
264+
elements,
265+
)
266+
234267
return Snapshot(**snapshot_data)
235268
except requests.exceptions.RequestException as e:
236269
raise RuntimeError(f"API request failed: {e}")

0 commit comments

Comments
 (0)