Skip to content

Commit b662c4e

Browse files
authored
Merge pull request #129 from SentienceAPI/bu_phase3
Phase 3: polish
2 parents 8020867 + c598d49 commit b662c4e

File tree

10 files changed

+932
-112
lines changed

10 files changed

+932
-112
lines changed
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
"""
2+
Example: Using Sentience with browser-use for element grounding.
3+
4+
This example demonstrates how to integrate Sentience's semantic element
5+
detection with browser-use, enabling accurate click/type/scroll operations
6+
using Sentience's snapshot-based grounding instead of coordinate estimation.
7+
8+
Requirements:
9+
pip install browser-use sentienceapi
10+
11+
Usage:
12+
python examples/browser_use_integration.py
13+
"""
14+
15+
import asyncio
16+
17+
# Sentience imports
18+
from sentience import find, get_extension_dir, query
19+
from sentience.backends import (
20+
BrowserUseAdapter,
21+
CachedSnapshot,
22+
ExtensionNotLoadedError,
23+
click,
24+
scroll,
25+
snapshot,
26+
type_text,
27+
)
28+
29+
# browser-use imports (install via: pip install browser-use)
30+
# from browser_use import BrowserSession, BrowserProfile
31+
32+
33+
async def main() -> None:
34+
"""
35+
Demo: Search on Google using Sentience grounding with browser-use.
36+
37+
This example shows the full workflow:
38+
1. Launch browser-use with Sentience extension loaded
39+
2. Create a Sentience backend adapter
40+
3. Take snapshots and interact with elements using semantic queries
41+
"""
42+
43+
# =========================================================================
44+
# STEP 1: Setup browser-use with Sentience extension
45+
# =========================================================================
46+
#
47+
# The Sentience extension must be loaded for element grounding to work.
48+
# Use get_extension_dir() to get the path to the bundled extension.
49+
#
50+
# Uncomment the following when running with browser-use installed:
51+
52+
# extension_path = get_extension_dir()
53+
# print(f"Loading Sentience extension from: {extension_path}")
54+
#
55+
# profile = BrowserProfile(
56+
# args=[
57+
# f"--load-extension={extension_path}",
58+
# "--disable-extensions-except=" + extension_path,
59+
# ],
60+
# )
61+
# session = BrowserSession(browser_profile=profile)
62+
# await session.start()
63+
64+
# =========================================================================
65+
# STEP 2: Create Sentience backend adapter
66+
# =========================================================================
67+
#
68+
# The adapter bridges browser-use's CDP client to Sentience's backend protocol.
69+
#
70+
# adapter = BrowserUseAdapter(session)
71+
# backend = await adapter.create_backend()
72+
73+
# =========================================================================
74+
# STEP 3: Navigate and take snapshots
75+
# =========================================================================
76+
#
77+
# await session.navigate("https://www.google.com")
78+
#
79+
# # Take a snapshot - this uses the Sentience extension's element detection
80+
# try:
81+
# snap = await snapshot(backend)
82+
# print(f"Found {len(snap.elements)} elements")
83+
# except ExtensionNotLoadedError as e:
84+
# print(f"Extension not loaded: {e}")
85+
# print("Make sure the browser was launched with --load-extension flag")
86+
# return
87+
88+
# =========================================================================
89+
# STEP 4: Find and interact with elements using semantic queries
90+
# =========================================================================
91+
#
92+
# Sentience provides powerful element selectors:
93+
# - Role-based: 'role=textbox', 'role=button'
94+
# - Name-based: 'role=button[name="Submit"]'
95+
# - Text-based: 'text=Search'
96+
#
97+
# # Find the search input
98+
# search_input = find(snap, 'role=textbox[name*="Search"]')
99+
# if search_input:
100+
# # Click on the search input (uses center of bounding box)
101+
# await click(backend, search_input.bbox)
102+
#
103+
# # Type search query
104+
# await type_text(backend, "Sentience AI browser automation")
105+
# print("Typed search query")
106+
107+
# =========================================================================
108+
# STEP 5: Using cached snapshots for efficiency
109+
# =========================================================================
110+
#
111+
# Taking snapshots has overhead. Use CachedSnapshot to reuse recent snapshots:
112+
#
113+
# cache = CachedSnapshot(backend, max_age_ms=2000)
114+
#
115+
# # First call takes fresh snapshot
116+
# snap1 = await cache.get()
117+
#
118+
# # Second call returns cached version if less than 2 seconds old
119+
# snap2 = await cache.get()
120+
#
121+
# # After actions that modify DOM, invalidate the cache
122+
# await click(backend, some_element.bbox)
123+
# cache.invalidate() # Next get() will take fresh snapshot
124+
125+
# =========================================================================
126+
# STEP 6: Scrolling the page and scrollable containers
127+
# =========================================================================
128+
#
129+
# # Scroll down by 500 pixels
130+
# await scroll(backend, delta_y=500)
131+
#
132+
# # Scroll at a specific position (useful for scrollable containers)
133+
# await scroll(backend, delta_y=300, target=(400, 500))
134+
135+
# =========================================================================
136+
# STEP 7: Advanced element queries
137+
# =========================================================================
138+
#
139+
# # Find all buttons
140+
# buttons = query(snap, 'role=button')
141+
# print(f"Found {len(buttons)} buttons")
142+
#
143+
# # Find by partial name match (name*= matches a substring of the name)
144+
# links = query(snap, 'role=link[name*="Learn"]')
145+
#
146+
# # Find by exact name
147+
# submit_btn = find(snap, 'role=button[name="Submit"]')
148+
149+
# =========================================================================
150+
# STEP 8: Error handling
151+
# =========================================================================
152+
#
153+
# Sentience provides specific exceptions for common errors:
154+
#
155+
# from sentience.backends import (
156+
# ExtensionNotLoadedError, # Extension not loaded in browser
157+
# SnapshotError, # Snapshot failed
158+
# ActionError, # Click/type/scroll failed
159+
# )
160+
#
161+
# try:
162+
# snap = await snapshot(backend)
163+
# except ExtensionNotLoadedError as e:
164+
# # The error message includes fix suggestions
165+
# print(f"Fix: {e}")
166+
167+
# =========================================================================
168+
# CLEANUP
169+
# =========================================================================
170+
#
171+
# await session.stop()
172+
173+
print("=" * 60)
174+
print("browser-use + Sentience Integration Example")
175+
print("=" * 60)
176+
print()
177+
print("This example demonstrates the integration pattern.")
178+
print("To run with a real browser, uncomment the code sections above")
179+
print("and install browser-use: pip install browser-use")
180+
print()
181+
print("Key imports:")
182+
print(" from sentience import get_extension_dir, find, query")
183+
print(" from sentience.backends import (")
184+
print(" BrowserUseAdapter, snapshot, click, type_text, scroll")
185+
print(" )")
186+
print()
187+
print("Extension path:", get_extension_dir())
188+
189+
190+
async def full_example() -> None:
191+
"""
192+
Complete working example - requires browser-use installed.
193+
194+
This is a placeholder for the uncommented version; for now it only prints the setup steps.
195+
"""
196+
# Import browser-use (uncomment when installed)
197+
# from browser_use import BrowserSession, BrowserProfile
198+
199+
print("To run the full example:")
200+
print("1. Install browser-use: pip install browser-use")
201+
print("2. Uncomment the imports in this function")
202+
print("3. Run: python examples/browser_use_integration.py")
203+
204+
205+
if __name__ == "__main__":
206+
asyncio.run(main())

sentience/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -118,7 +118,7 @@
118118
from .visual_agent import SentienceVisualAgent, SentienceVisualAgentAsync
119119
from .wait import wait_for
120120

121-
__version__ = "0.92.3"
121+
__version__ = "0.93.0"
122122

123123
__all__ = [
124124
# Extension helpers (for browser-use integration)

sentience/backends/__init__.py

Lines changed: 78 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,28 @@
55
Sentience actions (click, type, scroll) to work with different browser
66
automation frameworks.
77
8-
Supported backends:
9-
- PlaywrightBackend: Default backend using Playwright (existing SentienceBrowser)
10-
- CDPBackendV0: CDP-based backend for browser-use integration
8+
Supported Backends
9+
------------------
10+
11+
**PlaywrightBackend**
12+
Wraps Playwright Page objects. Use this when integrating with existing
13+
SentienceBrowser or Playwright-based code.
14+
15+
**CDPBackendV0**
16+
Low-level CDP (Chrome DevTools Protocol) backend. Use this when you have
17+
direct access to a CDP client and session.
18+
19+
**BrowserUseAdapter**
20+
High-level adapter for browser-use framework. Automatically creates a
21+
CDPBackendV0 from a BrowserSession.
22+
23+
Quick Start with browser-use
24+
----------------------------
25+
26+
.. code-block:: python
1127
12-
For browser-use integration:
1328
from browser_use import BrowserSession, BrowserProfile
14-
from sentience import get_extension_dir
29+
from sentience import get_extension_dir, find
1530
from sentience.backends import BrowserUseAdapter, snapshot, click, type_text
1631
1732
# Setup browser-use with Sentience extension
@@ -23,15 +38,63 @@
2338
adapter = BrowserUseAdapter(session)
2439
backend = await adapter.create_backend()
2540
26-
# Take snapshot and interact
41+
# Take snapshot and interact with elements
2742
snap = await snapshot(backend)
28-
element = find(snap, 'role=button[name="Submit"]')
43+
search_box = find(snap, 'role=textbox[name*="Search"]')
44+
await click(backend, search_box.bbox)
45+
await type_text(backend, "Sentience AI")
46+
47+
Snapshot Caching
48+
----------------
49+
50+
Use CachedSnapshot to reduce redundant snapshot calls in action loops:
51+
52+
.. code-block:: python
53+
54+
from sentience.backends import CachedSnapshot
55+
56+
cache = CachedSnapshot(backend, max_age_ms=2000)
57+
58+
snap1 = await cache.get() # Takes fresh snapshot
59+
snap2 = await cache.get() # Returns cached if < 2s old
60+
2961
await click(backend, element.bbox)
62+
cache.invalidate() # Force refresh on next get()
63+
64+
Error Handling
65+
--------------
66+
67+
The module provides specific exceptions for common failure modes:
68+
69+
- ``ExtensionNotLoadedError``: Extension not loaded in browser launch args
70+
- ``SnapshotError``: window.sentience.snapshot() failed
71+
- ``ActionError``: Click/type/scroll operation failed
72+
73+
All exceptions inherit from ``SentienceBackendError`` and include helpful
74+
fix suggestions in their error messages.
75+
76+
.. code-block:: python
77+
78+
from sentience.backends import ExtensionNotLoadedError, snapshot
79+
80+
try:
81+
snap = await snapshot(backend)
82+
except ExtensionNotLoadedError as e:
83+
print(f"Fix suggestion: {e}")
3084
"""
3185

3286
from .actions import click, scroll, scroll_to_element, type_text, wait_for_stable
3387
from .browser_use_adapter import BrowserUseAdapter, BrowserUseCDPTransport
3488
from .cdp_backend import CDPBackendV0, CDPTransport
89+
from .exceptions import (
90+
ActionError,
91+
BackendEvalError,
92+
ExtensionDiagnostics,
93+
ExtensionInjectionError,
94+
ExtensionNotLoadedError,
95+
SentienceBackendError,
96+
SnapshotError,
97+
)
3598
from .playwright_backend import PlaywrightBackend
3699
from .protocol_v0 import BrowserBackendV0, LayoutMetrics, ViewportInfo
37100
from .snapshot import CachedSnapshot, snapshot
@@ -58,4 +121,12 @@
58121
"scroll",
59122
"scroll_to_element",
60123
"wait_for_stable",
124+
# Exceptions
125+
"SentienceBackendError",
126+
"ExtensionNotLoadedError",
127+
"ExtensionInjectionError",
128+
"ExtensionDiagnostics",
129+
"BackendEvalError",
130+
"SnapshotError",
131+
"ActionError",
61132
]

sentience/backends/actions.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -226,7 +226,8 @@ async def scroll_to_element(
226226
start_time = time.time()
227227

228228
try:
229-
scrolled = await backend.eval(f"""
229+
scrolled = await backend.eval(
230+
f"""
230231
(() => {{
231232
const el = window.sentience_registry && window.sentience_registry[{element_id}];
232233
if (el && el.scrollIntoView) {{
@@ -239,7 +240,8 @@ async def scroll_to_element(
239240
}}
240241
return false;
241242
}})()
242-
""")
243+
"""
244+
)
243245

244246
# Wait for scroll animation
245247
wait_time = 0.3 if behavior == "smooth" else 0.05

0 commit comments

Comments
 (0)