Skip to content

Commit 4e6e450

Browse files
authored
Merge pull request #128 from SentienceAPI/bu_phase2
Phase 2: browser use support with Backend protocol full integration
2 parents 9c62a9e + b662c4e commit 4e6e450

File tree

12 files changed

+2116
-92
lines changed

12 files changed

+2116
-92
lines changed
Lines changed: 206 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,206 @@
1+
"""
2+
Example: Using Sentience with browser-use for element grounding.
3+
4+
This example demonstrates how to integrate Sentience's semantic element
5+
detection with browser-use, enabling accurate click/type/scroll operations
6+
using Sentience's snapshot-based grounding instead of coordinate estimation.
7+
8+
Requirements:
9+
pip install browser-use sentienceapi
10+
11+
Usage:
12+
python examples/browser_use_integration.py
13+
"""
14+
15+
import asyncio
16+
17+
# Sentience imports
18+
from sentience import find, get_extension_dir, query
19+
from sentience.backends import (
20+
BrowserUseAdapter,
21+
CachedSnapshot,
22+
ExtensionNotLoadedError,
23+
click,
24+
scroll,
25+
snapshot,
26+
type_text,
27+
)
28+
29+
# browser-use imports (install via: pip install browser-use)
30+
# from browser_use import BrowserSession, BrowserProfile
31+
32+
33+
async def main() -> None:
    """
    Demo: Search on Google using Sentience grounding with browser-use.

    This example shows the full workflow:
    1. Launch browser-use with Sentience extension loaded
    2. Create a Sentience backend adapter
    3. Take snapshots and interact with elements using semantic queries

    Every live-browser step below is kept as commented-out code so the
    script can run without browser-use installed. As written, this coroutine
    only prints usage guidance and the value of ``get_extension_dir()``.
    """

    # =========================================================================
    # STEP 1: Setup browser-use with Sentience extension
    # =========================================================================
    #
    # The Sentience extension must be loaded for element grounding to work.
    # Use get_extension_dir() to get the path to the bundled extension.
    #
    # Uncomment the following when running with browser-use installed:

    # extension_path = get_extension_dir()
    # print(f"Loading Sentience extension from: {extension_path}")
    #
    # profile = BrowserProfile(
    #     args=[
    #         f"--load-extension={extension_path}",
    #         f"--disable-extensions-except={extension_path}",
    #     ],
    # )
    # session = BrowserSession(browser_profile=profile)
    # await session.start()

    # =========================================================================
    # STEP 2: Create Sentience backend adapter
    # =========================================================================
    #
    # The adapter bridges browser-use's CDP client to Sentience's backend
    # protocol.
    #
    # adapter = BrowserUseAdapter(session)
    # backend = await adapter.create_backend()

    # =========================================================================
    # STEP 3: Navigate and take snapshots
    # =========================================================================
    #
    # await session.navigate("https://www.google.com")
    #
    # # Take a snapshot - this uses the Sentience extension's element detection
    # try:
    #     snap = await snapshot(backend)
    #     print(f"Found {len(snap.elements)} elements")
    # except ExtensionNotLoadedError as e:
    #     print(f"Extension not loaded: {e}")
    #     print("Make sure the browser was launched with --load-extension flag")
    #     return

    # =========================================================================
    # STEP 4: Find and interact with elements using semantic queries
    # =========================================================================
    #
    # Sentience provides powerful element selectors:
    # - Role-based: 'role=textbox', 'role=button'
    # - Name-based: 'role=button[name="Submit"]'
    # - Text-based: 'text=Search'
    #
    # # Find the search input
    # search_input = find(snap, 'role=textbox[name*="Search"]')
    # if search_input:
    #     # Click on the search input (uses center of bounding box)
    #     await click(backend, search_input.bbox)
    #
    #     # Type search query
    #     await type_text(backend, "Sentience AI browser automation")
    #     print("Typed search query")

    # =========================================================================
    # STEP 5: Using cached snapshots for efficiency
    # =========================================================================
    #
    # Taking snapshots has overhead. Use CachedSnapshot to reuse recent
    # snapshots:
    #
    # cache = CachedSnapshot(backend, max_age_ms=2000)
    #
    # # First call takes fresh snapshot
    # snap1 = await cache.get()
    #
    # # Second call returns cached version if less than 2 seconds old
    # snap2 = await cache.get()
    #
    # # After actions that modify DOM, invalidate the cache
    # # (some_element stands for any element previously returned by find())
    # await click(backend, some_element.bbox)
    # cache.invalidate()  # Next get() will take fresh snapshot

    # =========================================================================
    # STEP 6: Scrolling
    # =========================================================================
    #
    # # Scroll down by 500 pixels
    # await scroll(backend, delta_y=500)
    #
    # # Scroll at a specific position (useful for scrollable containers)
    # await scroll(backend, delta_y=300, target=(400, 500))

    # =========================================================================
    # STEP 7: Advanced element queries
    # =========================================================================
    #
    # # Find all buttons
    # buttons = query(snap, 'role=button')
    # print(f"Found {len(buttons)} buttons")
    #
    # # Find by partial name match (name*=)
    # links = query(snap, 'role=link[name*="Learn"]')
    #
    # # Find by exact name (name=)
    # submit_btn = find(snap, 'role=button[name="Submit"]')

    # =========================================================================
    # STEP 8: Error handling
    # =========================================================================
    #
    # Sentience provides specific exceptions for common errors:
    #
    # from sentience.backends import (
    #     ExtensionNotLoadedError,  # Extension not loaded in browser
    #     SnapshotError,            # Snapshot failed
    #     ActionError,              # Click/type/scroll failed
    # )
    #
    # try:
    #     snap = await snapshot(backend)
    # except ExtensionNotLoadedError as e:
    #     # The error message includes fix suggestions
    #     print(f"Fix: {e}")

    # =========================================================================
    # CLEANUP
    # =========================================================================
    #
    # await session.stop()

    # With the live steps commented out, the only runtime effect is this
    # banner, which tells the user how to enable the real workflow.
    print("=" * 60)
    print("browser-use + Sentience Integration Example")
    print("=" * 60)
    print()
    print("This example demonstrates the integration pattern.")
    print("To run with a real browser, uncomment the code sections above")
    print("and install browser-use: pip install browser-use")
    print()
    print("Key imports:")
    print(" from sentience import get_extension_dir, find, query")
    print(" from sentience.backends import (")
    print(" BrowserUseAdapter, snapshot, click, type_text, scroll")
    print(" )")
    print()
    print("Extension path:", get_extension_dir())
188+
189+
190+
async def full_example() -> None:
    """
    Print the steps needed to run this example against a real browser.

    This coroutine is a placeholder for the complete, uncommented workflow:
    it performs no browser automation and only prints setup instructions.
    The browser-use import is left commented out so the module can be
    imported without browser-use installed.
    """
    # Import browser-use (uncomment when installed)
    # from browser_use import BrowserSession, BrowserProfile

    print("To run the full example:")
    print("1. Install browser-use: pip install browser-use")
    print("2. Uncomment the imports in this function")
    print("3. Run: python examples/browser_use_integration.py")
203+
204+
205+
# Script entry point: run the demo coroutine only when executed directly,
# not when this module is imported.
if __name__ == "__main__":
    asyncio.run(main())

sentience/__init__.py

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -16,16 +16,25 @@
1616
from .agent_config import AgentConfig
1717
from .agent_runtime import AgentRuntime
1818

19+
# NOTE: backend-agnostic actions are imported further below under backend_* aliases to avoid conflict with existing actions
1920
# Browser backends (for browser-use integration)
2021
from .backends import (
2122
BrowserBackendV0,
2223
BrowserUseAdapter,
2324
BrowserUseCDPTransport,
25+
CachedSnapshot,
2426
CDPBackendV0,
2527
CDPTransport,
2628
LayoutMetrics,
29+
PlaywrightBackend,
2730
ViewportInfo,
2831
)
32+
from .backends import click as backend_click
33+
from .backends import scroll as backend_scroll
34+
from .backends import scroll_to_element as backend_scroll_to_element
35+
from .backends import snapshot as backend_snapshot
36+
from .backends import type_text as backend_type_text
37+
from .backends import wait_for_stable as backend_wait_for_stable
2938

3039
# Agent Layer (Phase 1 & 2)
3140
from .base_agent import BaseAgent
@@ -109,7 +118,7 @@
109118
from .visual_agent import SentienceVisualAgent, SentienceVisualAgentAsync
110119
from .wait import wait_for
111120

112-
__version__ = "0.92.3"
121+
__version__ = "0.93.0"
113122

114123
__all__ = [
115124
# Extension helpers (for browser-use integration)
@@ -123,10 +132,19 @@
123132
"BrowserBackendV0",
124133
"CDPTransport",
125134
"CDPBackendV0",
135+
"PlaywrightBackend",
126136
"BrowserUseAdapter",
127137
"BrowserUseCDPTransport",
128138
"ViewportInfo",
129139
"LayoutMetrics",
140+
"backend_snapshot",
141+
"CachedSnapshot",
142+
# Backend-agnostic actions (prefixed to avoid conflicts)
143+
"backend_click",
144+
"backend_type_text",
145+
"backend_scroll",
146+
"backend_scroll_to_element",
147+
"backend_wait_for_stable",
130148
# Core SDK
131149
"SentienceBrowser",
132150
"Snapshot",

sentience/backends/__init__.py

Lines changed: 94 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -5,14 +5,29 @@
55
Sentience actions (click, type, scroll) to work with different browser
66
automation frameworks.
77
8-
Supported backends:
9-
- PlaywrightBackend: Default backend using Playwright (existing SentienceBrowser)
10-
- CDPBackendV0: CDP-based backend for browser-use integration
8+
Supported Backends
9+
------------------
10+
11+
**PlaywrightBackend**
12+
Wraps Playwright Page objects. Use this when integrating with existing
13+
SentienceBrowser or Playwright-based code.
14+
15+
**CDPBackendV0**
16+
Low-level CDP (Chrome DevTools Protocol) backend. Use this when you have
17+
direct access to a CDP client and session.
18+
19+
**BrowserUseAdapter**
20+
High-level adapter for browser-use framework. Automatically creates a
21+
CDPBackendV0 from a BrowserSession.
22+
23+
Quick Start with browser-use
24+
----------------------------
25+
26+
.. code-block:: python
1127
12-
For browser-use integration:
1328
from browser_use import BrowserSession, BrowserProfile
14-
from sentience import get_extension_dir
15-
from sentience.backends import BrowserUseAdapter, CDPBackendV0
29+
from sentience import get_extension_dir, find
30+
from sentience.backends import BrowserUseAdapter, snapshot, click, type_text
1631
1732
# Setup browser-use with Sentience extension
1833
profile = BrowserProfile(args=[f"--load-extension={get_extension_dir()}"])
@@ -23,13 +38,66 @@
2338
adapter = BrowserUseAdapter(session)
2439
backend = await adapter.create_backend()
2540
26-
# Use backend for precise operations
27-
await backend.mouse_click(100, 200)
41+
# Take snapshot and interact with elements
42+
snap = await snapshot(backend)
43+
search_box = find(snap, 'role=textbox[name*="Search"]')
44+
await click(backend, search_box.bbox)
45+
await type_text(backend, "Sentience AI")
46+
47+
Snapshot Caching
48+
----------------
49+
50+
Use CachedSnapshot to reduce redundant snapshot calls in action loops:
51+
52+
.. code-block:: python
53+
54+
from sentience.backends import CachedSnapshot
55+
56+
cache = CachedSnapshot(backend, max_age_ms=2000)
57+
58+
snap1 = await cache.get() # Takes fresh snapshot
59+
snap2 = await cache.get() # Returns cached if < 2s old
60+
61+
await click(backend, element.bbox)
62+
cache.invalidate() # Force refresh on next get()
63+
64+
Error Handling
65+
--------------
66+
67+
The module provides specific exceptions for common failure modes:
68+
69+
- ``ExtensionNotLoadedError``: Extension not loaded in browser launch args
70+
- ``SnapshotError``: window.sentience.snapshot() failed
71+
- ``ActionError``: Click/type/scroll operation failed
72+
73+
All exceptions inherit from ``SentienceBackendError`` and include helpful
74+
fix suggestions in their error messages.
75+
76+
.. code-block:: python
77+
78+
from sentience.backends import ExtensionNotLoadedError, snapshot
79+
80+
try:
81+
snap = await snapshot(backend)
82+
except ExtensionNotLoadedError as e:
83+
print(f"Fix suggestion: {e}")
2884
"""
2985

86+
from .actions import click, scroll, scroll_to_element, type_text, wait_for_stable
3087
from .browser_use_adapter import BrowserUseAdapter, BrowserUseCDPTransport
3188
from .cdp_backend import CDPBackendV0, CDPTransport
89+
from .exceptions import (
90+
ActionError,
91+
BackendEvalError,
92+
ExtensionDiagnostics,
93+
ExtensionInjectionError,
94+
ExtensionNotLoadedError,
95+
SentienceBackendError,
96+
SnapshotError,
97+
)
98+
from .playwright_backend import PlaywrightBackend
3299
from .protocol_v0 import BrowserBackendV0, LayoutMetrics, ViewportInfo
100+
from .snapshot import CachedSnapshot, snapshot
33101

34102
__all__ = [
35103
# Protocol
@@ -40,7 +108,25 @@
40108
# CDP Backend
41109
"CDPTransport",
42110
"CDPBackendV0",
111+
# Playwright Backend
112+
"PlaywrightBackend",
43113
# browser-use adapter
44114
"BrowserUseAdapter",
45115
"BrowserUseCDPTransport",
116+
# Backend-agnostic functions
117+
"snapshot",
118+
"CachedSnapshot",
119+
"click",
120+
"type_text",
121+
"scroll",
122+
"scroll_to_element",
123+
"wait_for_stable",
124+
# Exceptions
125+
"SentienceBackendError",
126+
"ExtensionNotLoadedError",
127+
"ExtensionInjectionError",
128+
"ExtensionDiagnostics",
129+
"BackendEvalError",
130+
"SnapshotError",
131+
"ActionError",
46132
]

0 commit comments

Comments
 (0)