Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
41 changes: 41 additions & 0 deletions examples/human_cursor_click_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
"""
Human-like cursor movement demo (Python SDK).

This example shows how to opt into human-like mouse movement before clicking,
and how to read the returned cursor metadata for tracing/debugging.
"""

from __future__ import annotations

from sentience import CursorPolicy, SentienceBrowser, click, find, snapshot


def main() -> None:
    """Click the first link on example.com using human-like cursor motion.

    Loads the page in a real browser, takes a snapshot, queries it for a link,
    and clicks that link with an opt-in ``CursorPolicy`` in ``"human"`` mode.
    The click's success flag, outcome, and cursor metadata are printed so they
    can be inspected for tracing/debugging.

    Raises:
        RuntimeError: If the snapshot query returns no link.
    """
    # NOTE: This uses a real browser via Playwright.
    with SentienceBrowser() as browser:
        browser.page.goto("https://example.com")
        browser.page.wait_for_load_state("networkidle")

        # Query the freshly captured snapshot for a link to click.
        target_link = find(snapshot(browser), "role=link")
        if not target_link:
            raise RuntimeError("No link found on page")

        # Movement tuning knobs, gathered in one mapping for readability.
        cursor_options = {
            "mode": "human",
            "steps": 18,  # more steps => smoother
            "duration_ms": 350,
            "jitter_px": 1.2,
            "overshoot_px": 6.0,
            "pause_before_click_ms": 30,
            "seed": 123,  # optional: makes motion deterministic for demos/tests
        }
        human_policy = CursorPolicy(**cursor_options)

        click_result = click(
            browser,
            target_link.id,
            use_mouse=True,
            cursor_policy=human_policy,
        )
        print("clicked:", click_result.success, "outcome:", click_result.outcome)
        print("cursor meta:", click_result.cursor)


# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

2 changes: 2 additions & 0 deletions sentience/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
# Tracing (v0.12.0+)
from .cloud_tracing import CloudTraceSink, SentienceLogger
from .conversational_agent import ConversationalAgent
from .cursor_policy import CursorPolicy
from .expect import expect
from .generator import ScriptGenerator, generate
from .inspector import Inspector, inspect
Expand Down Expand Up @@ -172,6 +173,7 @@
"press",
"scroll_to",
"click_rect",
"CursorPolicy",
"wait_for",
"expect",
"Inspector",
Expand Down
2 changes: 2 additions & 0 deletions sentience/action_executor.py
Original file line number Diff line number Diff line change
Expand Up @@ -96,6 +96,7 @@ def execute(self, action_str: str, snap: Snapshot) -> dict[str, Any]:
"element_id": element_id,
"outcome": result.outcome,
"url_changed": result.url_changed,
"cursor": getattr(result, "cursor", None),
}

# Parse TYPE(42, "hello world")
Expand Down Expand Up @@ -170,6 +171,7 @@ async def execute_async(self, action_str: str, snap: Snapshot) -> dict[str, Any]
"element_id": element_id,
"outcome": result.outcome,
"url_changed": result.url_changed,
"cursor": getattr(result, "cursor", None),
}

# Parse TYPE(42, "hello world")
Expand Down
153 changes: 150 additions & 3 deletions sentience/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@
Actions v1 - click, type, press
"""

import asyncio
import time

from .browser import AsyncSentienceBrowser, SentienceBrowser
from .browser_evaluator import BrowserEvaluator
from .cursor_policy import CursorPolicy, build_human_cursor_path
from .models import ActionResult, BBox, Snapshot
from .sentience_methods import SentienceMethod
from .snapshot import snapshot, snapshot_async
Expand All @@ -18,6 +20,7 @@ def click( # noqa: C901
element_id: int,
use_mouse: bool = True,
take_snapshot: bool = False,
cursor_policy: CursorPolicy | None = None,
) -> ActionResult:
"""
Click an element by ID using hybrid approach (mouse simulation by default)
Expand All @@ -37,6 +40,7 @@ def click( # noqa: C901

start_time = time.time()
url_before = browser.page.url
cursor_meta: dict | None = None

if use_mouse:
# Hybrid approach: Get element bbox from snapshot, calculate center, use mouse.click()
Expand All @@ -52,9 +56,49 @@ def click( # noqa: C901
# Calculate center of element bbox
center_x = element.bbox.x + element.bbox.width / 2
center_y = element.bbox.y + element.bbox.height / 2
# Use Playwright's native mouse click for realistic simulation
# Optional: human-like cursor movement (opt-in)
try:
browser.page.mouse.click(center_x, center_y)
if cursor_policy is not None and cursor_policy.mode == "human":
# Best-effort cursor state on browser instance
pos = getattr(browser, "_sentience_cursor_pos", None)
if not isinstance(pos, tuple) or len(pos) != 2:
try:
vp = browser.page.viewport_size or {}
pos = (
float(vp.get("width", 0)) / 2.0,
float(vp.get("height", 0)) / 2.0,
)
except Exception:
pos = (0.0, 0.0)

cursor_meta = build_human_cursor_path(
start=(float(pos[0]), float(pos[1])),
target=(float(center_x), float(center_y)),
policy=cursor_policy,
)
pts = cursor_meta.get("path", [])
steps = int(cursor_meta.get("steps") or max(1, len(pts)))
duration_ms = int(cursor_meta.get("duration_ms") or 0)
per_step_s = (
(duration_ms / max(1, len(pts))) / 1000.0 if duration_ms > 0 else 0.0
)
for p in pts:
browser.page.mouse.move(float(p["x"]), float(p["y"]))
if per_step_s > 0:
time.sleep(per_step_s)
pause_ms = int(cursor_meta.get("pause_before_click_ms") or 0)
if pause_ms > 0:
time.sleep(pause_ms / 1000.0)
browser.page.mouse.click(center_x, center_y)
setattr(
browser, "_sentience_cursor_pos", (float(center_x), float(center_y))
)
else:
# Default behavior (no regression)
browser.page.mouse.click(center_x, center_y)
setattr(
browser, "_sentience_cursor_pos", (float(center_x), float(center_y))
)
success = True
except Exception:
# If navigation happens, mouse.click might fail, but that's OK
Expand Down Expand Up @@ -122,6 +166,7 @@ def click( # noqa: C901
outcome=outcome,
url_changed=url_changed,
snapshot_after=snapshot_after,
cursor=cursor_meta,
error=(
None
if success
Expand Down Expand Up @@ -414,6 +459,7 @@ def click_rect(
highlight: bool = True,
highlight_duration: float = 2.0,
take_snapshot: bool = False,
cursor_policy: CursorPolicy | None = None,
) -> ActionResult:
"""
Click at the center of a rectangle using Playwright's native mouse simulation.
Expand Down Expand Up @@ -469,6 +515,7 @@ def click_rect(
# Calculate center of rectangle
center_x = x + w / 2
center_y = y + h / 2
cursor_meta: dict | None = None

# Show highlight before clicking (if enabled)
if highlight:
Expand All @@ -479,7 +526,35 @@ def click_rect(
# Use Playwright's native mouse click for realistic simulation
# This triggers hover, focus, mousedown, mouseup sequences
try:
if cursor_policy is not None and cursor_policy.mode == "human":
pos = getattr(browser, "_sentience_cursor_pos", None)
if not isinstance(pos, tuple) or len(pos) != 2:
try:
vp = browser.page.viewport_size or {}
pos = (float(vp.get("width", 0)) / 2.0, float(vp.get("height", 0)) / 2.0)
except Exception:
pos = (0.0, 0.0)

cursor_meta = build_human_cursor_path(
start=(float(pos[0]), float(pos[1])),
target=(float(center_x), float(center_y)),
policy=cursor_policy,
)
pts = cursor_meta.get("path", [])
duration_ms_move = int(cursor_meta.get("duration_ms") or 0)
per_step_s = (
(duration_ms_move / max(1, len(pts))) / 1000.0 if duration_ms_move > 0 else 0.0
)
for p in pts:
browser.page.mouse.move(float(p["x"]), float(p["y"]))
if per_step_s > 0:
time.sleep(per_step_s)
pause_ms = int(cursor_meta.get("pause_before_click_ms") or 0)
if pause_ms > 0:
time.sleep(pause_ms / 1000.0)

browser.page.mouse.click(center_x, center_y)
setattr(browser, "_sentience_cursor_pos", (float(center_x), float(center_y)))
success = True
except Exception as e:
success = False
Expand Down Expand Up @@ -512,6 +587,7 @@ def click_rect(
outcome=outcome,
url_changed=url_changed,
snapshot_after=snapshot_after,
cursor=cursor_meta,
error=(
None
if success
Expand All @@ -531,6 +607,7 @@ async def click_async(
element_id: int,
use_mouse: bool = True,
take_snapshot: bool = False,
cursor_policy: CursorPolicy | None = None,
) -> ActionResult:
"""
Click an element by ID using hybrid approach (async)
Expand All @@ -549,6 +626,7 @@ async def click_async(

start_time = time.time()
url_before = browser.page.url
cursor_meta: dict | None = None

if use_mouse:
try:
Expand All @@ -563,7 +641,44 @@ async def click_async(
center_x = element.bbox.x + element.bbox.width / 2
center_y = element.bbox.y + element.bbox.height / 2
try:
await browser.page.mouse.click(center_x, center_y)
if cursor_policy is not None and cursor_policy.mode == "human":
pos = getattr(browser, "_sentience_cursor_pos", None)
if not isinstance(pos, tuple) or len(pos) != 2:
try:
vp = browser.page.viewport_size or {}
pos = (
float(vp.get("width", 0)) / 2.0,
float(vp.get("height", 0)) / 2.0,
)
except Exception:
pos = (0.0, 0.0)

cursor_meta = build_human_cursor_path(
start=(float(pos[0]), float(pos[1])),
target=(float(center_x), float(center_y)),
policy=cursor_policy,
)
pts = cursor_meta.get("path", [])
duration_ms = int(cursor_meta.get("duration_ms") or 0)
per_step_s = (
(duration_ms / max(1, len(pts))) / 1000.0 if duration_ms > 0 else 0.0
)
for p in pts:
await browser.page.mouse.move(float(p["x"]), float(p["y"]))
if per_step_s > 0:
await asyncio.sleep(per_step_s)
pause_ms = int(cursor_meta.get("pause_before_click_ms") or 0)
if pause_ms > 0:
await asyncio.sleep(pause_ms / 1000.0)
await browser.page.mouse.click(center_x, center_y)
setattr(
browser, "_sentience_cursor_pos", (float(center_x), float(center_y))
)
else:
await browser.page.mouse.click(center_x, center_y)
setattr(
browser, "_sentience_cursor_pos", (float(center_x), float(center_y))
)
success = True
except Exception:
success = True
Expand Down Expand Up @@ -640,6 +755,7 @@ async def click_async(
outcome=outcome,
url_changed=url_changed,
snapshot_after=snapshot_after,
cursor=cursor_meta,
error=(
None
if success
Expand Down Expand Up @@ -922,6 +1038,7 @@ async def click_rect_async(
highlight: bool = True,
highlight_duration: float = 2.0,
take_snapshot: bool = False,
cursor_policy: CursorPolicy | None = None,
) -> ActionResult:
"""
Click at the center of a rectangle (async)
Expand Down Expand Up @@ -968,6 +1085,7 @@ async def click_rect_async(
# Calculate center of rectangle
center_x = x + w / 2
center_y = y + h / 2
cursor_meta: dict | None = None

# Show highlight before clicking
if highlight:
Expand All @@ -976,7 +1094,35 @@ async def click_rect_async(

# Use Playwright's native mouse click
try:
if cursor_policy is not None and cursor_policy.mode == "human":
pos = getattr(browser, "_sentience_cursor_pos", None)
if not isinstance(pos, tuple) or len(pos) != 2:
try:
vp = browser.page.viewport_size or {}
pos = (float(vp.get("width", 0)) / 2.0, float(vp.get("height", 0)) / 2.0)
except Exception:
pos = (0.0, 0.0)

cursor_meta = build_human_cursor_path(
start=(float(pos[0]), float(pos[1])),
target=(float(center_x), float(center_y)),
policy=cursor_policy,
)
pts = cursor_meta.get("path", [])
duration_ms_move = int(cursor_meta.get("duration_ms") or 0)
per_step_s = (
(duration_ms_move / max(1, len(pts))) / 1000.0 if duration_ms_move > 0 else 0.0
)
for p in pts:
await browser.page.mouse.move(float(p["x"]), float(p["y"]))
if per_step_s > 0:
await asyncio.sleep(per_step_s)
pause_ms = int(cursor_meta.get("pause_before_click_ms") or 0)
if pause_ms > 0:
await asyncio.sleep(pause_ms / 1000.0)

await browser.page.mouse.click(center_x, center_y)
setattr(browser, "_sentience_cursor_pos", (float(center_x), float(center_y)))
success = True
except Exception as e:
success = False
Expand Down Expand Up @@ -1009,6 +1155,7 @@ async def click_rect_async(
outcome=outcome,
url_changed=url_changed,
snapshot_after=snapshot_after,
cursor=cursor_meta,
error=(
None
if success
Expand Down
4 changes: 4 additions & 0 deletions sentience/agent.py
Original file line number Diff line number Diff line change
Expand Up @@ -355,6 +355,7 @@ def act( # noqa: C901
url_changed=result_dict.get("url_changed"),
error=result_dict.get("error"),
message=result_dict.get("message"),
cursor=result_dict.get("cursor"),
)

# Emit action execution trace event if tracer is enabled
Expand Down Expand Up @@ -391,6 +392,7 @@ def act( # noqa: C901
"post_url": post_url,
"elements": elements_data, # Add element data for overlay
"target_element_id": result.element_id, # Highlight target in red
"cursor": result.cursor,
},
step_id=step_id,
)
Expand Down Expand Up @@ -445,6 +447,8 @@ def act( # noqa: C901
),
"duration_ms": duration_ms,
}
if result.cursor is not None:
exec_data["cursor"] = result.cursor

# Add optional exec fields
if result.element_id is not None:
Expand Down
2 changes: 1 addition & 1 deletion sentience/agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -341,7 +341,7 @@ def assert_done(
True if task is complete (assertion passed), False otherwise
"""
# Convenience wrapper for assert_ with required=True
ok = self.assert_(predicate, label=label, required=True)
ok = self.assertTrue(predicate, label=label, required=True)
if ok:
self._task_done = True
self._task_done_label = label
Expand Down
Loading
Loading