Skip to content

Commit b772c6e

Browse files
authored
Merge pull request #122 from SentienceAPI/typing
human like typing
2 parents 1d08fc1 + 7a20dc0 commit b772c6e

File tree

10 files changed

+386
-55
lines changed

10 files changed

+386
-55
lines changed

README.md

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,37 @@ with SentienceBrowser(headless=False) as browser:
102102

103103
---
104104

105+
## 🆕 What's New (2026-01-06)
106+
107+
### Human-like Typing
108+
Add realistic delays between keystrokes to mimic human typing:
109+
```python
110+
from sentience import type_text
111+
112+
# Type instantly (default)
113+
type_text(browser, element_id, "Hello World")
114+
115+
# Type with human-like delay (~10ms between keystrokes)
116+
type_text(browser, element_id, "Hello World", delay_ms=10)
117+
```
118+
119+
### Scroll to Element
120+
Scroll elements into view with smooth animation:
121+
```python
122+
from sentience import snapshot, find, scroll_to
123+
124+
snap = snapshot(browser)
125+
button = find(snap, 'role=button text~"Submit"')
126+
127+
# Scroll element into view with smooth animation
128+
scroll_to(browser, button.id)
129+
130+
# Scroll instantly, aligning the element with the top of the viewport
131+
scroll_to(browser, button.id, behavior='instant', block='start')
132+
```
133+
134+
---
135+
105136
<details>
106137
<summary><h2>💼 Real-World Example: Amazon Shopping Bot</h2></summary>
107138

sentience/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Sentience Python SDK - AI Agent Browser Automation
33
"""
44

5-
from .actions import click, click_rect, press, type_text
5+
from .actions import click, click_rect, press, scroll_to, type_text
66
from .agent import SentienceAgent, SentienceAgentAsync
77
from .agent_config import AgentConfig
88

@@ -90,6 +90,7 @@
9090
"click",
9191
"type_text",
9292
"press",
93+
"scroll_to",
9394
"click_rect",
9495
"wait_for",
9596
"expect",

sentience/actions.py

Lines changed: 204 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -134,7 +134,11 @@ def click( # noqa: C901
134134

135135

136136
def type_text(
137-
browser: SentienceBrowser, element_id: int, text: str, take_snapshot: bool = False
137+
browser: SentienceBrowser,
138+
element_id: int,
139+
text: str,
140+
take_snapshot: bool = False,
141+
delay_ms: float = 0,
138142
) -> ActionResult:
139143
"""
140144
Type text into an element (focus then input)
@@ -144,9 +148,16 @@ def type_text(
144148
element_id: Element ID from snapshot
145149
text: Text to type
146150
take_snapshot: Whether to take snapshot after action
151+
delay_ms: Delay between keystrokes in milliseconds for human-like typing (default: 0)
147152
148153
Returns:
149154
ActionResult
155+
156+
Example:
157+
>>> # Type instantly (default behavior)
158+
>>> type_text(browser, element_id, "Hello World")
159+
>>> # Type with human-like delay (~10ms between keystrokes)
160+
>>> type_text(browser, element_id, "Hello World", delay_ms=10)
150161
"""
151162
if not browser.page:
152163
raise RuntimeError("Browser not started. Call browser.start() first.")
@@ -177,8 +188,8 @@ def type_text(
177188
error={"code": "focus_failed", "reason": "Element not found"},
178189
)
179190

180-
# Type using Playwright keyboard
181-
browser.page.keyboard.type(text)
191+
# Type using Playwright keyboard with optional delay between keystrokes
192+
browser.page.keyboard.type(text, delay=delay_ms)
182193

183194
duration_ms = int((time.time() - start_time) * 1000)
184195
url_after = browser.page.url
@@ -242,6 +253,94 @@ def press(browser: SentienceBrowser, key: str, take_snapshot: bool = False) -> A
242253
)
243254

244255

256+
def scroll_to(
    browser: SentienceBrowser,
    element_id: int,
    behavior: str = "smooth",
    block: str = "center",
    take_snapshot: bool = False,
) -> ActionResult:
    """
    Scroll an element into view

    Scrolls the page so that the specified element is visible in the viewport.
    Looks the element up in ``window.sentience_registry`` by ``element_id`` and
    calls ``scrollIntoView()`` on it with ``inline: 'nearest'``.

    Args:
        browser: SentienceBrowser instance
        element_id: Element ID from snapshot to scroll into view
        behavior: Scroll behavior - 'smooth', 'instant', or 'auto' (default: 'smooth')
        block: Vertical alignment - 'start', 'center', 'end', or 'nearest' (default: 'center')
        take_snapshot: Whether to take snapshot after action

    Returns:
        ActionResult. On failure (registry missing, element not registered, or
        element without ``scrollIntoView``) ``success`` is False, ``outcome`` is
        "error" and ``error["code"]`` is "scroll_failed". On success ``outcome``
        is "navigated" if the page URL changed during the call, otherwise
        "dom_updated".

    Raises:
        RuntimeError: If the browser page has not been started.

    Example:
        >>> snap = snapshot(browser)
        >>> button = find(snap, 'role=button text~"Submit"')
        >>> if button:
        ...     # Scroll element into view with smooth animation
        ...     scroll_to(browser, button.id)
        ...     # Scroll instantly, aligning the element with the top of the viewport
        ...     scroll_to(browser, button.id, behavior='instant', block='start')
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call browser.start() first.")

    start_time = time.time()
    url_before = browser.page.url

    # Scroll element into view using the element registry.
    # The registry itself is checked first: if it is absent, indexing it would
    # throw inside the page and surface as a raw evaluation error instead of
    # the structured "scroll_failed" result below.
    scrolled = browser.page.evaluate(
        """
        (args) => {
            const registry = window.sentience_registry;
            const el = registry ? registry[args.id] : null;
            if (el && el.scrollIntoView) {
                el.scrollIntoView({
                    behavior: args.behavior,
                    block: args.block,
                    inline: 'nearest'
                });
                return true;
            }
            return false;
        }
        """,
        {"id": element_id, "behavior": behavior, "block": block},
    )

    if not scrolled:
        return ActionResult(
            success=False,
            duration_ms=int((time.time() - start_time) * 1000),
            outcome="error",
            error={"code": "scroll_failed", "reason": "Element not found or not scrollable"},
        )

    # Wait a bit for scroll to complete (especially for smooth scrolling).
    # Only the literal "smooth" gets the longer wait; 'auto' and 'instant'
    # both use the short one.
    wait_time = 500 if behavior == "smooth" else 100
    browser.page.wait_for_timeout(wait_time)

    # duration_ms includes the settle wait above but not the optional snapshot.
    duration_ms = int((time.time() - start_time) * 1000)
    url_after = browser.page.url
    url_changed = url_before != url_after

    outcome = "navigated" if url_changed else "dom_updated"

    snapshot_after: Snapshot | None = None
    if take_snapshot:
        snapshot_after = snapshot(browser)

    return ActionResult(
        success=True,
        duration_ms=duration_ms,
        outcome=outcome,
        url_changed=url_changed,
        snapshot_after=snapshot_after,
    )
342+
343+
245344
def _highlight_rect(
246345
browser: SentienceBrowser, rect: dict[str, float], duration_sec: float = 2.0
247346
) -> None:
@@ -553,7 +652,11 @@ async def click_async(
553652

554653

555654
async def type_text_async(
556-
browser: AsyncSentienceBrowser, element_id: int, text: str, take_snapshot: bool = False
655+
browser: AsyncSentienceBrowser,
656+
element_id: int,
657+
text: str,
658+
take_snapshot: bool = False,
659+
delay_ms: float = 0,
557660
) -> ActionResult:
558661
"""
559662
Type text into an element (async)
@@ -563,9 +666,16 @@ async def type_text_async(
563666
element_id: Element ID from snapshot
564667
text: Text to type
565668
take_snapshot: Whether to take snapshot after action
669+
delay_ms: Delay between keystrokes in milliseconds for human-like typing (default: 0)
566670
567671
Returns:
568672
ActionResult
673+
674+
Example:
675+
>>> # Type instantly (default behavior)
676+
>>> await type_text_async(browser, element_id, "Hello World")
677+
>>> # Type with human-like delay (~10ms between keystrokes)
678+
>>> await type_text_async(browser, element_id, "Hello World", delay_ms=10)
569679
"""
570680
if not browser.page:
571681
raise RuntimeError("Browser not started. Call await browser.start() first.")
@@ -596,8 +706,8 @@ async def type_text_async(
596706
error={"code": "focus_failed", "reason": "Element not found"},
597707
)
598708

599-
# Type using Playwright keyboard
600-
await browser.page.keyboard.type(text)
709+
# Type using Playwright keyboard with optional delay between keystrokes
710+
await browser.page.keyboard.type(text, delay=delay_ms)
601711

602712
duration_ms = int((time.time() - start_time) * 1000)
603713
url_after = browser.page.url
@@ -663,6 +773,94 @@ async def press_async(
663773
)
664774

665775

776+
async def scroll_to_async(
    browser: AsyncSentienceBrowser,
    element_id: int,
    behavior: str = "smooth",
    block: str = "center",
    take_snapshot: bool = False,
) -> ActionResult:
    """
    Scroll an element into view (async)

    Scrolls the page so that the specified element is visible in the viewport.
    Looks the element up in ``window.sentience_registry`` by ``element_id`` and
    calls ``scrollIntoView()`` on it with ``inline: 'nearest'``.

    Args:
        browser: AsyncSentienceBrowser instance
        element_id: Element ID from snapshot to scroll into view
        behavior: Scroll behavior - 'smooth', 'instant', or 'auto' (default: 'smooth')
        block: Vertical alignment - 'start', 'center', 'end', or 'nearest' (default: 'center')
        take_snapshot: Whether to take snapshot after action

    Returns:
        ActionResult. On failure (registry missing, element not registered, or
        element without ``scrollIntoView``) ``success`` is False, ``outcome`` is
        "error" and ``error["code"]`` is "scroll_failed". On success ``outcome``
        is "navigated" if the page URL changed during the call, otherwise
        "dom_updated".

    Raises:
        RuntimeError: If the browser page has not been started.

    Example:
        >>> snap = await snapshot_async(browser)
        >>> button = find(snap, 'role=button text~"Submit"')
        >>> if button:
        ...     # Scroll element into view with smooth animation
        ...     await scroll_to_async(browser, button.id)
        ...     # Scroll instantly, aligning the element with the top of the viewport
        ...     await scroll_to_async(browser, button.id, behavior='instant', block='start')
    """
    if not browser.page:
        raise RuntimeError("Browser not started. Call await browser.start() first.")

    start_time = time.time()
    url_before = browser.page.url

    # Scroll element into view using the element registry.
    # The registry itself is checked first: if it is absent, indexing it would
    # throw inside the page and surface as a raw evaluation error instead of
    # the structured "scroll_failed" result below.
    scrolled = await browser.page.evaluate(
        """
        (args) => {
            const registry = window.sentience_registry;
            const el = registry ? registry[args.id] : null;
            if (el && el.scrollIntoView) {
                el.scrollIntoView({
                    behavior: args.behavior,
                    block: args.block,
                    inline: 'nearest'
                });
                return true;
            }
            return false;
        }
        """,
        {"id": element_id, "behavior": behavior, "block": block},
    )

    if not scrolled:
        return ActionResult(
            success=False,
            duration_ms=int((time.time() - start_time) * 1000),
            outcome="error",
            error={"code": "scroll_failed", "reason": "Element not found or not scrollable"},
        )

    # Wait a bit for scroll to complete (especially for smooth scrolling).
    # Only the literal "smooth" gets the longer wait; 'auto' and 'instant'
    # both use the short one.
    wait_time = 500 if behavior == "smooth" else 100
    await browser.page.wait_for_timeout(wait_time)

    # duration_ms includes the settle wait above but not the optional snapshot.
    duration_ms = int((time.time() - start_time) * 1000)
    url_after = browser.page.url
    url_changed = url_before != url_after

    outcome = "navigated" if url_changed else "dom_updated"

    snapshot_after: Snapshot | None = None
    if take_snapshot:
        snapshot_after = await snapshot_async(browser)

    return ActionResult(
        success=True,
        duration_ms=duration_ms,
        outcome=outcome,
        url_changed=url_changed,
        snapshot_after=snapshot_after,
    )
862+
863+
666864
async def _highlight_rect_async(
667865
browser: AsyncSentienceBrowser, rect: dict[str, float], duration_sec: float = 2.0
668866
) -> None:

sentience/async_api.py

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,13 @@
2323

2424
# ========== Actions (Phase 1) ==========
2525
# Re-export async action functions from actions.py
26-
from sentience.actions import click_async, click_rect_async, press_async, type_text_async
26+
from sentience.actions import (
27+
click_async,
28+
click_rect_async,
29+
press_async,
30+
scroll_to_async,
31+
type_text_async,
32+
)
2733

2834
# ========== Phase 2C: Agent Layer ==========
2935
# Re-export async agent classes from agent.py and base_agent.py
@@ -76,6 +82,7 @@
7682
"click_async", # Re-exported from actions.py
7783
"type_text_async", # Re-exported from actions.py
7884
"press_async", # Re-exported from actions.py
85+
"scroll_to_async", # Re-exported from actions.py
7986
"click_rect_async", # Re-exported from actions.py
8087
# Phase 2A: Core Utilities
8188
"wait_for_async", # Re-exported from wait.py

sentience/extension/background.js

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,14 +28,14 @@ async function handleSnapshotProcessing(rawData, options = {}) {
2828
const startTime = performance.now();
2929
try {
3030
if (!Array.isArray(rawData)) throw new Error("rawData must be an array");
31-
if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(),
31+
if (rawData.length > 1e4 && (rawData = rawData.slice(0, 1e4)), await initWASM(),
3232
!wasmReady) throw new Error("WASM module not initialized");
3333
let analyzedElements, prunedRawData;
3434
try {
3535
const wasmPromise = new Promise((resolve, reject) => {
3636
try {
3737
let result;
38-
result = options.limit || options.filter ? analyze_page_with_options(rawData, options) : analyze_page(rawData),
38+
result = options.limit || options.filter ? analyze_page_with_options(rawData, options) : analyze_page(rawData),
3939
resolve(result);
4040
} catch (e) {
4141
reject(e);
@@ -101,4 +101,4 @@ initWASM().catch(err => {}), chrome.runtime.onMessage.addListener((request, send
101101
event.preventDefault();
102102
}), self.addEventListener("unhandledrejection", event => {
103103
event.preventDefault();
104-
});
104+
});

0 commit comments

Comments
 (0)