Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion sentience/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -107,7 +107,7 @@
from .ordinal import OrdinalIntent, boost_ordinal_elements, detect_ordinal_intent, select_by_ordinal
from .overlay import clear_overlay, show_overlay
from .query import find, query
from .read import extract, extract_async, read
from .read import extract, extract_async, read, read_best_effort
from .recorder import Recorder, Trace, TraceStep, record
from .runtime_agent import RuntimeAgent, RuntimeStep, StepVerification
from .screenshot import screenshot
Expand Down Expand Up @@ -220,6 +220,7 @@
"ScriptGenerator",
"generate",
"read",
"read_best_effort",
"screenshot",
"show_overlay",
"clear_overlay",
Expand Down
23 changes: 23 additions & 0 deletions sentience/agent_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -445,6 +445,29 @@ def _is_captcha_detected(self, snapshot: Snapshot) -> bool:
captcha = getattr(snapshot.diagnostics, "captcha", None) if snapshot.diagnostics else None
if not captcha or not getattr(captcha, "detected", False):
return False
# IMPORTANT: Many sites load CAPTCHA libraries proactively. We only want to
# block execution when there's evidence it's actually *present/active*.
# If we block on low-signal detections (e.g. just a recaptcha script tag),
# interactive runs will “do nothing” and time out.
evidence = getattr(captcha, "evidence", None)
if evidence is not None:
def _list(name: str) -> list[str]:
try:
v = getattr(evidence, name, None)
except Exception:
v = None
if v is None and isinstance(evidence, dict):
v = evidence.get(name)
if not v:
return []
return [str(x) for x in v if x is not None]

iframe_hits = _list("iframe_src_hits")
url_hits = _list("url_hits")
text_hits = _list("text_hits")
# If we only saw selector/script hints, treat as non-blocking.
if not iframe_hits and not url_hits and not text_hits:
return False
confidence = getattr(captcha, "confidence", 0.0)
return confidence >= self._captcha_options.min_confidence

Expand Down
5 changes: 3 additions & 2 deletions sentience/async_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -52,8 +52,8 @@
from sentience.query import find, query

# ========== Phase 2B: Supporting Utilities ==========
# Re-export async read function from read.py
from sentience.read import read_async
# Re-export async read functions from read.py
from sentience.read import read_async, read_best_effort_async

# ========== Phase 2D: Developer Tools ==========
# Re-export async recorder and inspector from their modules
Expand Down Expand Up @@ -90,6 +90,7 @@
"find_text_rect_async", # Re-exported from text_search.py
# Phase 2B: Supporting Utilities
"read_async", # Re-exported from read.py
"read_best_effort_async", # Re-exported from read.py
"show_overlay_async", # Re-exported from overlay.py
"clear_overlay_async", # Re-exported from overlay.py
"expect_async", # Re-exported from expect.py
Expand Down
5 changes: 3 additions & 2 deletions sentience/backends/actions.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,7 @@ async def type_text(
text: str,
target: BBox | dict[str, float] | tuple[float, float] | None = None,
clear_first: bool = False,
delay_ms: float | None = None,
) -> ActionResult:
"""
Type text, optionally clicking a target first.
Expand Down Expand Up @@ -159,8 +160,8 @@ async def type_text(
await backend.eval("document.execCommand('selectAll')")
await asyncio.sleep(0.02)

# Type the text
await backend.type_text(text)
# Type the text (optional human-like delay)
await backend.type_text(text, delay_ms=delay_ms)

duration_ms = int((time.time() - start_time) * 1000)
return ActionResult(
Expand Down
9 changes: 6 additions & 3 deletions sentience/backends/cdp_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -342,8 +342,10 @@ async def wheel(
},
)

async def type_text(self, text: str) -> None:
async def type_text(self, text: str, delay_ms: float | None = None) -> None:
"""Type text using keyboard input."""
# Preserve historical default (~10ms) unless caller overrides.
per_char_delay_s = 0.01 if delay_ms is None else max(0.0, float(delay_ms) / 1000.0)
for char in text:
# Key down
await self._transport.send(
Expand Down Expand Up @@ -372,8 +374,9 @@ async def type_text(self, text: str) -> None:
},
)

# Small delay between characters
await asyncio.sleep(0.01)
# Delay between characters (human-like typing when requested)
if per_char_delay_s:
await asyncio.sleep(per_char_delay_s)

async def wait_ready_state(
self,
Expand Down
5 changes: 3 additions & 2 deletions sentience/backends/playwright_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -315,9 +315,10 @@ async def wheel(

await self._page.mouse.wheel(0, delta_y)

async def type_text(self, text: str) -> None:
async def type_text(self, text: str, delay_ms: float | None = None) -> None:
"""Type text using keyboard input."""
await self._page.keyboard.type(text)
delay = 0 if delay_ms is None else max(0, float(delay_ms))
await self._page.keyboard.type(text, delay=delay)

async def wait_ready_state(
self,
Expand Down
4 changes: 3 additions & 1 deletion sentience/backends/protocol.py
Original file line number Diff line number Diff line change
Expand Up @@ -188,14 +188,16 @@ async def wheel(
"""
...

async def type_text(self, text: str) -> None:
async def type_text(self, text: str, delay_ms: float | None = None) -> None:
"""
Type text using keyboard input.

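How keystrokes are delivered is backend-specific: the CDP backend sends key
events (Input.dispatchKeyEvent) for each character, while the Playwright
backend delegates to the page's keyboard API.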

Args:
text: Text to type
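delay_ms: Optional delay between keystrokes in milliseconds.
If None, each backend applies its own default (the CDP backend keeps
its historical ~10 ms pause per character; the Playwright backend
types with no delay). Negative values are clamped to 0.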
"""
...

Expand Down
8 changes: 8 additions & 0 deletions sentience/llm_provider.py
Original file line number Diff line number Diff line change
Expand Up @@ -343,6 +343,14 @@ def __init__(
base_url: str = "https://api.deepinfra.com/v1/openai",
):
api_key = get_api_key_from_env(["DEEPINFRA_TOKEN", "DEEPINFRA_API_KEY"], api_key)
# IMPORTANT: If we pass api_key=None to the OpenAI SDK client, it may
# implicitly fall back to OPENAI_API_KEY from the environment.
# That leads to confusing 401s against DeepInfra with an OpenAI key.
if not api_key:
raise RuntimeError(
"DeepInfra API key is missing. Set DEEPINFRA_API_KEY (or DEEPINFRA_TOKEN), "
"or pass api_key=... to DeepInfraProvider."
)
super().__init__(api_key=api_key, model=model, base_url=base_url)


Expand Down
Loading
Loading