Skip to content

Commit f64e3cd

Browse files
authored
Merge pull request #208 from SentienceAPI/tweak_time_recaptcha
timeout param + passive captcha handling
2 parents dc9dabb + c6e9543 commit f64e3cd

File tree

6 files changed: 189 additions and 5 deletions.

sentience/agent_runtime.py

Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -612,9 +612,52 @@ def _list(name: str) -> list[str]:
612612
iframe_hits = _list("iframe_src_hits")
613613
url_hits = _list("url_hits")
614614
text_hits = _list("text_hits")
615+
selector_hits = _list("selector_hits")
616+
615617
# If we only saw selector/script hints, treat as non-blocking.
616618
if not iframe_hits and not url_hits and not text_hits:
617619
return False
620+
621+
# Heuristic: many sites include a passive reCAPTCHA badge (v3) that should NOT block.
622+
# We only want to block when there's evidence of an interactive challenge.
623+
hits_all = [*iframe_hits, *url_hits, *text_hits, *selector_hits]
624+
hits_l = [str(x).lower() for x in hits_all if x]
625+
626+
strong_text = any(
627+
k in " ".join(hits_l)
628+
for k in (
629+
"i'm not a robot",
630+
"verify you are human",
631+
"human verification",
632+
"complete the security check",
633+
"please verify",
634+
)
635+
)
636+
strong_iframe = any(
637+
any(k in h for k in ("api2/bframe", "hcaptcha", "turnstile"))
638+
for h in hits_l
639+
)
640+
strong_selector = any(
641+
any(
642+
k in h
643+
for k in (
644+
"g-recaptcha-response",
645+
"h-captcha-response",
646+
"cf-turnstile-response",
647+
"recaptcha-checkbox",
648+
"hcaptcha-checkbox",
649+
)
650+
)
651+
for h in hits_l
652+
)
653+
only_generic = (
654+
not strong_text
655+
and not strong_iframe
656+
and not strong_selector
657+
and all(("captcha" in h or "recaptcha" in h) for h in hits_l)
658+
)
659+
if only_generic:
660+
return False
618661
confidence = getattr(captcha, "confidence", 0.0)
619662
return confidence >= self._captcha_options.min_confidence
620663

sentience/backends/snapshot.py

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -595,7 +595,10 @@ async def _snapshot_via_api(
595595

596596
try:
597597
api_result = await _post_snapshot_to_gateway_async(
598-
payload, options.sentience_api_key, api_url
598+
payload,
599+
options.sentience_api_key,
600+
api_url,
601+
timeout_s=options.gateway_timeout_s,
599602
)
600603

601604
# Merge API result with local data (screenshot, etc.)

sentience/models.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -776,6 +776,7 @@ class SnapshotOptions(BaseModel):
776776
limit: int = Field(50, ge=1, le=500)
777777
filter: SnapshotFilter | None = None
778778
use_api: bool | None = None # Force API vs extension
779+
gateway_timeout_s: float | None = None # Gateway snapshot timeout (seconds)
779780
save_trace: bool = False # Save raw_elements to JSON for benchmarking/training
780781
trace_path: str | None = None # Path to save trace (default: "trace_{timestamp}.json")
781782
goal: str | None = None # Optional goal/task description for the snapshot

sentience/snapshot.py

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,13 @@ def from_httpx(cls, e: Exception) -> "SnapshotGatewayError":
106106
bits.append(f"err_type={type(e).__name__}")
107107
if err_s:
108108
bits.append(f"err={err_s}")
109+
else:
110+
# Some transport errors (e.g. httpx.ReadError) can stringify to "".
111+
# Include repr() so callers can still see the exception type/shape.
112+
try:
113+
bits.append(f"err_repr={cls._snip(repr(e), 220)}")
114+
except Exception:
115+
pass
109116
if bits:
110117
msg = f"{msg}: " + " ".join(bits)
111118
msg = msg + ". Try using use_api=False to use local extension instead."
@@ -162,6 +169,11 @@ def from_requests(cls, e: Exception) -> "SnapshotGatewayError":
162169
bits.append(f"err_type={type(e).__name__}")
163170
if err_s:
164171
bits.append(f"err={err_s}")
172+
else:
173+
try:
174+
bits.append(f"err_repr={cls._snip(repr(e), 220)}")
175+
except Exception:
176+
pass
165177
if bits:
166178
msg = f"{msg}: " + " ".join(bits)
167179
msg = msg + ". Try using use_api=False to use local extension instead."
@@ -311,6 +323,8 @@ def _post_snapshot_to_gateway_sync(
311323
payload: dict[str, Any],
312324
api_key: str,
313325
api_url: str = SENTIENCE_API_URL,
326+
*,
327+
timeout_s: float | None = None,
314328
) -> dict[str, Any]:
315329
"""
316330
Post snapshot payload to gateway (synchronous).
@@ -326,11 +340,12 @@ def _post_snapshot_to_gateway_sync(
326340
}
327341

328342
try:
343+
timeout = 30 if timeout_s is None else float(timeout_s)
329344
response = requests.post(
330345
f"{api_url}/v1/snapshot",
331346
data=payload_json,
332347
headers=headers,
333-
timeout=30,
348+
timeout=timeout,
334349
)
335350
response.raise_for_status()
336351
return response.json()
@@ -345,6 +360,8 @@ async def _post_snapshot_to_gateway_async(
345360
payload: dict[str, Any],
346361
api_key: str,
347362
api_url: str = SENTIENCE_API_URL,
363+
*,
364+
timeout_s: float | None = None,
348365
) -> dict[str, Any]:
349366
"""
350367
Post snapshot payload to gateway (asynchronous).
@@ -362,7 +379,8 @@ async def _post_snapshot_to_gateway_async(
362379
"Content-Type": "application/json",
363380
}
364381

365-
async with httpx.AsyncClient(timeout=30.0) as client:
382+
timeout = 30.0 if timeout_s is None else float(timeout_s)
383+
async with httpx.AsyncClient(timeout=timeout) as client:
366384
try:
367385
response = await client.post(
368386
f"{api_url}/v1/snapshot",
@@ -604,7 +622,12 @@ def _snapshot_via_api(
604622
payload = _build_snapshot_payload(raw_result, options)
605623

606624
try:
607-
api_result = _post_snapshot_to_gateway_sync(payload, api_key, api_url)
625+
api_result = _post_snapshot_to_gateway_sync(
626+
payload,
627+
api_key,
628+
api_url,
629+
timeout_s=options.gateway_timeout_s,
630+
)
608631

609632
# Merge API result with local data (screenshot, etc.)
610633
snapshot_data = _merge_api_result_with_local(api_result, raw_result)
@@ -923,7 +946,8 @@ async def _snapshot_via_api_async(
923946
# Lazy import httpx - only needed for async API calls
924947
import httpx
925948

926-
async with httpx.AsyncClient(timeout=30.0) as client:
949+
timeout = 30.0 if options.gateway_timeout_s is None else float(options.gateway_timeout_s)
950+
async with httpx.AsyncClient(timeout=timeout) as client:
927951
response = await client.post(
928952
f"{api_url}/v1/snapshot",
929953
content=payload_json,
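(File path not captured in this extract; the hunk below is a newly added test module covering the gateway timeout parameter.)

Lines changed: 108 additions & 0 deletions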
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,108 @@
1+
import asyncio
2+
import importlib
3+
import sys
4+
5+
snapshot_module = importlib.import_module("sentience.snapshot")
6+
from sentience.snapshot import _post_snapshot_to_gateway_async, _post_snapshot_to_gateway_sync
7+
8+
9+
class _DummyResponse:
    """Minimal stand-in for an HTTP response object used by the fake clients.

    It exposes only ``raise_for_status`` and ``json``, which is all the
    fake ``post`` implementations in this module need to hand back.
    """

    def raise_for_status(self):
        """Do nothing, modelling a successful (non-error) response."""
        return None

    def json(self):
        """Return a fixed snapshot payload with no elements."""
        return {"status": "success", "elements": [], "url": "https://example.com"}
15+
16+
17+
def test_post_snapshot_async_uses_default_timeout(monkeypatch):
    """Check that the async gateway helper uses a 30.0 s timeout by default.

    The helper is called without ``timeout_s``; the fake client records the
    ``timeout`` it was constructed with so the test can inspect it.
    """

    class DummyClient:
        """Fake ``AsyncClient`` that remembers the timeout passed to it."""

        last_timeout = None

        def __init__(self, timeout):
            DummyClient.last_timeout = timeout

        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            return None

        async def post(self, *args, **kwargs):
            return _DummyResponse()

    # Swap the ``httpx`` entry in sys.modules for a stub exposing only
    # ``AsyncClient``. NOTE(review): this presumably relies on the helper
    # importing httpx at call time -- confirm against sentience.snapshot.
    dummy_httpx = type("DummyHttpx", (), {"AsyncClient": DummyClient})
    monkeypatch.setitem(sys.modules, "httpx", dummy_httpx)
    asyncio.run(
        _post_snapshot_to_gateway_async(
            {
                "raw_elements": [],
                "url": "https://example.com",
                "viewport": None,
                "goal": None,
                "options": {},
            },
            "sk_test",
            "https://api.sentienceapi.com",
        )
    )
    assert DummyClient.last_timeout == 30.0
43+
44+
45+
def test_post_snapshot_async_uses_custom_timeout(monkeypatch):
    """Check that the async gateway helper forwards an explicit ``timeout_s``.

    The helper is called with ``timeout_s=12.5``; the fake client records the
    ``timeout`` it was constructed with so the test can inspect it.
    """

    class DummyClient:
        """Fake ``AsyncClient`` that remembers the timeout passed to it."""

        last_timeout = None

        def __init__(self, timeout):
            DummyClient.last_timeout = timeout

        async def __aenter__(self):
            return self

        async def __aexit__(self, exc_type, exc, tb):
            return None

        async def post(self, *args, **kwargs):
            return _DummyResponse()

    # Swap the ``httpx`` entry in sys.modules for a stub exposing only
    # ``AsyncClient``. NOTE(review): this presumably relies on the helper
    # importing httpx at call time -- confirm against sentience.snapshot.
    dummy_httpx = type("DummyHttpx", (), {"AsyncClient": DummyClient})
    monkeypatch.setitem(sys.modules, "httpx", dummy_httpx)
    asyncio.run(
        _post_snapshot_to_gateway_async(
            {
                "raw_elements": [],
                "url": "https://example.com",
                "viewport": None,
                "goal": None,
                "options": {},
            },
            "sk_test",
            "https://api.sentienceapi.com",
            timeout_s=12.5,
        )
    )
    assert DummyClient.last_timeout == 12.5
72+
73+
74+
def test_post_snapshot_sync_uses_default_timeout(monkeypatch):
    """Check that the sync gateway helper passes ``timeout=30`` by default.

    The helper is called without ``timeout_s``; the fake ``requests`` object
    records the ``timeout`` keyword given to ``post``.
    """

    class DummyRequests:
        """Fake ``requests`` namespace that remembers the ``timeout`` kwarg."""

        last_timeout = None

        @staticmethod
        def post(*args, **kwargs):
            DummyRequests.last_timeout = kwargs.get("timeout")
            return _DummyResponse()

    # Patch the name ``requests`` on the snapshot module itself, so the
    # helper's ``requests.post`` call lands on the fake.
    monkeypatch.setattr(snapshot_module, "requests", DummyRequests)
    _post_snapshot_to_gateway_sync(
        {
            "raw_elements": [],
            "url": "https://example.com",
            "viewport": None,
            "goal": None,
            "options": {},
        },
        "sk_test",
        "https://api.sentienceapi.com",
    )
    assert DummyRequests.last_timeout == 30
90+
91+
92+
def test_post_snapshot_sync_uses_custom_timeout(monkeypatch):
    """Check that the sync gateway helper forwards an explicit ``timeout_s``.

    The helper is called with ``timeout_s=9.0``; the fake ``requests`` object
    records the ``timeout`` keyword given to ``post``.
    """

    class DummyRequests:
        """Fake ``requests`` namespace that remembers the ``timeout`` kwarg."""

        last_timeout = None

        @staticmethod
        def post(*args, **kwargs):
            DummyRequests.last_timeout = kwargs.get("timeout")
            return _DummyResponse()

    # Patch the name ``requests`` on the snapshot module itself, so the
    # helper's ``requests.post`` call lands on the fake.
    monkeypatch.setattr(snapshot_module, "requests", DummyRequests)
    _post_snapshot_to_gateway_sync(
        {
            "raw_elements": [],
            "url": "https://example.com",
            "viewport": None,
            "goal": None,
            "options": {},
        },
        "sk_test",
        "https://api.sentienceapi.com",
        timeout_s=9.0,
    )
    assert DummyRequests.last_timeout == 9.0

traces/test-run.jsonl

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,5 @@
1+
{"v": 1, "type": "run_start", "ts": "2026-02-05T06:20:59.000Z", "run_id": "test-run", "seq": 1, "data": {"agent": "SentienceAgent"}, "ts_ms": 1770272459843}
2+
{"v": 1, "type": "run_start", "ts": "2026-02-05T06:20:59.000Z", "run_id": "test-run", "seq": 1, "data": {"agent": "SentienceAgent"}, "ts_ms": 1770272459845}
3+
{"v": 1, "type": "run_start", "ts": "2026-02-05T06:20:59.000Z", "run_id": "test-run", "seq": 1, "data": {"agent": "SentienceAgent"}, "ts_ms": 1770272459846}
4+
{"v": 1, "type": "run_start", "ts": "2026-02-05T06:20:59.000Z", "run_id": "test-run", "seq": 1, "data": {"agent": "SentienceAgent"}, "ts_ms": 1770272459848}
5+
{"v": 1, "type": "run_start", "ts": "2026-02-05T06:20:59.000Z", "run_id": "test-run", "seq": 1, "data": {"agent": "SentienceAgent"}, "ts_ms": 1770272459855}

0 commit comments

Comments
 (0)