Skip to content

Commit 03c9f0e

Browse files
authored
Merge pull request #222 from Predicate-Labs/scroll_verify
scroll verification
2 parents ce2a873 + 0d8785a commit 03c9f0e

File tree

3 files changed

+216
-0
lines changed

3 files changed

+216
-0
lines changed

README.md

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,24 @@ def login_example() -> None:
155155
- Fluent assertion DSL via `expect(...)`
156156
- Retrying verification via `runtime.check(...).eventually(...)`
157157

158+
### Scroll verification (prevent no-op scroll drift)
159+
160+
A common agent failure mode is “scrolling” without the UI actually advancing (for example because of overlays, nested scrollers, or focus issues). Use `AgentRuntime.scroll_by(...)` to deterministically verify that the scroll *had an effect* by comparing `scrollTop` before and after the scroll.
161+
162+
```python
163+
runtime.begin_step("Scroll the page and verify it moved")
164+
ok = await runtime.scroll_by(
165+
600,
166+
verify=True,
167+
min_delta_px=50,
168+
label="scroll_effective",
169+
required=True,
170+
timeout_s=5.0,
171+
)
172+
if not ok:
173+
raise RuntimeError("Scroll had no effect (likely blocked by overlay or nested scroller).")
174+
```
175+
158176
### Explained failure
159177

160178
- JSONL trace events (`Tracer` + `JsonlTraceSink`)

predicate/agent_runtime.py

Lines changed: 145 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -484,6 +484,151 @@ async def evaluate_js(self, request: EvaluateJsRequest) -> EvaluateJsResult:
484484
truncated=truncated,
485485
)
486486

487+
async def _get_scroll_metrics(self) -> dict[str, Any]:
    """
    Best-effort, bounded scroll metrics for verification.

    Evaluates one small JavaScript expression through ``self.backend.eval``
    and normalizes whatever comes back into a dict.

    Returns a small JSON-serializable dict with:
    - top: current scrollTop (px); None when the page-side read failed
    - height: scrollHeight (px) if available, else None
    - client: clientHeight (px) if available, else None
    - error: only present when the page-side expression caught an exception

    The three metric keys are always present, even if the backend returns a
    partial dict or a bare (non-dict) value.
    """
    # Keep this as a single bounded expression; do not dump DOM.
    expr = """
    (() => {
      try {
        const el = document.scrollingElement || document.documentElement || document.body;
        const top =
          (el && typeof el.scrollTop === 'number')
            ? el.scrollTop
            : (typeof window.scrollY === 'number' ? window.scrollY : 0);
        const height = (el && typeof el.scrollHeight === 'number') ? el.scrollHeight : null;
        const client = (el && typeof el.clientHeight === 'number') ? el.clientHeight : null;
        return { top, height, client };
      } catch (e) {
        return { top: null, height: null, client: null, error: String(e && e.message ? e.message : e) };
      }
    })()
    """.strip()
    v = await self.backend.eval(expr)
    if isinstance(v, dict):
        # Fill in any missing metric keys so callers can rely on the documented shape;
        # keys supplied by the backend (including "error") win over the None defaults.
        return {"top": None, "height": None, "client": None, **v}
    # A bare value is treated as the scroll offset itself.
    return {"top": v, "height": None, "client": None}
517+
518+
async def scroll_by(
519+
self,
520+
dy: float,
521+
*,
522+
verify: bool = True,
523+
min_delta_px: float = 50.0,
524+
label: str = "scroll_effective",
525+
required: bool = True,
526+
timeout_s: float = 10.0,
527+
poll_s: float = 0.25,
528+
x: float | None = None,
529+
y: float | None = None,
530+
js_fallback: bool = True,
531+
) -> bool:
532+
"""
533+
Scroll and (optionally) deterministically verify that the scroll had effect.
534+
535+
This targets a common failure mode: an agent "scrolls" but the page doesn't
536+
actually advance (delta stays ~0 due to overlays, focus, nested scrollers, etc.).
537+
538+
Behavior:
539+
- captures a bounded before/after scrollTop metric
540+
- performs a wheel scroll via backend (most compatible)
541+
- if verify=True, polls until |after-before| >= min_delta_px or timeout
542+
- optionally attempts a JS scrollBy fallback once if wheel has no effect
543+
544+
Returns:
545+
True if scroll was effective (or verify=False), else False.
546+
"""
547+
await self.record_action(f"scroll_by(dy={dy})", url=await self.get_url())
548+
549+
if not verify:
550+
await self.backend.wheel(delta_y=float(dy), x=x, y=y)
551+
return True
552+
553+
before = await self._get_scroll_metrics()
554+
before_top = before.get("top")
555+
try:
556+
before_top_f = float(before_top) if before_top is not None else 0.0
557+
except Exception:
558+
before_top_f = 0.0
559+
560+
used_js_fallback = False
561+
start = time.monotonic()
562+
563+
# First attempt: wheel scroll (preferred).
564+
await self.backend.wheel(delta_y=float(dy), x=x, y=y)
565+
566+
while True:
567+
after = await self._get_scroll_metrics()
568+
after_top = after.get("top")
569+
try:
570+
after_top_f = float(after_top) if after_top is not None else before_top_f
571+
except Exception:
572+
after_top_f = before_top_f
573+
574+
delta = after_top_f - before_top_f
575+
passed = abs(delta) >= float(min_delta_px)
576+
577+
if passed:
578+
outcome = AssertOutcome(
579+
passed=True,
580+
reason="",
581+
details={
582+
"dy": float(dy),
583+
"min_delta_px": float(min_delta_px),
584+
"before": before,
585+
"after": after,
586+
"delta_px": float(delta),
587+
"js_fallback_used": used_js_fallback,
588+
},
589+
)
590+
self._record_outcome(
591+
outcome=outcome,
592+
label=label,
593+
required=required,
594+
kind="scroll",
595+
record_in_step=True,
596+
)
597+
return True
598+
599+
elapsed = time.monotonic() - start
600+
if elapsed >= float(timeout_s):
601+
outcome = AssertOutcome(
602+
passed=False,
603+
reason=f"scroll delta {delta:.1f}px < min_delta_px={float(min_delta_px):.1f}px",
604+
details={
605+
"dy": float(dy),
606+
"min_delta_px": float(min_delta_px),
607+
"before": before,
608+
"after": after,
609+
"delta_px": float(delta),
610+
"js_fallback_used": used_js_fallback,
611+
"timeout_s": float(timeout_s),
612+
},
613+
)
614+
self._record_outcome(
615+
outcome=outcome,
616+
label=label,
617+
required=required,
618+
kind="scroll",
619+
record_in_step=True,
620+
)
621+
if required:
622+
self._persist_failure_artifacts(reason=f"scroll_failed:{label}")
623+
return False
624+
625+
# Optional fallback: if wheel had no effect, try a bounded JS scroll request once.
626+
if js_fallback and not used_js_fallback and abs(delta) < 1.0:
627+
used_js_fallback = True
628+
await self.backend.eval(f"window.scrollBy(0, {float(dy)})")
629+
630+
await asyncio.sleep(float(poll_s))
631+
487632
async def list_tabs(self) -> TabListResult:
488633
backend = self._get_tab_backend()
489634
if backend is None:

tests/test_agent_runtime.py

Lines changed: 53 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -165,6 +165,59 @@ def test_init_with_api_key_and_options(self) -> None:
165165
assert runtime._snapshot_options.sentience_api_key == "sk_pro_key"
166166
assert runtime._snapshot_options.use_api is True
167167

168+
169+
@pytest.mark.asyncio
async def test_scroll_by_verifies_delta_via_scrolltop() -> None:
    """scroll_by() returns True and records a scroll verification when scrollTop moves enough."""
    backend = MagicMock()
    backend.get_url = AsyncMock(return_value="https://example.com")
    backend.wheel = AsyncMock(return_value=None)

    # _get_scroll_metrics() uses backend.eval() with a bounded expression; return before/after.
    backend.eval = AsyncMock(
        side_effect=[
            {"top": 100, "height": 2000, "client": 800},  # before
            {"top": 180, "height": 2000, "client": 800},  # after
        ]
    )
    tracer = MockTracer()
    runtime = AgentRuntime(backend=backend, tracer=tracer)
    runtime.begin_step("scroll test")

    ok = await runtime.scroll_by(200, verify=True, min_delta_px=50, timeout_s=1.0, poll_s=0.01)
    assert ok is True
    # Exactly one wheel scroll with the requested delta, and no pointer position supplied.
    backend.wheel.assert_awaited_once_with(delta_y=200.0, x=None, y=None)
    # One "before" and one "after" metrics read; the 80px delta passes on the first poll,
    # so no JS fallback eval is issued.
    assert backend.eval.await_count == 2
    assert any(
        e["type"] == "verification"
        and e["data"].get("kind") == "scroll"
        and e["data"].get("passed") is True
        for e in tracer.events
    )
192+
193+
194+
@pytest.mark.asyncio
async def test_scroll_by_times_out_and_records_failed_verification() -> None:
    """scroll_by() returns False and records a failed verification when scrollTop never moves."""
    backend = MagicMock()
    backend.get_url = AsyncMock(return_value="https://example.com")
    backend.wheel = AsyncMock(return_value=None)

    # before and after unchanged → should fail (allow unlimited polls)
    async def _eval(_expr: str):
        # Fresh dict per call so the "before" and "after" samples are distinct objects.
        return {"top": 100, "height": 2000, "client": 800}

    backend.eval = AsyncMock(side_effect=_eval)
    tracer = MockTracer()
    runtime = AgentRuntime(backend=backend, tracer=tracer)
    runtime.begin_step("scroll fail")

    ok = await runtime.scroll_by(200, verify=True, min_delta_px=50, timeout_s=0.05, poll_s=0.01)
    assert ok is False
    # The wheel scroll is attempted exactly once even though verification keeps polling.
    backend.wheel.assert_awaited_once_with(delta_y=200.0, x=None, y=None)
    # At least the "before" read plus one "after" poll must have gone through eval.
    assert backend.eval.await_count >= 2
    assert any(
        e["type"] == "verification"
        and e["data"].get("kind") == "scroll"
        and e["data"].get("passed") is False
        for e in tracer.events
    )
220+
168221
@pytest.mark.asyncio
169222
async def test_evaluate_js_success(self) -> None:
170223
backend = MockBackend()

0 commit comments

Comments
 (0)