Skip to content

Commit c70018f

Browse files
authored
Merge pull request #220 from Predicate-Labs/expanded_verification
expanded verifications
2 parents 77a7b44 + 59125ce commit c70018f

File tree

2 files changed

+171
-3
lines changed

2 files changed

+171
-3
lines changed

predicate/agent_runtime.py

Lines changed: 104 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1310,6 +1310,7 @@ async def eventually(
13101310
min_confidence: float | None = None,
13111311
max_snapshot_attempts: int = 3,
13121312
snapshot_kwargs: dict[str, Any] | None = None,
1313+
snapshot_limit_growth: dict[str, Any] | None = None,
13131314
vision_provider: Any | None = None,
13141315
vision_system_prompt: str | None = None,
13151316
vision_user_prompt: str | None = None,
@@ -1325,9 +1326,103 @@ async def eventually(
13251326
snapshot_attempt = 0
13261327
last_outcome = None
13271328

1329+
# Optional: increase SnapshotOptions.limit across retries to widen element coverage.
1330+
#
1331+
# This is useful on long / virtualized pages where an initial small limit may miss
1332+
# a target element, but taking a "bigger" snapshot is enough to make a deterministic
1333+
# predicate pass.
1334+
#
1335+
# Additive schedule (requested):
1336+
# limit(attempt) = min(max_limit, start_limit + step*(attempt-1))
1337+
#
1338+
# Notes:
1339+
# - We clamp to SnapshotOptions Field constraints (1..500).
1340+
# - If both snapshot_kwargs["limit"] and snapshot_limit_growth are provided,
1341+
# snapshot_limit_growth controls the per-attempt limit (callers can set
1342+
# start_limit explicitly if desired).
1343+
growth = snapshot_limit_growth or None
1344+
growth_apply_on = "only_on_fail"
1345+
growth_start: int | None = None
1346+
growth_step: int | None = None
1347+
growth_max: int | None = None
1348+
if isinstance(growth, dict) and growth:
1349+
try:
1350+
growth_apply_on = str(growth.get("apply_on") or "only_on_fail")
1351+
except Exception:
1352+
growth_apply_on = "only_on_fail"
1353+
try:
1354+
v = growth.get("start_limit", None)
1355+
growth_start = int(v) if v is not None else None
1356+
except Exception:
1357+
growth_start = None
1358+
try:
1359+
v = growth.get("step", None)
1360+
growth_step = int(v) if v is not None else None
1361+
except Exception:
1362+
growth_step = None
1363+
try:
1364+
v = growth.get("max_limit", None)
1365+
growth_max = int(v) if v is not None else None
1366+
except Exception:
1367+
growth_max = None
1368+
1369+
# Resolve defaults from runtime + snapshot_kwargs.
1370+
if growth and growth_start is None:
1371+
try:
1372+
if snapshot_kwargs and snapshot_kwargs.get("limit") is not None:
1373+
growth_start = int(snapshot_kwargs["limit"])
1374+
except Exception:
1375+
growth_start = None
1376+
if growth and growth_start is None:
1377+
try:
1378+
growth_start = int(getattr(self.runtime, "_snapshot_options", None).limit) # type: ignore[attr-defined]
1379+
except Exception:
1380+
growth_start = None
1381+
if growth and growth_start is None:
1382+
growth_start = 50 # SnapshotOptions default
1383+
1384+
if growth and growth_step is None:
1385+
growth_step = max(1, int(growth_start))
1386+
if growth and growth_max is None:
1387+
growth_max = 500
1388+
1389+
def _clamp_limit(n: int) -> int:
    """Clamp a requested snapshot limit into the inclusive range 1..500.

    The surrounding comments describe 1..500 as the SnapshotOptions field
    constraints; values outside that range are pulled to the nearest bound.

    Args:
        n: Requested snapshot limit.

    Returns:
        ``1`` when ``n`` is below 1, ``500`` when ``n`` is above 500,
        otherwise ``n`` unchanged.
    """
    # Cap from above first, then lift to the lower bound.
    return max(1, min(500, n))
1395+
1396+
def _limit_for_attempt(attempt_idx_1based: int) -> int:
    """Return the snapshot limit to use for a given 1-based attempt number.

    Implements the additive schedule
    ``min(max_limit, start_limit + step * (attempt - 1))`` and passes the
    result through ``_clamp_limit``. Attempt numbers below 1 are treated like
    the first attempt (zero growth steps).

    Reads ``growth_start``, ``growth_step`` and ``growth_max`` from the
    enclosing scope; all three must already be resolved to integers.
    """
    assert growth_start is not None
    assert growth_step is not None
    assert growth_max is not None
    start = int(growth_start)
    step = int(growth_step)
    # Growth steps already taken before this attempt; never negative.
    steps_taken = max(0, int(attempt_idx_1based) - 1)
    uncapped = start + step * steps_taken
    capped = min(int(growth_max), uncapped)
    return _clamp_limit(capped)
1400+
13281401
while True:
13291402
attempt += 1
1330-
await self.runtime.snapshot(**(snapshot_kwargs or {}))
1403+
1404+
per_attempt_kwargs = dict(snapshot_kwargs or {})
1405+
snapshot_limit: int | None = None
1406+
if growth:
1407+
# Only grow if requested; otherwise fixed start_limit.
1408+
apply = growth_apply_on == "all"
1409+
if growth_apply_on == "only_on_fail":
1410+
# attempt==1 always uses the start_limit; attempt>1 grows (since we'd have
1411+
# returned already if the previous attempt passed).
1412+
apply = attempt == 1 or (last_outcome is not None and not bool(last_outcome.passed))
1413+
if apply:
1414+
snapshot_limit = _limit_for_attempt(attempt)
1415+
else:
1416+
snapshot_limit = _clamp_limit(int(growth_start or 50))
1417+
per_attempt_kwargs["limit"] = snapshot_limit
1418+
else:
1419+
try:
1420+
if per_attempt_kwargs.get("limit") is not None:
1421+
snapshot_limit = int(per_attempt_kwargs["limit"])
1422+
except Exception:
1423+
snapshot_limit = None
1424+
1425+
await self.runtime.snapshot(**per_attempt_kwargs)
13311426
snapshot_attempt += 1
13321427

13331428
# Optional: gate predicate evaluation on snapshot confidence.
@@ -1372,6 +1467,7 @@ async def eventually(
13721467
"eventually": True,
13731468
"attempt": attempt,
13741469
"snapshot_attempt": snapshot_attempt,
1470+
"snapshot_limit": snapshot_limit,
13751471
},
13761472
)
13771473

@@ -1481,6 +1577,7 @@ async def eventually(
14811577
"eventually": True,
14821578
"attempt": attempt,
14831579
"snapshot_attempt": snapshot_attempt,
1580+
"snapshot_limit": snapshot_limit,
14841581
"final": True,
14851582
"timeout": True,
14861583
},
@@ -1503,7 +1600,12 @@ async def eventually(
15031600
required=self.required,
15041601
kind="assert",
15051602
record_in_step=False,
1506-
extra={"eventually": True, "attempt": attempt},
1603+
extra={
1604+
"eventually": True,
1605+
"attempt": attempt,
1606+
"snapshot_attempt": snapshot_attempt,
1607+
"snapshot_limit": snapshot_limit,
1608+
},
15071609
)
15081610

15091611
if last_outcome.passed:

predicate/asserts/expect.py

Lines changed: 67 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,9 @@ class EventuallyConfig:
4444
timeout: float = DEFAULT_TIMEOUT # Max time to wait (seconds)
4545
poll: float = DEFAULT_POLL # Interval between retries (seconds)
4646
max_retries: int = DEFAULT_MAX_RETRIES # Max number of retry attempts
47+
# Optional: increase SnapshotOptions.limit across retries (additive schedule).
48+
# See docs/expand_deterministic_verifications_sdk.md for details.
49+
snapshot_limit_growth: dict[str, Any] | None = None
4750

4851

4952
class ExpectBuilder:
@@ -514,6 +517,51 @@ async def evaluate(self, ctx: AssertContext, snapshot_fn) -> AssertOutcome:
514517
last_outcome: AssertOutcome | None = None
515518
attempts = 0
516519

520+
growth = self._config.snapshot_limit_growth
521+
growth_apply_on = "only_on_fail"
522+
growth_start: int | None = None
523+
growth_step: int | None = None
524+
growth_max: int | None = None
525+
if isinstance(growth, dict) and growth:
526+
try:
527+
growth_apply_on = str(growth.get("apply_on") or "only_on_fail")
528+
except Exception:
529+
growth_apply_on = "only_on_fail"
530+
try:
531+
v = growth.get("start_limit", None)
532+
growth_start = int(v) if v is not None else None
533+
except Exception:
534+
growth_start = None
535+
try:
536+
v = growth.get("step", None)
537+
growth_step = int(v) if v is not None else None
538+
except Exception:
539+
growth_step = None
540+
try:
541+
v = growth.get("max_limit", None)
542+
growth_max = int(v) if v is not None else None
543+
except Exception:
544+
growth_max = None
545+
546+
if growth and growth_start is None:
547+
growth_start = 50
548+
if growth and growth_step is None:
549+
growth_step = max(1, int(growth_start or 50))
550+
if growth and growth_max is None:
551+
growth_max = 500
552+
553+
def _clamp_limit(n: int) -> int:
    """Clamp a requested snapshot limit into the inclusive range 1..500.

    Values outside the range are pulled to the nearest bound so the limit
    handed to the snapshot call is always between 1 and 500.

    Args:
        n: Requested snapshot limit.

    Returns:
        ``1`` when ``n`` is below 1, ``500`` when ``n`` is above 500,
        otherwise ``n`` unchanged.
    """
    # Cap from above first, then lift to the lower bound.
    return max(1, min(500, n))
559+
560+
def _limit_for_attempt(attempt_idx_1based: int) -> int:
    """Return the snapshot limit to use for a given 1-based attempt number.

    Computes ``start_limit + step * (attempt - 1)``, caps it at the configured
    maximum, and passes the result through ``_clamp_limit``. Attempt numbers
    below 1 are treated like the first attempt (zero growth steps).

    Reads ``growth_start``, ``growth_step`` and ``growth_max`` from the
    enclosing scope; all three must already be resolved to integers.
    """
    assert growth_start is not None
    assert growth_step is not None
    assert growth_max is not None
    start = int(growth_start)
    step = int(growth_step)
    # Growth steps already taken before this attempt; never negative.
    steps_taken = max(0, int(attempt_idx_1based) - 1)
    uncapped = start + step * steps_taken
    capped = min(int(growth_max), uncapped)
    return _clamp_limit(capped)
564+
517565
while True:
518566
# Check timeout (higher precedence than max_retries)
519567
elapsed = time.monotonic() - start_time
@@ -543,7 +591,23 @@ async def evaluate(self, ctx: AssertContext, snapshot_fn) -> AssertOutcome:
543591
# Take fresh snapshot if not first attempt
544592
if attempts > 0:
545593
try:
546-
fresh_snapshot = await snapshot_fn()
594+
# If snapshot_fn supports kwargs (e.g. runtime.snapshot), pass adaptive limit.
595+
snap_limit = None
596+
if growth:
597+
# attempts > 0 here, so the upcoming snapshot belongs to 1-based try number attempts + 1 (the 2nd try or later)
598+
attempt_idx = attempts + 1
599+
apply = growth_apply_on == "all" or (
600+
growth_apply_on == "only_on_fail" and last_outcome is not None
601+
)
602+
if apply:
603+
snap_limit = _limit_for_attempt(attempt_idx)
604+
if snap_limit is not None:
605+
try:
606+
fresh_snapshot = await snapshot_fn(limit=int(snap_limit))
607+
except TypeError:
608+
fresh_snapshot = await snapshot_fn()
609+
else:
610+
fresh_snapshot = await snapshot_fn()
547611
ctx = AssertContext(
548612
snapshot=fresh_snapshot,
549613
url=fresh_snapshot.url if fresh_snapshot else ctx.url,
@@ -588,6 +652,7 @@ def with_eventually(
588652
timeout: float = DEFAULT_TIMEOUT,
589653
poll: float = DEFAULT_POLL,
590654
max_retries: int = DEFAULT_MAX_RETRIES,
655+
snapshot_limit_growth: dict[str, Any] | None = None,
591656
) -> EventuallyWrapper:
592657
"""
593658
Wrap a predicate with retry logic.
@@ -617,5 +682,6 @@ def with_eventually(
617682
timeout=timeout,
618683
poll=poll,
619684
max_retries=max_retries,
685+
snapshot_limit_growth=snapshot_limit_growth,
620686
)
621687
return EventuallyWrapper(predicate, config)

0 commit comments

Comments
 (0)