Skip to content

Commit f0b1907

Browse files
author
SentienceDEV
committed
P1 done
1 parent 85a332d commit f0b1907

File tree

4 files changed

+308
-34
lines changed

4 files changed

+308
-34
lines changed

sentience/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
from .actions import click, click_rect, press, scroll_to, type_text
1515
from .agent import SentienceAgent, SentienceAgentAsync
1616
from .agent_config import AgentConfig
17-
from .agent_runtime import AgentRuntime
17+
from .agent_runtime import AgentRuntime, AssertionHandle
1818

1919
# Backend-agnostic actions (aliased to avoid conflict with existing actions)
2020
# Browser backends (for browser-use integration)

sentience/agent_runtime.py

Lines changed: 210 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -63,7 +63,11 @@
6363

6464
from __future__ import annotations
6565

66+
import asyncio
67+
import difflib
68+
import time
6669
import uuid
70+
from dataclasses import dataclass
6771
from typing import TYPE_CHECKING, Any
6872

6973
from .models import Snapshot, SnapshotOptions
@@ -298,29 +302,23 @@ def assert_(
298302
True if assertion passed, False otherwise
299303
"""
300304
outcome = predicate(self._ctx())
305+
self._record_outcome(
306+
outcome=outcome,
307+
label=label,
308+
required=required,
309+
kind="assert",
310+
record_in_step=True,
311+
)
312+
return outcome.passed
301313

302-
record = {
303-
"label": label,
304-
"passed": outcome.passed,
305-
"required": required,
306-
"reason": outcome.reason,
307-
"details": outcome.details,
308-
}
309-
self._assertions_this_step.append(record)
314+
def check(self, predicate: Predicate, label: str, required: bool = False) -> AssertionHandle:
315+
"""
316+
Create an AssertionHandle for fluent `.once()` / `.eventually()` usage.
310317
311-
# Emit dedicated verification event (Option B from design doc)
312-
# This makes assertions visible in Studio timeline
313-
self.tracer.emit(
314-
"verification",
315-
data={
316-
"kind": "assert",
317-
"passed": outcome.passed,
318-
**record,
319-
},
320-
step_id=self.step_id,
321-
)
318+
This does NOT evaluate the predicate immediately.
319+
"""
322320

323-
return outcome.passed
321+
return AssertionHandle(runtime=self, predicate=predicate, label=label, required=required)
324322

325323
def assert_done(
326324
self,
@@ -342,6 +340,7 @@ def assert_done(
342340
Returns:
343341
True if task is complete (assertion passed), False otherwise
344342
"""
343+
# Convenience wrapper for assert_ with required=True
345344
ok = self.assertTrue(predicate, label=label, required=True)
346345
if ok:
347346
self._task_done = True
@@ -360,6 +359,197 @@ def assert_done(
360359

361360
return ok
362361

362+
def _record_outcome(
    self,
    *,
    outcome: Any,
    label: str,
    required: bool,
    kind: str,
    record_in_step: bool,
    extra: dict[str, Any] | None = None,
) -> None:
    """
    Internal helper: emit a verification event and optionally accumulate it for step_end.

    Args:
        outcome: Predicate result; `.passed`, `.reason` and `.details` are read.
        label: Label stored on the record.
        required: Stored on the record as the `required` flag.
        kind: Value emitted under the event's `kind` key.
        record_in_step: When True, the record is also appended to
            `self._assertions_this_step`.
        extra: Optional extra keys merged into the record; they override
            same-named base keys because they are applied last.
    """
    # Copy so the outcome's own details mapping is never mutated.
    details = dict(outcome.details or {})

    # Failure intelligence: nearest matches for selector-driven assertions.
    # Only computed when the outcome did not already provide the key, so the
    # fuzzy scan over snapshot elements is skipped when it would be discarded.
    if (
        not outcome.passed
        and self.last_snapshot is not None
        and "selector" in details
        and "nearest_matches" not in details
    ):
        selector = str(details.get("selector") or "")
        details["nearest_matches"] = self._nearest_matches(selector, limit=3)

    record = {
        "label": label,
        "passed": bool(outcome.passed),
        "required": required,
        "reason": str(outcome.reason or ""),
        "details": details,
    }
    if extra:
        record.update(extra)

    if record_in_step:
        self._assertions_this_step.append(record)

    # "passed" is listed before **record to keep the emitted key order stable;
    # the value that survives is the one in `record`.
    self.tracer.emit(
        "verification",
        data={
            "kind": kind,
            "passed": bool(outcome.passed),
            **record,
        },
        step_id=self.step_id,
    )
404+
405+
def _nearest_matches(self, selector: str, *, limit: int = 3) -> list[dict[str, Any]]:
    """
    Best-effort nearest match suggestions for debugging failed selector assertions.

    Each element of `self.last_snapshot.elements` is compared against the
    lower-cased selector using `difflib.SequenceMatcher(...).ratio()`; the
    element's `name` is used as the comparison text, falling back to `text`.

    Args:
        selector: Selector string from the failed assertion.
        limit: Maximum number of suggestions; values <= 0 yield no suggestions.

    Returns:
        Up to `limit` dicts with `id`, `role`, `text`, `name` (text/name cut
        to 80 characters) and `score`, best score first. Empty when there is
        no snapshot, the selector is blank, or no element has comparison text.
    """
    # A negative limit would otherwise slice off the *tail* of the ranking
    # (scored[:-1]) instead of returning nothing.
    if limit <= 0 or self.last_snapshot is None:
        return []

    needle = selector.lower().strip()
    if not needle:
        return []

    scored: list[tuple[float, Any]] = []
    for el in self.last_snapshot.elements:
        hay = (getattr(el, "name", None) or getattr(el, "text", None) or "").strip()
        if not hay:
            continue
        score = difflib.SequenceMatcher(None, needle, hay.lower()).ratio()
        scored.append((score, el))

    # Stable sort: elements with equal scores keep snapshot order.
    scored.sort(key=lambda pair: pair[0], reverse=True)

    return [
        {
            "id": getattr(el, "id", None),
            "role": getattr(el, "role", None),
            "text": (getattr(el, "text", "") or "")[:80],
            "name": (getattr(el, "name", "") or "")[:80],
            "score": round(float(score), 4),
        }
        for score, el in scored[:limit]
    ]
437+
438+
def get_assertions_for_step_end(self) -> dict[str, Any]:
    """
    Build the assertions payload intended for step_end.data.verify.signals.

    Returns:
        Dictionary holding a copy of this step's assertion records under
        'assertions'; 'task_done' and 'task_done_label' are added only when
        the task has been marked done.
    """
    signals: dict[str, Any] = {"assertions": list(self._assertions_this_step)}

    if not self._task_done:
        return signals

    signals["task_done"] = True
    signals["task_done_label"] = self._task_done_label
    return signals
454+
455+
def flush_assertions(self) -> list[dict[str, Any]]:
    """
    Return a copy of the current step's assertion records and reset the accumulator.
    """
    flushed = list(self._assertions_this_step)
    # Rebind to a fresh list rather than clearing the existing one in place.
    self._assertions_this_step = []
    return flushed
462+
463+
@property
def is_task_done(self) -> bool:
    """Report the current value of the task-done flag (set by assert_done())."""
    done = self._task_done
    return done
467+
468+
def reset_task_done(self) -> None:
    """Clear the task-done flag and its label (for multi-task runs)."""
    self._task_done, self._task_done_label = False, None
472+
473+
def all_assertions_passed(self) -> bool:
    """Return True when no assertion recorded in the current step failed (or none exist)."""
    for rec in self._assertions_this_step:
        if not rec["passed"]:
            return False
    return True
476+
477+
def required_assertions_passed(self) -> bool:
    """Return True when every required assertion in the current step passed (or none are required)."""
    return all(
        rec["passed"]
        for rec in self._assertions_this_step
        if rec.get("required")
    )
481+
482+
483+
@dataclass
class AssertionHandle:
    """
    Deferred assertion: a predicate bound to a runtime, a label and a required flag.

    Nothing is evaluated on construction; call `once()` for a single
    evaluation or `await eventually()` to retry until a deadline.
    """

    # Runtime providing snapshot(), _ctx(), assert_() and _record_outcome().
    runtime: AgentRuntime
    # Callable invoked with runtime._ctx(); its result must expose `.passed`.
    predicate: Predicate
    # Label attached to every outcome recorded through this handle.
    label: str
    # Forwarded unchanged as the `required` flag of every recorded outcome.
    required: bool = False

    def once(self) -> bool:
        """Evaluate once (same behavior as runtime.assert_)."""
        return self.runtime.assert_(self.predicate, label=self.label, required=self.required)

    async def eventually(
        self,
        *,
        timeout_s: float = 10.0,
        poll_s: float = 0.25,
        snapshot_kwargs: dict[str, Any] | None = None,
    ) -> bool:
        """
        Retry until the predicate passes or timeout is reached.

        Every attempt takes a fresh snapshot, evaluates the predicate and emits
        a verification event that is NOT accumulated in step_end assertions.
        The deciding attempt (pass or timeout) is then recorded a second time
        with `final: True` and accumulated once.

        Args:
            timeout_s: Time budget in seconds, measured with `time.monotonic()`.
            poll_s: Delay between attempts; shortened on the last wait so the
                sleep itself never extends past the deadline.
            snapshot_kwargs: Keyword arguments forwarded to `runtime.snapshot()`.

        Returns:
            True if the predicate passed before the deadline, False on timeout.
        """
        deadline = time.monotonic() + timeout_s
        attempt = 0

        while True:
            attempt += 1
            await self.runtime.snapshot(**(snapshot_kwargs or {}))

            outcome = self.predicate(self.runtime._ctx())

            # Emit attempt event (not recorded in step_end)
            self.runtime._record_outcome(
                outcome=outcome,
                label=self.label,
                required=self.required,
                kind="assert",
                record_in_step=False,
                extra={"eventually": True, "attempt": attempt},
            )

            if outcome.passed:
                # Record final success once
                self.runtime._record_outcome(
                    outcome=outcome,
                    label=self.label,
                    required=self.required,
                    kind="assert",
                    record_in_step=True,
                    extra={"eventually": True, "attempt": attempt, "final": True},
                )
                return True

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                # Record final failure once
                self.runtime._record_outcome(
                    outcome=outcome,
                    label=self.label,
                    required=self.required,
                    kind="assert",
                    record_in_step=True,
                    extra={"eventually": True, "attempt": attempt, "final": True, "timeout": True},
                )
                return False

            # Never sleep past the deadline: a poll interval longer than the
            # remaining budget would otherwise overrun timeout_s by up to poll_s.
            await asyncio.sleep(min(poll_s, remaining))
552+
363553
def get_assertions_for_step_end(self) -> dict[str, Any]:
364554
"""
365555
Get assertions data for inclusion in step_end.data.verify.signals.

0 commit comments

Comments
 (0)