Skip to content

Commit a421c68

Browse files
author
SentienceDEV
committed
P2 with mutation observer
1 parent d1c2bf5 commit a421c68

File tree

4 files changed

+276
-68
lines changed

4 files changed

+276
-68
lines changed

sentience/agent_runtime.py

Lines changed: 161 additions & 65 deletions
Original file line number | Diff line number | Diff line change
@@ -71,7 +71,7 @@
7171
from typing import TYPE_CHECKING, Any
7272

7373
from .models import Snapshot, SnapshotOptions
74-
from .verification import AssertContext, Predicate
74+
from .verification import AssertContext, AssertOutcome, Predicate
7575

7676
if TYPE_CHECKING:
7777
from playwright.async_api import Page
@@ -341,7 +341,7 @@ def assert_done(
341341
True if task is complete (assertion passed), False otherwise
342342
"""
343343
# Convenience wrapper for assert_ with required=True
344-
ok = self.assert_(predicate, label=label, required=True)
344+
ok = self.assertTrue(predicate, label=label, required=True)
345345
if ok:
346346
self._task_done = True
347347
self._task_done_label = label
@@ -496,7 +496,12 @@ async def eventually(
496496
*,
497497
timeout_s: float = 10.0,
498498
poll_s: float = 0.25,
499+
min_confidence: float | None = None,
500+
max_snapshot_attempts: int = 3,
499501
snapshot_kwargs: dict[str, Any] | None = None,
502+
vision_provider: Any | None = None,
503+
vision_system_prompt: str | None = None,
504+
vision_user_prompt: str | None = None,
500505
) -> bool:
501506
"""
502507
Retry until the predicate passes or timeout is reached.
@@ -506,11 +511,165 @@ async def eventually(
506511
"""
507512
deadline = time.monotonic() + timeout_s
508513
attempt = 0
514+
snapshot_attempt = 0
509515
last_outcome = None
510516

511517
while True:
512518
attempt += 1
513519
await self.runtime.snapshot(**(snapshot_kwargs or {}))
520+
snapshot_attempt += 1
521+
522+
# Optional: gate predicate evaluation on snapshot confidence.
523+
# If diagnostics are missing, we don't block (backward compatible).
524+
confidence = None
525+
diagnostics = None
526+
if self.runtime.last_snapshot is not None:
527+
diagnostics = getattr(self.runtime.last_snapshot, "diagnostics", None)
528+
if diagnostics is not None:
529+
confidence = getattr(diagnostics, "confidence", None)
530+
531+
if (
532+
min_confidence is not None
533+
and confidence is not None
534+
and isinstance(confidence, (int, float))
535+
and confidence < min_confidence
536+
):
537+
last_outcome = AssertOutcome(
538+
passed=False,
539+
reason=f"Snapshot confidence {confidence:.3f} < min_confidence {min_confidence:.3f}",
540+
details={
541+
"reason_code": "snapshot_low_confidence",
542+
"confidence": confidence,
543+
"min_confidence": min_confidence,
544+
"snapshot_attempt": snapshot_attempt,
545+
"diagnostics": (
546+
diagnostics.model_dump()
547+
if hasattr(diagnostics, "model_dump")
548+
else diagnostics
549+
),
550+
},
551+
)
552+
553+
# Emit attempt event (not recorded in step_end)
554+
self.runtime._record_outcome(
555+
outcome=last_outcome,
556+
label=self.label,
557+
required=self.required,
558+
kind="assert",
559+
record_in_step=False,
560+
extra={
561+
"eventually": True,
562+
"attempt": attempt,
563+
"snapshot_attempt": snapshot_attempt,
564+
},
565+
)
566+
567+
if snapshot_attempt >= max_snapshot_attempts:
568+
# Optional: vision fallback as last resort (Phase 2-lite).
569+
# This keeps the assertion surface invariant; only the perception layer changes.
570+
if (
571+
vision_provider is not None
572+
and getattr(vision_provider, "supports_vision", lambda: False)()
573+
):
574+
try:
575+
import base64
576+
577+
png_bytes = await self.runtime.backend.screenshot_png()
578+
image_b64 = base64.b64encode(png_bytes).decode("utf-8")
579+
580+
sys_prompt = vision_system_prompt or (
581+
"You are a strict visual verifier. Answer only YES or NO."
582+
)
583+
user_prompt = vision_user_prompt or (
584+
f"Given the screenshot, is the following condition satisfied?\n\n{self.label}\n\nAnswer YES or NO."
585+
)
586+
587+
resp = vision_provider.generate_with_image(
588+
sys_prompt,
589+
user_prompt,
590+
image_base64=image_b64,
591+
temperature=0.0,
592+
)
593+
text = (resp.content or "").strip().lower()
594+
passed = text.startswith("yes")
595+
596+
final_outcome = AssertOutcome(
597+
passed=passed,
598+
reason="vision_fallback_yes" if passed else "vision_fallback_no",
599+
details={
600+
"reason_code": (
601+
"vision_fallback_pass" if passed else "vision_fallback_fail"
602+
),
603+
"vision_response": resp.content,
604+
"min_confidence": min_confidence,
605+
"snapshot_attempts": snapshot_attempt,
606+
},
607+
)
608+
self.runtime._record_outcome(
609+
outcome=final_outcome,
610+
label=self.label,
611+
required=self.required,
612+
kind="assert",
613+
record_in_step=True,
614+
extra={
615+
"eventually": True,
616+
"attempt": attempt,
617+
"snapshot_attempt": snapshot_attempt,
618+
"final": True,
619+
"vision_fallback": True,
620+
},
621+
)
622+
return passed
623+
except Exception as e:
624+
# If vision fallback fails, fall through to snapshot_exhausted.
625+
last_outcome.details["vision_error"] = str(e)
626+
627+
final_outcome = AssertOutcome(
628+
passed=False,
629+
reason=f"Snapshot exhausted after {snapshot_attempt} attempt(s) below min_confidence {min_confidence:.3f}",
630+
details={
631+
"reason_code": "snapshot_exhausted",
632+
"confidence": confidence,
633+
"min_confidence": min_confidence,
634+
"snapshot_attempts": snapshot_attempt,
635+
"diagnostics": last_outcome.details.get("diagnostics"),
636+
},
637+
)
638+
self.runtime._record_outcome(
639+
outcome=final_outcome,
640+
label=self.label,
641+
required=self.required,
642+
kind="assert",
643+
record_in_step=True,
644+
extra={
645+
"eventually": True,
646+
"attempt": attempt,
647+
"snapshot_attempt": snapshot_attempt,
648+
"final": True,
649+
"exhausted": True,
650+
},
651+
)
652+
return False
653+
654+
if time.monotonic() >= deadline:
655+
self.runtime._record_outcome(
656+
outcome=last_outcome,
657+
label=self.label,
658+
required=self.required,
659+
kind="assert",
660+
record_in_step=True,
661+
extra={
662+
"eventually": True,
663+
"attempt": attempt,
664+
"snapshot_attempt": snapshot_attempt,
665+
"final": True,
666+
"timeout": True,
667+
},
668+
)
669+
return False
670+
671+
await asyncio.sleep(poll_s)
672+
continue
514673

515674
last_outcome = self.predicate(self.runtime._ctx())
516675

@@ -549,66 +708,3 @@ async def eventually(
549708
return False
550709

551710
await asyncio.sleep(poll_s)
552-
553-
def get_assertions_for_step_end(self) -> dict[str, Any]:
554-
"""
555-
Get assertions data for inclusion in step_end.data.verify.signals.
556-
557-
This is called when building the step_end event to include
558-
assertion results in the trace.
559-
560-
Returns:
561-
Dictionary with 'assertions', 'task_done', 'task_done_label' keys
562-
"""
563-
result: dict[str, Any] = {
564-
"assertions": self._assertions_this_step.copy(),
565-
}
566-
567-
if self._task_done:
568-
result["task_done"] = True
569-
result["task_done_label"] = self._task_done_label
570-
571-
return result
572-
573-
def flush_assertions(self) -> list[dict[str, Any]]:
574-
"""
575-
Get and clear assertions for current step.
576-
577-
Call this at step end to get accumulated assertions
578-
for the step_end event, then clear for next step.
579-
580-
Returns:
581-
List of assertion records from this step
582-
"""
583-
assertions = self._assertions_this_step.copy()
584-
self._assertions_this_step = []
585-
return assertions
586-
587-
@property
588-
def is_task_done(self) -> bool:
589-
"""Check if task has been marked as done via assert_done()."""
590-
return self._task_done
591-
592-
def reset_task_done(self) -> None:
593-
"""Reset task_done state (for multi-task runs)."""
594-
self._task_done = False
595-
self._task_done_label = None
596-
597-
def all_assertions_passed(self) -> bool:
598-
"""
599-
Check if all assertions in current step passed.
600-
601-
Returns:
602-
True if all assertions passed (or no assertions made)
603-
"""
604-
return all(a["passed"] for a in self._assertions_this_step)
605-
606-
def required_assertions_passed(self) -> bool:
607-
"""
608-
Check if all required assertions in current step passed.
609-
610-
Returns:
611-
True if all required assertions passed (or no required assertions)
612-
"""
613-
required = [a for a in self._assertions_this_step if a.get("required")]
614-
return all(a["passed"] for a in required)

sentience/models.py

Lines changed: 23 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,8 @@
22
Pydantic models for Sentience SDK - matches spec/snapshot.schema.json
33
"""
44

5+
from __future__ import annotations
6+
57
from dataclasses import dataclass
68
from typing import Any, Literal
79

@@ -90,7 +92,7 @@ class Element(BaseModel):
9092
# Layout-derived metadata (internal-only in v0, not exposed in API responses)
9193
# Per ChatGPT feedback: explicitly optional to prevent users assuming layout is always present
9294
# Note: This field is marked with skip_serializing_if in Rust, so it won't appear in API responses
93-
layout: "LayoutHints | None" = None
95+
layout: LayoutHints | None = None
9496

9597

9698
class GridPosition(BaseModel):
@@ -153,6 +155,8 @@ class Snapshot(BaseModel):
153155
requires_license: bool | None = None
154156
# Phase 2: Dominant group key for ordinal selection
155157
dominant_group_key: str | None = None # The most common group_key (main content group)
158+
# Phase 2: Runtime stability/debug info (confidence/reasons/metrics)
159+
diagnostics: SnapshotDiagnostics | None = None
156160

157161
def save(self, filepath: str) -> None:
158162
"""Save snapshot as JSON file"""
@@ -161,6 +165,22 @@ def save(self, filepath: str) -> None:
161165
with open(filepath, "w", encoding="utf-8") as f:
162166
json.dump(self.model_dump(), f, indent=2)
163167

168+
169+
class SnapshotDiagnosticsMetrics(BaseModel):
170+
ready_state: str | None = None
171+
quiet_ms: float | None = None
172+
node_count: int | None = None
173+
interactive_count: int | None = None
174+
raw_elements_count: int | None = None
175+
176+
177+
class SnapshotDiagnostics(BaseModel):
178+
"""Runtime stability/debug information (reserved for diagnostics, not ML metadata)."""
179+
180+
confidence: float | None = None
181+
reasons: list[str] = []
182+
metrics: SnapshotDiagnosticsMetrics | None = None
183+
164184
def get_grid_bounds(self, grid_id: int | None = None) -> list[GridInfo]:
165185
"""
166186
Get grid coordinates (bounding boxes) for detected grids.
@@ -290,7 +310,7 @@ def get_grid_bounds(self, grid_id: int | None = None) -> list[GridInfo]:
290310
return grid_infos
291311

292312
@staticmethod
293-
def _infer_grid_label(elements: list["Element"]) -> str | None:
313+
def _infer_grid_label(elements: list[Element]) -> str | None:
294314
"""
295315
Infer grid label from element patterns using text fingerprinting (best-effort heuristic).
296316
@@ -685,7 +705,7 @@ class StorageState(BaseModel):
685705
)
686706

687707
@classmethod
688-
def from_dict(cls, data: dict) -> "StorageState":
708+
def from_dict(cls, data: dict) -> StorageState:
689709
"""
690710
Create StorageState from dictionary (e.g., loaded from JSON).
691711

sentience/snapshot.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,13 @@ def _build_snapshot_payload(
2929
3030
Shared helper used by both sync and async snapshot implementations.
3131
"""
32+
diagnostics = raw_result.get("diagnostics") or {}
33+
client_metrics = None
34+
try:
35+
client_metrics = diagnostics.get("metrics")
36+
except Exception:
37+
client_metrics = None
38+
3239
return {
3340
"raw_elements": raw_result.get("raw_elements", []),
3441
"url": raw_result.get("url", ""),
@@ -38,6 +45,7 @@ def _build_snapshot_payload(
3845
"limit": options.limit,
3946
"filter": options.filter.model_dump() if options.filter else None,
4047
},
48+
"client_metrics": client_metrics,
4149
}
4250

4351

@@ -133,6 +141,8 @@ def _merge_api_result_with_local(
133141
"screenshot": raw_result.get("screenshot"), # Keep local screenshot
134142
"screenshot_format": raw_result.get("screenshot_format"),
135143
"error": api_result.get("error"),
144+
# Phase 2: Runtime stability/debug info
145+
"diagnostics": api_result.get("diagnostics", raw_result.get("diagnostics")),
136146
# Phase 2: Ordinal support - dominant group key from Gateway
137147
"dominant_group_key": api_result.get("dominant_group_key"),
138148
}

0 commit comments

Comments
 (0)