Skip to content

Commit 1c1dbe9

Browse files
authored
Merge pull request #51 from SentienceAPI/improve_agent5
Save trace in snapshot
2 parents 2c16ea9 + 3e60628 commit 1c1dbe9

File tree

2 files changed

+74
-24
lines changed

2 files changed

+74
-24
lines changed

sentience/models.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
Pydantic models for Sentience SDK - matches spec/snapshot.schema.json
33
"""
44

5-
from typing import List, Literal, Optional, Union
5+
from typing import Literal
66

77
from pydantic import BaseModel, Field
88

@@ -114,6 +114,8 @@ class SnapshotOptions(BaseModel):
114114
limit: int = Field(50, ge=1, le=500)
115115
filter: SnapshotFilter | None = None
116116
use_api: bool | None = None # Force API vs extension
117+
save_trace: bool = False # Save raw_elements to JSON for benchmarking/training
118+
trace_path: str | None = None # Path to save trace (default: "trace_{timestamp}.json")
117119

118120
class Config:
119121
arbitrary_types_allowed = True

sentience/snapshot.py

Lines changed: 71 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -2,12 +2,38 @@
22
Snapshot functionality - calls window.sentience.snapshot() or server-side API
33
"""
44

5+
import json
6+
import os
7+
import time
58
from typing import Any
69

710
import requests
811

912
from .browser import SentienceBrowser
10-
from .models import Snapshot
13+
from .models import Snapshot, SnapshotOptions
14+
15+
16+
def _save_trace_to_file(raw_elements: list[dict[str, Any]], trace_path: str | None = None) -> None:
    """
    Save raw_elements to a JSON file for benchmarking/training

    The parent directory of the target path is created if it does not exist,
    and a confirmation line is printed once the file has been written.

    Args:
        raw_elements: Raw elements data from snapshot
        trace_path: Path to save trace file. If None (or an empty string),
            uses "trace_{timestamp}.json", where the timestamp is the current
            Unix time truncated to whole seconds.

    Raises:
        OSError: If the directory cannot be created or the file cannot be
            opened/written.
        TypeError: If raw_elements contains a value json cannot serialize.
    """
    # Default filename if none provided.
    # NOTE: the timestamp has one-second resolution and the file is opened in
    # "w" mode, so two default-named traces saved within the same second
    # overwrite each other; pass an explicit trace_path to avoid that.
    filename = trace_path or f"trace_{int(time.time())}.json"

    # Ensure directory exists. dirname() is "" for a bare filename, and
    # os.makedirs("") would raise, hence the guard.
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Save the raw elements to JSON. The encoding is pinned to UTF-8 so the
    # output does not depend on the platform's locale default encoding.
    with open(filename, "w", encoding="utf-8") as f:
        json.dump(raw_elements, f, indent=2)

    print(f"[SDK] Trace saved to: {filename}")
1137

1238

1339
def snapshot(
@@ -16,6 +42,8 @@ def snapshot(
1642
limit: int | None = None,
1743
filter: dict[str, Any] | None = None,
1844
use_api: bool | None = None,
45+
save_trace: bool = False,
46+
trace_path: str | None = None,
1947
) -> Snapshot:
2048
"""
2149
Take a snapshot of the current page
@@ -27,26 +55,38 @@ def snapshot(
2755
filter: Filter options (min_area, allowed_roles, min_z_index)
2856
use_api: Force use of server-side API if True, local extension if False.
2957
If None, uses API if api_key is set, otherwise uses local extension.
58+
save_trace: Whether to save raw_elements to JSON for benchmarking/training
59+
trace_path: Path to save trace file. If None, uses "trace_{timestamp}.json"
3060
3161
Returns:
3262
Snapshot object
3363
"""
64+
# Build SnapshotOptions from individual parameters
65+
options = SnapshotOptions(
66+
screenshot=screenshot if screenshot is not None else False,
67+
limit=limit if limit is not None else 50,
68+
filter=filter,
69+
use_api=use_api,
70+
save_trace=save_trace,
71+
trace_path=trace_path,
72+
)
73+
3474
# Determine if we should use server-side API
35-
should_use_api = use_api if use_api is not None else (browser.api_key is not None)
75+
should_use_api = (
76+
options.use_api if options.use_api is not None else (browser.api_key is not None)
77+
)
3678

3779
if should_use_api and browser.api_key:
3880
# Use server-side API (Pro/Enterprise tier)
39-
return _snapshot_via_api(browser, screenshot, limit, filter)
81+
return _snapshot_via_api(browser, options)
4082
else:
4183
# Use local extension (Free tier)
42-
return _snapshot_via_extension(browser, screenshot, limit, filter)
84+
return _snapshot_via_extension(browser, options)
4385

4486

4587
def _snapshot_via_extension(
4688
browser: SentienceBrowser,
47-
screenshot: bool | None,
48-
limit: int | None,
49-
filter: dict[str, Any] | None,
89+
options: SnapshotOptions,
5090
) -> Snapshot:
5191
"""Take snapshot using local extension (Free tier)"""
5292
if not browser.page:
@@ -77,14 +117,16 @@ def _snapshot_via_extension(
77117
f"Is the extension loaded? Diagnostics: {diag}"
78118
) from e
79119

80-
# Build options
81-
options: dict[str, Any] = {}
82-
if screenshot is not None:
83-
options["screenshot"] = screenshot
84-
if limit is not None:
85-
options["limit"] = limit
86-
if filter is not None:
87-
options["filter"] = filter
120+
# Build options dict for extension API (exclude save_trace/trace_path)
121+
ext_options: dict[str, Any] = {}
122+
if options.screenshot is not False:
123+
ext_options["screenshot"] = options.screenshot
124+
if options.limit != 50:
125+
ext_options["limit"] = options.limit
126+
if options.filter is not None:
127+
ext_options["filter"] = (
128+
options.filter.model_dump() if hasattr(options.filter, "model_dump") else options.filter
129+
)
88130

89131
# Call extension API
90132
result = browser.page.evaluate(
@@ -93,19 +135,21 @@ def _snapshot_via_extension(
93135
return window.sentience.snapshot(options);
94136
}
95137
""",
96-
options,
138+
ext_options,
97139
)
98140

141+
# Save trace if requested
142+
if options.save_trace:
143+
_save_trace_to_file(result.get("raw_elements", []), options.trace_path)
144+
99145
# Validate and parse with Pydantic
100146
snapshot_obj = Snapshot(**result)
101147
return snapshot_obj
102148

103149

104150
def _snapshot_via_api(
105151
browser: SentienceBrowser,
106-
screenshot: bool | None,
107-
limit: int | None,
108-
filter: dict[str, Any] | None,
152+
options: SnapshotOptions,
109153
) -> Snapshot:
110154
"""Take snapshot using server-side API (Pro/Enterprise tier)"""
111155
if not browser.page:
@@ -128,8 +172,8 @@ def _snapshot_via_api(
128172

129173
# Step 1: Get raw data from local extension (always happens locally)
130174
raw_options: dict[str, Any] = {}
131-
if screenshot is not None:
132-
raw_options["screenshot"] = screenshot
175+
if options.screenshot is not False:
176+
raw_options["screenshot"] = options.screenshot
133177

134178
raw_result = browser.page.evaluate(
135179
"""
@@ -140,6 +184,10 @@ def _snapshot_via_api(
140184
raw_options,
141185
)
142186

187+
# Save trace if requested (save raw data before API processing)
188+
if options.save_trace:
189+
_save_trace_to_file(raw_result.get("raw_elements", []), options.trace_path)
190+
143191
# Step 2: Send to server for smart ranking/filtering
144192
# Use raw_elements (raw data) instead of elements (processed data)
145193
# Server validates API key and applies proprietary ranking logic
@@ -148,8 +196,8 @@ def _snapshot_via_api(
148196
"url": raw_result.get("url", ""),
149197
"viewport": raw_result.get("viewport"),
150198
"options": {
151-
"limit": limit,
152-
"filter": filter,
199+
"limit": options.limit,
200+
"filter": options.filter.model_dump() if options.filter else None,
153201
},
154202
}
155203

0 commit comments

Comments
 (0)