Skip to content

Commit 3af1952

Browse files
committed
Enable auth cookie injection
1 parent edea0a1 commit 3af1952

File tree

5 files changed

+286
-3
lines changed

5 files changed

+286
-3
lines changed

screenshot.png

91.1 KB
Loading

sentience/__init__.py

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,11 +32,15 @@
3232
ActionTokenUsage,
3333
AgentActionResult,
3434
BBox,
35+
Cookie,
3536
Element,
37+
LocalStorageItem,
38+
OriginStorage,
3639
ScreenshotConfig,
3740
Snapshot,
3841
SnapshotFilter,
3942
SnapshotOptions,
43+
StorageState,
4044
TokenStats,
4145
Viewport,
4246
WaitResult,
@@ -54,6 +58,7 @@
5458
canonical_snapshot_loose,
5559
canonical_snapshot_strict,
5660
compute_snapshot_digests,
61+
save_storage_state,
5762
sha256_digest,
5863
)
5964
from .wait import wait_for
@@ -105,6 +110,11 @@
105110
"SnapshotOptions",
106111
"SnapshotFilter",
107112
"ScreenshotConfig",
113+
# Storage State Models (Auth Injection)
114+
"StorageState",
115+
"Cookie",
116+
"LocalStorageItem",
117+
"OriginStorage",
108118
# Tracing (v0.12.0+)
109119
"Tracer",
110120
"TraceSink",
@@ -118,6 +128,7 @@
118128
"canonical_snapshot_loose",
119129
"compute_snapshot_digests",
120130
"sha256_digest",
131+
"save_storage_state",
121132
# Formatting (v0.12.0+)
122133
"format_snapshot_for_llm",
123134
# Agent Config (v0.12.0+)

sentience/browser.py

Lines changed: 114 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@
1111

1212
from playwright.sync_api import BrowserContext, Page, Playwright, sync_playwright
1313

14-
from sentience.models import ProxyConfig
14+
from sentience.models import ProxyConfig, StorageState
1515

1616
# Import stealth for bot evasion (optional - graceful fallback if not available)
1717
try:
@@ -31,6 +31,8 @@ def __init__(
3131
api_url: str | None = None,
3232
headless: bool | None = None,
3333
proxy: str | None = None,
34+
user_data_dir: str | None = None,
35+
storage_state: str | Path | StorageState | dict | None = None,
3436
):
3537
"""
3638
Initialize Sentience browser
@@ -46,6 +48,15 @@ def __init__(
4648
proxy: Optional proxy server URL (e.g., 'http://user:pass@proxy.example.com:8080')
4749
Supports HTTP, HTTPS, and SOCKS5 proxies
4850
Falls back to SENTIENCE_PROXY environment variable if not provided
51+
user_data_dir: Optional path to user data directory for persistent sessions.
52+
If None, uses temporary directory (session not persisted).
53+
If provided, cookies and localStorage persist across browser restarts.
54+
storage_state: Optional storage state to inject (cookies + localStorage).
55+
Can be:
56+
- Path to JSON file (str or Path)
57+
- StorageState object
58+
- Dictionary with 'cookies' and/or 'origins' keys
59+
If provided, browser starts with pre-injected authentication.
4960
"""
5061
self.api_key = api_key
5162
# Only set api_url if api_key is provided, otherwise None (free tier)
@@ -65,6 +76,10 @@ def __init__(
6576
# Support proxy from argument or environment variable
6677
self.proxy = proxy or os.environ.get("SENTIENCE_PROXY")
6778

79+
# Auth injection support
80+
self.user_data_dir = user_data_dir
81+
self.storage_state = storage_state
82+
6883
self.playwright: Playwright | None = None
6984
self.context: BrowserContext | None = None
7085
self.page: Page | None = None
@@ -170,9 +185,16 @@ def start(self) -> None:
170185
# Parse proxy configuration if provided
171186
proxy_config = self._parse_proxy(self.proxy) if self.proxy else None
172187

188+
# Handle User Data Directory (Persistence)
189+
if self.user_data_dir:
190+
user_data_dir = str(self.user_data_dir)
191+
Path(user_data_dir).mkdir(parents=True, exist_ok=True)
192+
else:
193+
user_data_dir = "" # Ephemeral temp dir (existing behavior)
194+
173195
# Build launch_persistent_context parameters
174196
launch_params = {
175-
"user_data_dir": "", # Ephemeral temp dir
197+
"user_data_dir": user_data_dir,
176198
"headless": False, # IMPORTANT: See note above
177199
"args": args,
178200
"viewport": {"width": 1280, "height": 800},
@@ -194,6 +216,10 @@ def start(self) -> None:
194216

195217
self.page = self.context.pages[0] if self.context.pages else self.context.new_page()
196218

219+
# Inject storage state if provided (must be after context creation)
220+
if self.storage_state:
221+
self._inject_storage_state(self.storage_state)
222+
197223
# Apply stealth if available
198224
if STEALTH_AVAILABLE:
199225
stealth_sync(self.page)
@@ -233,6 +259,92 @@ def goto(self, url: str) -> None:
233259
f"5. Diagnostic info: {diag}"
234260
)
235261

262+
def _inject_storage_state(
    self, storage_state: str | Path | StorageState | dict
) -> None:  # noqa: C901
    """
    Inject storage state (cookies + localStorage) into the browser context.

    Cookies are added to the context in a single call. localStorage can only
    be written from a page that is on the matching origin, so ``self.page``
    is navigated to every origin that has items to write; the page is left
    on the last origin visited.

    Args:
        storage_state: Path to a JSON file (str or Path), a StorageState
            object, or a dict with 'cookies' and/or 'origins' keys.

    Raises:
        ValueError: If ``storage_state`` is not one of the supported types.
        RuntimeError: If cookies must be injected but no browser context
            has been created yet.
    """
    import json

    # Normalise every accepted input form to a StorageState instance.
    if isinstance(storage_state, (str, Path)):
        with open(storage_state, encoding="utf-8") as f:
            state = StorageState.from_dict(json.load(f))
    elif isinstance(storage_state, StorageState):
        state = storage_state
    elif isinstance(storage_state, dict):
        state = StorageState.from_dict(storage_state)
    else:
        raise ValueError(
            f"Invalid storage_state type: {type(storage_state)}. "
            "Expected str, Path, StorageState, or dict."
        )

    # Inject cookies (they apply to the whole context, no navigation needed)
    if state.cookies:
        if self.context is None:
            # Fail with an actionable message instead of an AttributeError
            # raised from calling add_cookies on None.
            raise RuntimeError(
                "Cannot inject storage state: browser context has not been created. "
                "Call start() first."
            )

        required_keys = ("name", "value", "domain", "path")
        optional_keys = ("expires", "httpOnly", "secure", "sameSite")

        playwright_cookies = []
        for cookie in state.cookies:
            cookie_dict = cookie.model_dump()
            playwright_cookie = {key: cookie_dict[key] for key in required_keys}
            # Optional attributes are forwarded only when they carry a truthy
            # value; unset/False ones are omitted from the cookie dict.
            playwright_cookie.update(
                {key: cookie_dict[key] for key in optional_keys if cookie_dict.get(key)}
            )
            playwright_cookies.append(playwright_cookie)

        self.context.add_cookies(playwright_cookies)
        print(f"✅ [Sentience] Injected {len(state.cookies)} cookie(s)")

    # Inject localStorage (requires a page on each origin)
    for origin_data in state.origins:
        origin = origin_data.origin
        # Nothing to write for this origin: skip it rather than paying for a
        # navigation whose only purpose is to set localStorage.
        if not origin or not origin_data.localStorage:
            continue

        localStorage_dict = {item.name: item.value for item in origin_data.localStorage}

        # Best effort: a failure on one origin (unreachable site, timeout, ...)
        # is reported and must not prevent the remaining origins from being set.
        try:
            self.page.goto(origin, wait_until="domcontentloaded", timeout=10000)
            self.page.evaluate(
                """(localStorage_data) => {
                    for (const [key, value] of Object.entries(localStorage_data)) {
                        localStorage.setItem(key, value);
                    }
                }""",
                localStorage_dict,
            )
            print(
                f"✅ [Sentience] Injected {len(origin_data.localStorage)} localStorage item(s) for {origin}"
            )
        except Exception as e:
            print(f"⚠️  [Sentience] Failed to inject localStorage for {origin}: {e}")
347+
236348
def _wait_for_extension(self, timeout_sec: float = 5.0) -> bool:
237349
"""Poll for window.sentience to be available"""
238350
start_time = time.time()

sentience/models.py

Lines changed: 121 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,124 @@ def to_playwright_dict(self) -> dict:
216216
config["username"] = self.username
217217
config["password"] = self.password
218218
return config
219+
220+
221+
# ========== Storage State Models (Auth Injection) ==========
222+
223+
224+
class Cookie(BaseModel):
    """
    A single cookie entry inside a storage state.

    Field names follow Playwright's storage_state cookie format.
    """

    # Required identity of the cookie
    name: str = Field(default=..., description="Cookie name")
    value: str = Field(default=..., description="Cookie value")
    domain: str = Field(
        default=...,
        description="Cookie domain (e.g., '.example.com')",
    )

    # Optional attributes with their defaults
    path: str = Field(default="/", description="Cookie path")
    expires: float | None = Field(
        default=None,
        description="Expiration timestamp (Unix epoch)",
    )
    httpOnly: bool = Field(default=False, description="HTTP-only flag")
    secure: bool = Field(default=False, description="Secure (HTTPS-only) flag")
    sameSite: Literal["Strict", "Lax", "None"] = Field(
        default="Lax",
        description="SameSite attribute",
    )
241+
242+
243+
class LocalStorageItem(BaseModel):
    """
    One key/value entry of an origin's localStorage.

    Shaped as a {name, value} object, the layout Playwright uses for
    localStorage entries.
    """

    name: str = Field(default=..., description="LocalStorage key")
    value: str = Field(default=..., description="LocalStorage value")
252+
253+
254+
class OriginStorage(BaseModel):
    """
    localStorage contents belonging to one origin.

    Pairs an origin URL with the list of items stored for it.
    """

    origin: str = Field(
        default=...,
        description="Origin URL (e.g., 'https://example.com')",
    )
    localStorage: list[LocalStorageItem] = Field(
        description="LocalStorage items for this origin",
        default_factory=list,
    )
265+
266+
267+
class StorageState(BaseModel):
    """
    Complete browser storage state (cookies + localStorage).

    This is the format used by Playwright's storage_state() method.
    Can be saved to/loaded from JSON files for session injection.
    """

    cookies: list[Cookie] = Field(
        default_factory=list, description="Cookies to inject (global scope)"
    )
    origins: list[OriginStorage] = Field(
        default_factory=list, description="LocalStorage data per origin"
    )

    @classmethod
    def from_dict(cls, data: dict) -> "StorageState":
        """
        Create StorageState from dictionary (e.g., loaded from JSON).

        Entries that are already model instances are passed through as-is.
        A missing key and an explicit ``None`` (JSON ``null``) are both
        treated as "no entries".

        Args:
            data: Dictionary with 'cookies' and/or 'origins' keys. Each
                origin's 'localStorage' may be a list of {name, value}
                objects or a plain {key: value} mapping.

        Returns:
            StorageState instance
        """
        # `or []` (rather than a .get default) also covers keys that are
        # present but null, which would otherwise fail when iterated.
        cookies = [
            Cookie(**cookie) if isinstance(cookie, dict) else cookie
            for cookie in data.get("cookies") or []
        ]

        origins = []
        for origin_data in data.get("origins") or []:
            if not isinstance(origin_data, dict):
                # Already an OriginStorage (or equivalent); keep untouched.
                origins.append(origin_data)
                continue

            localStorage_data = origin_data.get("localStorage") or []
            if isinstance(localStorage_data, dict):
                # Plain {key: value} mapping -> list of LocalStorageItem
                localStorage_items = [
                    LocalStorageItem(name=k, value=v) for k, v in localStorage_data.items()
                ]
            else:
                # List of {name, value} dicts and/or LocalStorageItem objects
                localStorage_items = [
                    LocalStorageItem(**item) if isinstance(item, dict) else item
                    for item in localStorage_data
                ]

            origins.append(
                OriginStorage(
                    origin=origin_data.get("origin", ""),
                    localStorage=localStorage_items,
                )
            )

        return cls(cookies=cookies, origins=origins)

    def to_playwright_dict(self) -> dict:
        """
        Convert to Playwright-compatible dictionary format.

        Cookie fields that are unset (``None``, e.g. ``expires`` when no
        expiry was given) are left out rather than emitted as null.

        Returns:
            Dictionary compatible with Playwright's storage_state parameter
        """
        return {
            # exclude_none drops "expires": None so a null is never handed
            # over where a numeric timestamp is expected.
            "cookies": [cookie.model_dump(exclude_none=True) for cookie in self.cookies],
            "origins": [
                {
                    "origin": origin.origin,
                    "localStorage": [item.model_dump() for item in origin.localStorage],
                }
                for origin in self.origins
            ],
        }

sentience/utils.py

Lines changed: 40 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,10 @@
1111
import json
1212
import re
1313
from dataclasses import dataclass
14-
from typing import Any, Dict, List, Optional, Union
14+
from pathlib import Path
15+
from typing import Any
16+
17+
from playwright.sync_api import BrowserContext
1518

1619

1720
@dataclass
@@ -255,3 +258,39 @@ def compute_snapshot_digests(elements: list[dict[str, Any]]) -> dict[str, str]:
255258
"strict": sha256_digest(canonical_strict),
256259
"loose": sha256_digest(canonical_loose),
257260
}
261+
262+
263+
def save_storage_state(context: BrowserContext, file_path: str | Path) -> None:
    """
    Save current browser storage state (cookies + localStorage) to a file.

    This is useful for capturing a logged-in session to reuse later.
    Missing parent directories are created, and the file is written as
    UTF-8 JSON regardless of the platform's default encoding.

    Args:
        context: Playwright BrowserContext
        file_path: Path to save the storage state JSON file

    Example:
        ```python
        from sentience import SentienceBrowser, save_storage_state

        browser = SentienceBrowser()
        browser.start()

        # User logs in manually or via agent
        browser.goto("https://example.com")
        # ... login happens ...

        # Save session for later
        save_storage_state(browser.context, "auth.json")
        ```

    Raises:
        IOError: If file cannot be written
    """
    storage_state = context.storage_state()

    file_path_obj = Path(file_path)
    file_path_obj.parent.mkdir(parents=True, exist_ok=True)

    # Explicit encoding: the file is later read back with encoding="utf-8",
    # so do not depend on the locale default when writing it.
    with file_path_obj.open("w", encoding="utf-8") as f:
        json.dump(storage_state, f, indent=2)

    print(f"✅ [Sentience] Saved storage state to {file_path_obj}")

0 commit comments

Comments
 (0)