Skip to content

Commit 5b2c68b

Browse files
author
SentienceDEV
committed
tests all passed
1 parent 00f6b08 commit 5b2c68b

File tree

6 files changed

+308
-24
lines changed

6 files changed

+308
-24
lines changed

pyproject.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -13,6 +13,7 @@ dependencies = [
1313
"pydantic>=2.0.0",
1414
"jsonschema>=4.0.0",
1515
"requests>=2.31.0", # For server-side API calls
16+
"playwright-stealth>=1.0.6", # Bot evasion and stealth mode
1617
]
1718

1819
[project.scripts]

pytest.ini

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,5 +3,5 @@ testpaths = tests
33
python_files = test_*.py
44
python_classes = Test*
55
python_functions = test_*
6-
asyncio_mode = auto
6+
asyncio_mode = strict
77

screenshot.png

124 KB
Loading

sentience/browser.py

Lines changed: 139 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,13 @@
99
from typing import Optional
1010
from playwright.sync_api import sync_playwright, BrowserContext, Page, Playwright
1111

12+
# Import stealth for bot evasion (optional - graceful fallback if not available)
13+
try:
14+
from playwright_stealth import stealth_sync
15+
STEALTH_AVAILABLE = True
16+
except ImportError:
17+
STEALTH_AVAILABLE = False
18+
1219

1320
class SentienceBrowser:
1421
"""Main browser session with Sentience extension loaded"""
@@ -25,13 +32,19 @@ def __init__(
2532
Args:
2633
api_key: Optional API key for server-side processing (Pro/Enterprise tiers)
2734
If None, uses free tier (local extension only)
28-
api_url: Optional server URL for API calls (defaults to https://api.sentienceapi.com)
35+
api_url: Server URL for API calls (defaults to https://api.sentienceapi.com if api_key provided)
2936
If None and api_key is provided, uses default URL
37+
If api_key is not provided, api_url is ignored and the free tier (local extension only) is used
3038
If 'local' or Docker sidecar URL, uses Enterprise tier
3139
headless: Whether to run in headless mode
3240
"""
3341
self.api_key = api_key
34-
self.api_url = api_url or ("https://api.sentienceapi.com" if api_key else None)
42+
# Only set api_url if api_key is provided, otherwise None (free tier)
43+
# Default to https://api.sentienceapi.com if api_key is provided but api_url is not
44+
if api_key:
45+
self.api_url = api_url or "https://api.sentienceapi.com"
46+
else:
47+
self.api_url = None
3548
self.headless = headless
3649
self.playwright: Optional[Playwright] = None
3750
self.context: Optional[BrowserContext] = None
@@ -85,15 +98,58 @@ def start(self) -> None:
8598
# Launch Playwright
8699
self.playwright = sync_playwright().start()
87100

88-
# Create persistent context with extension
89-
self.context = self.playwright.chromium.launch_persistent_context(
90-
user_data_dir=tempfile.mkdtemp(prefix="sentience-profile-"),
91-
headless=self.headless,
92-
args=[
93-
f"--load-extension={temp_dir}",
94-
f"--disable-extensions-except={temp_dir}",
95-
],
96-
)
101+
# Stealth arguments for bot evasion
102+
stealth_args = [
103+
f"--load-extension={temp_dir}",
104+
f"--disable-extensions-except={temp_dir}",
105+
"--disable-blink-features=AutomationControlled", # Hide automation indicators
106+
"--no-sandbox", # Required for some environments
107+
"--disable-infobars", # Hide "Chrome is being controlled" message
108+
]
109+
110+
# Realistic viewport and user-agent for better evasion
111+
viewport_config = {"width": 1920, "height": 1080}
112+
user_agent = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
113+
114+
# Launch browser with extension
115+
# Note: channel="chrome" (system Chrome) has known issues with extension loading
116+
# We use bundled Chromium for reliable extension loading, but still apply stealth features
117+
user_data_dir = tempfile.mkdtemp(prefix="sentience-profile-")
118+
use_chrome_channel = False # Disable for now due to extension loading issues
119+
120+
try:
121+
if use_chrome_channel:
122+
# Try with system Chrome first (better evasion, but may have extension issues)
123+
self.context = self.playwright.chromium.launch_persistent_context(
124+
user_data_dir=user_data_dir,
125+
channel="chrome", # Use system Chrome (better evasion)
126+
headless=self.headless,
127+
args=stealth_args,
128+
viewport=viewport_config,
129+
user_agent=user_agent,
130+
timeout=30000,
131+
)
132+
else:
133+
# Use bundled Chromium (more reliable for extensions)
134+
self.context = self.playwright.chromium.launch_persistent_context(
135+
user_data_dir=user_data_dir,
136+
headless=self.headless,
137+
args=stealth_args,
138+
viewport=viewport_config,
139+
user_agent=user_agent,
140+
timeout=30000,
141+
)
142+
except Exception as launch_error:
143+
# Clean up on failure
144+
if os.path.exists(user_data_dir):
145+
try:
146+
shutil.rmtree(user_data_dir)
147+
except Exception:
148+
pass
149+
raise RuntimeError(
150+
f"Failed to launch browser: {launch_error}\n"
151+
"Make sure Playwright browsers are installed: playwright install chromium"
152+
) from launch_error
97153

98154
# Get first page or create new one
99155
pages = self.context.pages
@@ -102,31 +158,77 @@ def start(self) -> None:
102158
else:
103159
self.page = self.context.new_page()
104160

161+
# Apply stealth patches for bot evasion (if available)
162+
if STEALTH_AVAILABLE:
163+
try:
164+
stealth_sync(self.page)
165+
except Exception:
166+
# Silently fail if stealth application fails - not critical
167+
# This is expected if playwright-stealth has compatibility issues
168+
pass
169+
170+
# Check whether the extension's background page is visible yet
171+
# If it is not, pause briefly so the extension can initialize (best effort; nothing is raised here)
172+
try:
173+
background_pages = [p for p in self.context.background_pages]
174+
if not background_pages:
175+
# Extension might not have a background page, or it's not loaded yet
176+
# Wait a bit for extension to initialize
177+
self.page.wait_for_timeout(1000)
178+
except Exception:
179+
# Background pages might not be accessible, continue anyway
180+
pass
181+
105182
# Navigate to a real page so extension can inject
106183
# Extension content scripts only run on actual pages (not about:blank)
107184
# Use a simple page that loads quickly
108-
self.page.goto("https://example.com", wait_until="domcontentloaded")
185+
self.page.goto("https://example.com", wait_until="domcontentloaded", timeout=15000)
109186

110187
# Give extension time to initialize (WASM loading is async)
111-
self.page.wait_for_timeout(1000)
188+
# Content scripts run at document_idle, so we need to wait for that
189+
# Also wait for extension ID to be set by content.js
190+
self.page.wait_for_timeout(3000)
112191

113192
# Wait for extension to load
114-
if not self._wait_for_extension():
193+
if not self._wait_for_extension(timeout=25000):
115194
# Extension might need more time, try waiting a bit longer
116-
self.page.wait_for_timeout(2000)
117-
if not self._wait_for_extension():
195+
self.page.wait_for_timeout(3000)
196+
if not self._wait_for_extension(timeout=15000):
197+
# Get diagnostic info before failing
198+
try:
199+
diagnostic_info = self.page.evaluate("""
200+
() => {
201+
const info = {
202+
sentience_defined: typeof window.sentience !== 'undefined',
203+
registry_defined: typeof window.sentience_registry !== 'undefined',
204+
snapshot_defined: typeof window.sentience?.snapshot === 'function',
205+
extension_id: document.documentElement.dataset.sentienceExtensionId || 'not set',
206+
url: window.location.href
207+
};
208+
if (window.sentience) {
209+
info.sentience_keys = Object.keys(window.sentience);
210+
}
211+
return info;
212+
}
213+
""")
214+
diagnostic_str = f"\n5. Diagnostic info: {diagnostic_info}"
215+
except Exception:
216+
diagnostic_str = "\n5. Could not get diagnostic info"
217+
118218
raise RuntimeError(
119219
"Extension failed to load after navigation. Make sure:\n"
120220
"1. Extension is built (cd sentience-chrome && ./build.sh)\n"
121221
"2. All files are present (manifest.json, content.js, injected_api.js, pkg/)\n"
122-
"3. Check browser console for errors\n"
222+
"3. Check browser console for errors (run with headless=False to see console)\n"
123223
f"4. Extension path: {temp_dir}"
224+
+ diagnostic_str
124225
)
125226

126-
def _wait_for_extension(self, timeout: int = 15000) -> bool:
227+
def _wait_for_extension(self, timeout: int = 20000) -> bool:
127228
"""Wait for window.sentience API to be available"""
128229
import time
129230
start = time.time()
231+
last_error = None
130232

131233
while time.time() - start < timeout / 1000:
132234
try:
@@ -140,21 +242,35 @@ def _wait_for_extension(self, timeout: int = 15000) -> bool:
140242
if (typeof window.sentience.snapshot !== 'function') {
141243
return { ready: false, reason: 'snapshot function not available' };
142244
}
143-
// Check if WASM module is loaded
245+
// Check if registry is initialized
144246
if (window.sentience_registry === undefined) {
145247
return { ready: false, reason: 'registry not initialized' };
146248
}
249+
// Check if WASM module is loaded (check internal _wasmModule if available)
250+
const sentience = window.sentience;
251+
if (sentience._wasmModule && !sentience._wasmModule.analyze_page) {
252+
return { ready: false, reason: 'WASM module not fully loaded' };
253+
}
254+
// If _wasmModule is not exposed, that's okay - it might be internal
255+
// Just verify the API structure is correct
147256
return { ready: true };
148257
}
149258
""")
150259

151-
if isinstance(result, dict) and result.get("ready"):
152-
return True
260+
if isinstance(result, dict):
261+
if result.get("ready"):
262+
return True
263+
last_error = result.get("reason", "Unknown error")
153264
except Exception as e:
154265
# Continue waiting on errors
155-
pass
266+
last_error = f"Evaluation error: {str(e)}"
156267

157-
time.sleep(0.2)
268+
time.sleep(0.3)
269+
270+
# Emit the last observed status as a warning to aid debugging
271+
if last_error:
272+
import warnings
273+
warnings.warn(f"Extension wait timeout. Last status: {last_error}")
158274

159275
return False
160276

tests/test_bot.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,13 @@
1+
from sentience.browser import SentienceBrowser
2+
3+
def test_bot():
    """Smoke-test the browser against a bot-detection page.

    Starts a ``SentienceBrowser``, opens https://bot.sannysoft.com, waits one
    second for the page to settle, and saves a screenshot to
    ``screenshot.png`` in the current working directory. The test makes no
    assertions; it passes as long as no step raises.
    """
    browser = SentienceBrowser()
    # start() is kept outside the try block: whether close() tolerates a
    # session that never finished starting is not visible from this test.
    browser.start()
    try:
        browser.page.goto("https://bot.sannysoft.com")
        browser.page.wait_for_timeout(1000)
        browser.page.screenshot(path="screenshot.png")
    finally:
        # Always release the browser, even if navigation or the screenshot
        # fails, so a failing run does not leak a Chromium process.
        browser.close()
10+
11+
12+
if __name__ == "__main__":
13+
test_bot()

0 commit comments

Comments
 (0)