SentienceAPI
diff --git a/README.md b/README.md
Lines changed: 68 additions & 0 deletions
diff --git a/examples/find_text_demo.py b/examples/find_text_demo.py
Lines changed: 100 additions & 0 deletions
diff --git a/sentience/__init__.py b/sentience/__init__.py
Lines changed: 13 additions & 0 deletions
diff --git a/sentience/expect.py b/sentience/expect.py
Lines changed: 0 additions & 1 deletion
diff --git a/sentience/extension/background.js b/sentience/extension/background.js
Lines changed: 3 additions & 3 deletions
diff --git a/sentience/extension/content.js b/sentience/extension/content.js
Lines changed: 1 addition & 1 deletion
@@ -401,6 +401,74 @@ data_url = screenshot(browser, format="jpeg", quality=85)
 
 </details>
 
+<details>
+<summary><h3>🔎 Text Search - Find Elements by Visible Text</h3></summary>
+
+**`find_text_rect(browser, text, case_sensitive=False, whole_word=False, max_results=10)`** - Find text on page and get exact pixel coordinates
+
+Find buttons, links, or any UI elements by their visible text without needing element IDs or CSS selectors. Returns exact pixel coordinates for each match.
+
+**Example:**
+```python
+from sentience import SentienceBrowser, find_text_rect, click_rect
+
+with SentienceBrowser() as browser:
+    browser.page.goto("https://example.com")
+
+    # Find "Sign In" button
+    result = find_text_rect(browser, "Sign In")
+    if result.status == "success" and result.results:
+        first_match = result.results[0]
+        print(f"Found at: ({first_match.rect.x}, {first_match.rect.y})")
+        print(f"In viewport: {first_match.in_viewport}")
+
+        # Click on the found text
+        if first_match.in_viewport:
+            click_rect(browser, {
+                "x": first_match.rect.x,
+                "y": first_match.rect.y,
+                "w": first_match.rect.width,
+                "h": first_match.rect.height
+            })
+```
+
+**Advanced Options:**
+```python
+# Case-sensitive search
+result = find_text_rect(browser, "LOGIN", case_sensitive=True)
+
+# Whole word only (won't match "log" as part of "login")
+result = find_text_rect(browser, "log", whole_word=True)
+
+# Find multiple matches
+result = find_text_rect(browser, "Buy", max_results=10)
+for match in result.results:
+    if match.in_viewport:
+        print(f"Found '{match.text}' at ({match.rect.x}, {match.rect.y})")
+        print(f"Context: ...{match.context.before}[{match.text}]{match.context.after}...")
+```
+
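+**Returns:** `TextRectSearchResult` with:
+- **`status`**: "success" or "error"
+- **`query`**: The text that was searched for
+- **`matches`**: Number of matches found
+- **`results`**: List of `TextMatch` objects with:
+  - `text` - The matched text
+  - `rect` - Absolute coordinates (with scroll offset)
+  - `viewport_rect` - Viewport-relative coordinates
+  - `context` - Surrounding text (before/after)
+  - `in_viewport` - Whether visible in current viewport
+- **`viewport`**: Viewport size (`width`, `height`)
+- **`error`**: Error message when the search fails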
+
+**Use Cases:**
+- Find buttons/links by visible text without CSS selectors
+- Get exact pixel coordinates for click automation
+- Verify text visibility and position on page
+- Search dynamic content that changes frequently
+
+**Note:** Does not consume API credits (runs locally in browser)
+
+**See example:** `examples/find_text_demo.py`
+
+</details>
+
 ---
 
 ## 📋 Reference
 
@@ -0,0 +1,100 @@
+"""
+Text Search Demo - Using find_text_rect() to locate elements by visible text
+
+This example demonstrates how to:
+1. Find text on a webpage and get exact pixel coordinates
+2. Use case-sensitive and whole-word matching options
+3. Click on found text using click_rect()
+4. Handle multiple matches and filter by viewport visibility
+"""
+
+from sentience import SentienceBrowser, click_rect, find_text_rect
+
+
+def main():
+    with SentienceBrowser() as browser:
+        # Navigate to a search page
+        browser.page.goto("https://www.google.com")
+        browser.page.wait_for_load_state("networkidle")
+
+        print("\n" + "=" * 60)
+        print("Text Search Demo")
+        print("=" * 60 + "\n")
+
+        # Example 1: Simple text search
+        print("Example 1: Finding 'Google Search' button")
+        print("-" * 60)
+        result = find_text_rect(browser, "Google Search")
+
+        if result.status == "success" and result.results:
+            print(f"✓ Found {result.matches} match(es) for '{result.query}'")
+            for i, match in enumerate(result.results[:3]):  # Show first 3
+                print(f"\nMatch {i + 1}:")
+                print(f"  Text: '{match.text}'")
+                print(f"  Position: ({match.rect.x:.1f}, {match.rect.y:.1f})")
+                print(f"  Size: {match.rect.width:.1f}x{match.rect.height:.1f} pixels")
+                print(f"  In viewport: {match.in_viewport}")
+                print(f"  Context: ...{match.context.before}[{match.text}]{match.context.after}...")
+        else:
+            print(f"✗ Search failed: {result.error}")
+
+        # Example 2: Find and click search box
+        print("\n\nExample 2: Finding and clicking the search box")
+        print("-" * 60)
+        result = find_text_rect(browser, "Search", max_results=5)
+
+        if result.status == "success" and result.results:
+            # Find the first visible match
+            for match in result.results:
+                if match.in_viewport:
+                    print(f"✓ Found visible match: '{match.text}'")
+                    print(f"  Clicking at ({match.rect.x:.1f}, {match.rect.y:.1f})")
+
+                    # Click in the center of the text
+                    click_result = click_rect(
+                        browser,
+                        {
+                            "x": match.rect.x,
+                            "y": match.rect.y,
+                            "w": match.rect.width,
+                            "h": match.rect.height,
+                        },
+                    )
+
+                    if click_result.success:
+                        print(f"  ✓ Click successful!")
+                    break
+
+        # Example 3: Case-sensitive search
+        print("\n\nExample 3: Case-sensitive search for 'GOOGLE'")
+        print("-" * 60)
+        result_insensitive = find_text_rect(browser, "GOOGLE", case_sensitive=False)
+        result_sensitive = find_text_rect(browser, "GOOGLE", case_sensitive=True)
+
+        print(f"Case-insensitive search: {result_insensitive.matches or 0} matches")
+        print(f"Case-sensitive search: {result_sensitive.matches or 0} matches")
+
+        # Example 4: Whole word search
+        print("\n\nExample 4: Whole word search")
+        print("-" * 60)
+        result_partial = find_text_rect(browser, "Search", whole_word=False)
+        result_whole = find_text_rect(browser, "Search", whole_word=True)
+
+        print(f"Partial word match: {result_partial.matches or 0} matches")
+        print(f"Whole word only: {result_whole.matches or 0} matches")
+
+        # Example 5: Get viewport information
+        print("\n\nExample 5: Viewport and scroll information")
+        print("-" * 60)
+        result = find_text_rect(browser, "Google")
+        if result.status == "success" and result.viewport:
+            print(f"Viewport size: {result.viewport.width}x{result.viewport.height}")
+            # Note: scroll position would be available if viewport had scroll_x/scroll_y fields
+
+        print("\n" + "=" * 60)
+        print("Demo complete!")
+        print("=" * 60 + "\n")
+
+
+if __name__ == "__main__":
+    main()
@@ -41,8 +41,13 @@
     SnapshotFilter,
     SnapshotOptions,
     StorageState,
+    TextContext,
+    TextMatch,
+    TextRect,
+    TextRectSearchResult,
     TokenStats,
     Viewport,
+    ViewportRect,
     WaitResult,
 )
 from .overlay import clear_overlay, show_overlay
@@ -51,6 +56,7 @@
 from .recorder import Recorder, Trace, TraceStep, record
 from .screenshot import screenshot
 from .snapshot import snapshot
+from .text_search import find_text_rect
 from .tracer_factory import SENTIENCE_API_URL, create_tracer
 from .tracing import JsonlTraceSink, TraceEvent, Tracer, TraceSink
 
@@ -96,6 +102,13 @@
     "screenshot",
     "show_overlay",
     "clear_overlay",
+    # Text Search
+    "find_text_rect",
+    "TextRectSearchResult",
+    "TextMatch",
+    "TextRect",
+    "ViewportRect",
+    "TextContext",
     # Agent Layer (Phase 1 & 2)
     "BaseAgent",
     "LLMProvider",
 
@@ -3,7 +3,6 @@
 """
 
 import time
-from typing import Optional, Union
 
 from .browser import SentienceBrowser
 from .models import Element
 
@@ -144,13 +144,13 @@ async function handleScreenshotCapture(_tabId, options = {}) {
 async function handleSnapshotProcessing(rawData, options = {}) {
     const MAX_ELEMENTS = 10000; // Safety limit to prevent hangs
     const startTime = performance.now();
-    
+
     try {
         // Safety check: limit element count to prevent hangs
         if (!Array.isArray(rawData)) {
             throw new Error('rawData must be an array');
         }
-        
+
         if (rawData.length > MAX_ELEMENTS) {
             console.warn(`[Sentience Background] ⚠️ Large dataset: ${rawData.length} elements. Limiting to ${MAX_ELEMENTS} to prevent hangs.`);
             rawData = rawData.slice(0, MAX_ELEMENTS);
@@ -186,7 +186,7 @@ async function handleSnapshotProcessing(rawData, options = {}) {
             // Add timeout protection (18 seconds - less than content.js timeout)
             analyzedElements = await Promise.race([
                 wasmPromise,
-                new Promise((_, reject) => 
+                new Promise((_, reject) =>
                     setTimeout(() => reject(new Error('WASM processing timeout (>18s)')), 18000)
                 )
             ]);
 
@@ -92,7 +92,7 @@ function handleSnapshotRequest(data) {
                 if (responded) return; // Already responded via timeout
                 responded = true;
                 clearTimeout(timeoutId);
-                
+
                 const duration = performance.now() - startTime;
 
                 // Handle Chrome extension errors (e.g., background script crashed)