docs

rcholic · rcholic · commit 7fcf91b1ce0b · 2026-01-02T11:33:54.000-08:00
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
@@ -32,6 +32,24 @@ jobs:
     - name: Install dependencies
       run: |
         pip install -e ".[dev]"
+        pip install pre-commit mypy types-requests
+
+    - name: Lint with pre-commit
+      continue-on-error: true  # advisory: a failure here is reported but does not fail the job
+      run: |
+        pre-commit run --all-files
+
+    - name: Type check with mypy
+      continue-on-error: true  # advisory: type errors are surfaced without blocking the build
+      run: |
+        mypy sentience --ignore-missing-imports --no-strict-optional
+
+    - name: Check code style
+      continue-on-error: true  # advisory: style violations are surfaced without blocking the build
+      run: |
+        black --check sentience tests --line-length=100
+        isort --check-only --profile black sentience tests
+        flake8 sentience tests --max-line-length=100 --extend-ignore=E203,W503,E501 --max-complexity=15
 
     - name: Build extension (if needed)
       if: runner.os != 'Windows'
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
@@ -50,20 +50,19 @@ repos:
           - '--max-complexity=15'
         exclude: ^(venv/|\.venv/|build/|dist/|tests/fixtures/)
 
-  # Type checking with mypy (disabled for now - too strict)
-  # Uncomment to enable strict type checking
-  # - repo: https://github.com/pre-commit/mirrors-mypy
-  #   rev: v1.8.0
-  #   hooks:
-  #     - id: mypy
-  #       additional_dependencies:
-  #         - pydantic>=2.0
-  #         - types-requests
-  #       args:
-  #         - '--ignore-missing-imports'
-  #         - '--no-strict-optional'
-  #         - '--warn-unused-ignores'
-  #       exclude: ^(tests/|examples/|venv/|\.venv/|build/|dist/)
+  # Type checking with mypy (tests/ and examples/ are skipped by the exclude pattern below)
+  - repo: https://github.com/pre-commit/mirrors-mypy
+    rev: v1.8.0
+    hooks:
+      - id: mypy
+        additional_dependencies:
+          - pydantic>=2.0
+          - types-requests
+        args:
+          - '--ignore-missing-imports'
+          - '--no-strict-optional'
+          - '--warn-unused-ignores'
+        exclude: ^(tests/|examples/|venv/|\.venv/|build/|dist/)
 
   # Security checks
   - repo: https://github.com/PyCQA/bandit
diff --git a/sentience/__init__.py b/sentience/__init__.py
@@ -55,6 +55,7 @@
 from .read import read
 from .recorder import Recorder, Trace, TraceStep, record
 from .screenshot import screenshot
+from .sentience_methods import AgentAction, SentienceMethod
 from .snapshot import snapshot
 from .text_search import find_text_rect
 from .tracer_factory import SENTIENCE_API_URL, create_tracer
@@ -150,4 +151,7 @@
     "format_snapshot_for_llm",
     # Agent Config (v0.12.0+)
     "AgentConfig",
+    # Enums (window.sentience method names and agent action types)
+    "SentienceMethod",
+    "AgentAction",
 ]
diff --git a/sentience/actions.py b/sentience/actions.py
@@ -9,6 +9,7 @@
 from .browser import AsyncSentienceBrowser, SentienceBrowser
 from .browser_evaluator import BrowserEvaluator
 from .models import ActionResult, BBox, Snapshot
+from .sentience_methods import SentienceMethod
 from .snapshot import snapshot, snapshot_async
 
 
@@ -62,22 +63,22 @@ def click(  # noqa: C901
             else:
                 # Fallback to JS click if element not found in snapshot
                 try:
-                    success = BrowserEvaluator.call_sentience_method(
-                        browser.page, "click", element_id
+                    success = BrowserEvaluator.invoke(
+                        browser.page, SentienceMethod.CLICK, element_id
                     )
                 except Exception:
                     # Navigation might have destroyed context, assume success if URL changed
                     success = True
         except Exception:
             # Fallback to JS click on error
             try:
-                success = BrowserEvaluator.call_sentience_method(browser.page, "click", element_id)
+                success = BrowserEvaluator.invoke(browser.page, SentienceMethod.CLICK, element_id)
             except Exception:
                 # Navigation might have destroyed context, assume success if URL changed
                 success = True
     else:
         # Legacy JS-based click
-        success = BrowserEvaluator.call_sentience_method(browser.page, "click", element_id)
+        success = BrowserEvaluator.invoke(browser.page, SentienceMethod.CLICK, element_id)
 
     # Wait a bit for navigation/DOM updates
     try:
diff --git a/sentience/browser_evaluator.py b/sentience/browser_evaluator.py
@@ -13,6 +13,7 @@
 from playwright.sync_api import Page
 
 from .browser import AsyncSentienceBrowser, SentienceBrowser
+from .sentience_methods import SentienceMethod
 
 
 class BrowserEvaluator:
@@ -126,18 +127,18 @@ async def _gather_diagnostics_async(page: AsyncPage) -> dict[str, Any]:
             return {"error": "Could not gather diagnostics"}
 
     @staticmethod
-    def call_sentience_method(
+    def invoke(
         page: Page,
-        method_name: str,
+        method: SentienceMethod | str,
         *args: Any,
         **kwargs: Any,
     ) -> Any:
         """
-        Call a window.sentience method with error handling.
+        Invoke a window.sentience method with error handling (sync).
 
         Args:
             page: Playwright Page instance (sync)
-            method_name: Name of the method (e.g., "snapshot", "click")
+            method: SentienceMethod enum value or method name string (e.g., SentienceMethod.SNAPSHOT or "snapshot")
             *args: Positional arguments to pass to the method
             **kwargs: Keyword arguments to pass to the method
 
@@ -146,7 +147,16 @@ def call_sentience_method(
 
         Raises:
             RuntimeError: If method is not available or call fails
+
+        Example:
+            ```python
+            result = BrowserEvaluator.invoke(page, SentienceMethod.SNAPSHOT, limit=50)
+            success = BrowserEvaluator.invoke(page, SentienceMethod.CLICK, element_id)
+            ```
         """
+        # Normalize to the raw JS method name; plain strings are passed through unchanged
+        method_name = method.value if isinstance(method, SentienceMethod) else method
+
         # Build JavaScript call
         if args and kwargs:
             # Both args and kwargs - use object spread
@@ -184,18 +194,18 @@ def call_sentience_method(
         return result
 
     @staticmethod
-    async def call_sentience_method_async(
+    async def invoke_async(
         page: AsyncPage,
-        method_name: str,
+        method: SentienceMethod | str,
         *args: Any,
         **kwargs: Any,
     ) -> Any:
         """
-        Call a window.sentience method with error handling (async).
+        Invoke a window.sentience method with error handling (async).
 
         Args:
             page: Playwright AsyncPage instance
-            method_name: Name of the method (e.g., "snapshot", "click")
+            method: SentienceMethod enum value or method name string (e.g., SentienceMethod.SNAPSHOT or "snapshot")
             *args: Positional arguments to pass to the method
             **kwargs: Keyword arguments to pass to the method
 
@@ -204,7 +214,16 @@ async def call_sentience_method_async(
 
         Raises:
             RuntimeError: If method is not available or call fails
+
+        Example:
+            ```python
+            result = await BrowserEvaluator.invoke_async(page, SentienceMethod.SNAPSHOT, limit=50)
+            success = await BrowserEvaluator.invoke_async(page, SentienceMethod.CLICK, element_id)
+            ```
         """
+        # Normalize to the raw JS method name; plain strings are passed through unchanged
+        method_name = method.value if isinstance(method, SentienceMethod) else method
+
         # Build JavaScript call
         if args and kwargs:
             js_code = f"""
@@ -240,18 +259,19 @@ async def call_sentience_method_async(
     @staticmethod
     def verify_method_exists(
         page: Page,
-        method_name: str,
+        method: SentienceMethod | str,
     ) -> bool:
         """
         Verify that a window.sentience method exists.
 
         Args:
             page: Playwright Page instance (sync)
-            method_name: Name of the method to check
+            method: SentienceMethod enum value or method name string
 
         Returns:
             True if method exists, False otherwise
         """
+        method_name = method.value if isinstance(method, SentienceMethod) else method
         try:
             return page.evaluate(f"typeof window.sentience.{method_name} !== 'undefined'")
         except Exception:
@@ -260,18 +280,19 @@ def verify_method_exists(
     @staticmethod
     async def verify_method_exists_async(
         page: AsyncPage,
-        method_name: str,
+        method: SentienceMethod | str,
     ) -> bool:
         """
         Verify that a window.sentience method exists (async).
 
         Args:
             page: Playwright AsyncPage instance
-            method_name: Name of the method to check
+            method: SentienceMethod enum value or method name string
 
         Returns:
             True if method exists, False otherwise
         """
+        method_name = method.value if isinstance(method, SentienceMethod) else method
         try:
             return await page.evaluate(f"typeof window.sentience.{method_name} !== 'undefined'")
         except Exception:
diff --git a/sentience/sentience_methods.py b/sentience/sentience_methods.py
@@ -0,0 +1,87 @@
+"""
+Enums for Sentience API methods and agent actions.
+
+This module provides type-safe enums for:
+1. window.sentience API methods (extension-level)
+2. Agent action types (high-level automation commands)
+"""
+
+from enum import Enum
+
+
+class SentienceMethod(str, Enum):
+    """
+    Enum for window.sentience API methods.
+
+    Each value is the JavaScript method name on the window.sentience object injected
+    by the Chrome extension; members mix in ``str`` and ``str()`` returns that name.
+    """
+
+    # Core snapshot and element discovery
+    SNAPSHOT = "snapshot"
+    """Take a snapshot of the current page with element geometry and metadata."""
+
+    # Element interaction
+    CLICK = "click"
+    """Click an element by its ID from the snapshot registry."""
+
+    # Content extraction
+    READ = "read"
+    """Read page content as raw HTML, text, or markdown."""
+
+    FIND_TEXT_RECT = "findTextRect"
+    """Find exact pixel coordinates of text occurrences on the page."""
+
+    # Visual overlay
+    SHOW_OVERLAY = "showOverlay"
+    """Show visual overlay highlighting elements with importance scores."""
+
+    CLEAR_OVERLAY = "clearOverlay"
+    """Clear the visual overlay."""
+
+    # Developer tools
+    START_RECORDING = "startRecording"
+    """Start recording mode for golden set collection (developer tool)."""
+
+    def __str__(self) -> str:
+        """Return the member's value, i.e. the JavaScript method name."""
+        return self.value
+
+
+class AgentAction(str, Enum):
+    """
+    Enum for high-level agent action types.
+
+    These are the action commands that agents can execute; each may use one or more
+    window.sentience methods or Playwright APIs directly. ``str()`` returns the value.
+    """
+
+    # Element interaction
+    CLICK = "click"
+    """Click an element by ID. Uses window.sentience.click() or Playwright mouse.click()."""
+
+    TYPE = "type"
+    """Type text into an input element. Uses Playwright keyboard.type() directly."""
+
+    PRESS = "press"
+    """Press a keyboard key (Enter, Escape, Tab, etc.). Uses Playwright keyboard.press()."""
+
+    # Navigation
+    NAVIGATE = "navigate"
+    """Navigate to a URL. Uses Playwright page.goto() directly."""
+
+    SCROLL = "scroll"
+    """Scroll the page or an element. Uses Playwright page.mouse.wheel() or element.scrollIntoView()."""
+
+    # Completion
+    FINISH = "finish"
+    """Signal that the agent task is complete. No browser action, just status update."""
+
+    # Wait/verification
+    WAIT = "wait"
+    """Wait for a condition or duration. Uses Playwright wait_for_* methods."""
+
+    def __str__(self) -> str:
+        """Return the member's value, i.e. the action name."""
+        return self.value
+
diff --git a/sentience/snapshot.py b/sentience/snapshot.py
@@ -13,6 +13,7 @@
 from .browser import AsyncSentienceBrowser, SentienceBrowser
 from .browser_evaluator import BrowserEvaluator
 from .models import Snapshot, SnapshotOptions
+from .sentience_methods import SentienceMethod
 
 # Maximum payload size for API requests (10MB server limit)
 MAX_PAYLOAD_BYTES = 10 * 1024 * 1024
@@ -171,7 +172,7 @@ def _snapshot_via_api(
     if options.screenshot is not False:
         raw_options["screenshot"] = options.screenshot
 
-    raw_result = BrowserEvaluator.call_sentience_method(browser.page, "snapshot", **raw_options)
+    raw_result = BrowserEvaluator.invoke(browser.page, SentienceMethod.SNAPSHOT, **raw_options)
 
     # Save trace if requested (save raw data before API processing)
     if options.save_trace:
diff --git a/sentience/text_search.py b/sentience/text_search.py
@@ -95,7 +95,10 @@ def find_text_rect(
     BrowserEvaluator.wait_for_extension(browser.page, timeout_ms=5000)
 
+    # Imported locally because this patch adds no module-level SentienceMethod import here
+    from .sentience_methods import SentienceMethod
+
     # Verify findTextRect method exists (for older extension versions that don't have it)
-    if not BrowserEvaluator.verify_method_exists(browser.page, "findTextRect"):
+    if not BrowserEvaluator.verify_method_exists(browser.page, SentienceMethod.FIND_TEXT_RECT):
         raise RuntimeError(
             "window.sentience.findTextRect is not available. "
             "Please update the Sentience extension to the latest version."