Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 57 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,16 +9,66 @@ pip install -e .

# Install Playwright browsers (required)
playwright install chromium

# For LLM Agent features (optional)
pip install openai # For OpenAI models
pip install anthropic # For Claude models
pip install transformers torch # For local LLMs
```

## Quick Start: Choose Your Abstraction Level

Sentience SDK offers **three abstraction levels** - use what fits your needs:

### 🎯 **Level 3: Natural Language (Easiest)** - For non-technical users

```python
from sentience import SentienceBrowser, ConversationalAgent
from sentience.llm_provider import OpenAIProvider

browser = SentienceBrowser()
llm = OpenAIProvider(api_key="your-key", model="gpt-4o")
agent = ConversationalAgent(browser, llm)

with browser:
response = agent.execute("Search for magic mouse on google.com")
print(response)
# → "I searched for 'magic mouse' and found several results.
# The top result is from amazon.com selling Magic Mouse 2 for $79."
```

**Best for:** End users, chatbots, no-code platforms
**Code required:** 3-5 lines
**Technical knowledge:** None

### ⚙️ **Level 2: Technical Commands (Recommended)** - For AI developers

```python
from sentience import SentienceBrowser, SentienceAgent
from sentience.llm_provider import OpenAIProvider

browser = SentienceBrowser()
llm = OpenAIProvider(api_key="your-key", model="gpt-4o")
agent = SentienceAgent(browser, llm)

with browser:
browser.page.goto("https://google.com")
agent.act("Click the search box")
agent.act("Type 'magic mouse' into the search field")
agent.act("Press Enter key")
```

**Best for:** Building AI agents, automation scripts
**Code required:** 10-15 lines
**Technical knowledge:** Medium (Python basics)

### 🔧 **Level 1: Direct SDK (Most Control)** - For production automation

```python
from sentience import SentienceBrowser, snapshot, find, click

# Start browser with extension
with SentienceBrowser(headless=False) as browser:
    browser.page.goto("https://example.com")

# Take snapshot - captures all interactive elements
snap = snapshot(browser)
Expand All @@ -31,6 +81,10 @@ with SentienceBrowser(headless=False) as browser:
print(f"Click success: {result.success}")
```

**Best for:** Maximum control, performance-critical apps
**Code required:** 20-50 lines
**Technical knowledge:** High (SDK API, selectors)

## Real-World Example: Amazon Shopping Bot

This example demonstrates navigating Amazon, finding products, and adding items to cart:
Expand Down
222 changes: 222 additions & 0 deletions examples/agent_layers_demo.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,222 @@
"""
Demonstration of all three abstraction layers in Sentience SDK

Layer 1: Direct SDK (Full Control)
Layer 2: SentienceAgent (Technical Commands)
Layer 3: ConversationalAgent (Natural Language)

This script shows how the same task can be accomplished at different abstraction levels.
"""

import os
from dotenv import load_dotenv

# Load environment variables
load_dotenv()

def demo_layer1_direct_sdk():
    """
    Layer 1: Direct SDK Usage
    - Full control over every action
    - Requires knowing exact element selectors
    - 50+ lines of code for typical automation
    """
    print("\n" + "="*70)
    print("LAYER 1: Direct SDK Usage (Full Control)")
    print("="*70)

    from sentience import SentienceBrowser, snapshot, find, click, type_text, press

    with SentienceBrowser(headless=False) as browser:
        # Navigate
        browser.page.goto("https://google.com")

        # Get snapshot of all interactive elements on the page
        snap = snapshot(browser)

        # Find search box manually; fall back to a generic textbox
        search_box = find(snap, "role=searchbox")
        if not search_box:
            search_box = find(snap, "role=textbox")

        # FIX: if neither selector matched, the original code crashed with
        # AttributeError on `search_box.id`. Bail out with a message instead.
        if search_box is None:
            print("❌ Could not locate a search box on the page")
            return

        # Click search box
        click(browser, search_box.id)

        # Type query
        type_text(browser, search_box.id, "magic mouse")

        # Press Enter to submit the search
        press(browser, "Enter")

    print("\n✅ Layer 1 Demo Complete")
    print(" Code required: ~20 lines")
    print(" Technical knowledge: High")
    print(" Flexibility: Maximum")


def demo_layer2_sentience_agent():
    """
    Layer 2: SentienceAgent (Technical Commands)
    - High-level commands with LLM intelligence
    - No need to know selectors
    - 15 lines of code for typical automation
    """
    banner = "=" * 70
    print(f"\n{banner}")
    print("LAYER 2: SentienceAgent (Technical Commands)")
    print(banner)

    from sentience import SentienceBrowser, SentienceAgent
    from sentience.llm_provider import OpenAIProvider

    # Wire up the browser, the LLM backend, and the agent on top of both.
    sentience_browser = SentienceBrowser(headless=False)
    provider = OpenAIProvider(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o-mini")
    agent = SentienceAgent(sentience_browser, provider, verbose=True)

    with sentience_browser:
        sentience_browser.page.goto("https://google.com")

        # Drive the page with plain technical commands — no selectors needed.
        for command in (
            "Click the search box",
            "Type 'magic mouse' into the search field",
            "Press Enter key",
        ):
            agent.act(command)

    print("\n✅ Layer 2 Demo Complete")
    print(" Code required: ~10 lines")
    print(" Technical knowledge: Medium")
    print(" Flexibility: High")
    print(f" Tokens used: {agent.get_token_stats()['total_tokens']}")


def demo_layer3_conversational_agent():
    """
    Layer 3: ConversationalAgent (Natural Language)
    - Pure natural language interface
    - Automatic planning and execution
    - 3 lines of code for typical automation
    """
    banner = "=" * 70
    print(f"\n{banner}")
    print("LAYER 3: ConversationalAgent (Natural Language)")
    print(banner)

    from sentience import SentienceBrowser, ConversationalAgent
    from sentience.llm_provider import OpenAIProvider

    # Build the stack: browser + OpenAI backend + conversational wrapper.
    sentience_browser = SentienceBrowser(headless=False)
    provider = OpenAIProvider(api_key=os.getenv("OPENAI_API_KEY"), model="gpt-4o")
    agent = ConversationalAgent(sentience_browser, provider, verbose=True)

    with sentience_browser:
        # One natural-language request; the agent plans and executes the steps.
        response = agent.execute("Search for magic mouse on google.com")

    print("\n✅ Layer 3 Demo Complete")
    print(" Code required: ~5 lines")
    print(" Technical knowledge: None")
    print(" Flexibility: Medium")
    print(f" Agent Response: {response}")


def demo_layer3_with_local_llm():
    """
    Layer 3 with Local LLM (Zero Cost)
    - Uses local Qwen 2.5 3B model
    - No API costs
    - Runs on your hardware
    """
    banner = "=" * 70
    print(f"\n{banner}")
    print("LAYER 3: ConversationalAgent with Local LLM (Zero Cost)")
    print(banner)

    from sentience import SentienceBrowser, ConversationalAgent
    from sentience.llm_provider import LocalLLMProvider

    # Same conversational stack, but backed by a model on this machine.
    sentience_browser = SentienceBrowser(headless=False)
    provider = LocalLLMProvider(
        model_name="Qwen/Qwen2.5-3B-Instruct",
        device="auto",        # Use CUDA if available
        load_in_4bit=True,    # Save memory with quantization
    )
    agent = ConversationalAgent(sentience_browser, provider, verbose=True)

    with sentience_browser:
        # Execute in natural language
        response = agent.execute("Go to google.com and search for python tutorials")

    print("\n✅ Layer 3 with Local LLM Demo Complete")
    print(" API Cost: $0 (runs locally)")
    print(" Privacy: 100% (no data sent to cloud)")
    print(f" Agent Response: {response}")


def demo_comparison():
    """Print a side-by-side feature comparison of all three abstraction layers."""
    banner = "=" * 70
    print(f"\n{banner}")
    print("COMPARISON: All Three Layers")
    print(banner)

    # Static markdown table; kept as one literal so it renders as written.
    table = """
| Feature | Layer 1 (SDK) | Layer 2 (Agent) | Layer 3 (Conversational) |
|--------------------------|------------------|------------------|--------------------------|
| Lines of code | 50+ | 15 | 3-5 |
| Technical knowledge | High | Medium | None |
| Requires selectors? | Yes | No | No |
| LLM required? | No | Yes | Yes |
| Cost per action | $0 | ~$0.005 | ~$0.010 |
| Speed | Fastest | Fast | Medium |
| Error handling | Manual | Auto-retry | Auto-recovery |
| Multi-step planning | Manual | Manual | Automatic |
| Natural language I/O | No | Commands only | Full conversation |
| Best for | Production | AI developers | End users |
"""

    print(table)


def main():
    """Interactive entry point: prompt for a demo number and run it."""
    banner = "=" * 70
    print(f"\n{banner}")
    print("SENTIENCE SDK: Multi-Layer Abstraction Demo")
    print(banner)
    print("\nThis demo shows how to use the SDK at different abstraction levels:")
    print(" 1. Layer 1: Direct SDK (maximum control)")
    print(" 2. Layer 2: SentienceAgent (technical commands)")
    print(" 3. Layer 3: ConversationalAgent (natural language)")
    print("\nChoose which demo to run:")
    print(" 1 - Layer 1: Direct SDK")
    print(" 2 - Layer 2: SentienceAgent")
    print(" 3 - Layer 3: ConversationalAgent (OpenAI)")
    print(" 4 - Layer 3: ConversationalAgent (Local LLM)")
    print(" 5 - Show comparison table")
    print(" 0 - Exit")

    choice = input("\nEnter your choice (0-5): ").strip()

    # Demos 2 and 3 call the OpenAI API, so refuse early without a key.
    if choice in ("2", "3") and not os.getenv("OPENAI_API_KEY"):
        print("\n❌ Error: OPENAI_API_KEY not set")
        return

    # Dispatch table replaces the original if/elif chain; behavior unchanged.
    demos = {
        "1": demo_layer1_direct_sdk,
        "2": demo_layer2_sentience_agent,
        "3": demo_layer3_conversational_agent,
        "4": demo_layer3_with_local_llm,
        "5": demo_comparison,
    }
    if choice == "0":
        print("Goodbye!")
    elif choice in demos:
        demos[choice]()
    else:
        print("Invalid choice")


if __name__ == "__main__":
    main()
86 changes: 86 additions & 0 deletions examples/test_local_llm_agent.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,86 @@
"""
Test script for LocalLLMProvider with Qwen2.5-3B-Instruct
Demonstrates using a local LLM with SentienceAgent
"""

from sentience.llm_provider import LocalLLMProvider

def test_local_llm_basic():
    """Smoke-test LocalLLMProvider with Qwen2.5-3B-Instruct.

    Runs two generations against the local model:
    1. A trivial Q&A to confirm the model loads and responds.
    2. An agent-style prompt to check the response parses as an action call.

    Prints responses and token counts; this is a manual/interactive check,
    not an automated assertion suite.
    """
    print("="*70)
    print("Testing LocalLLMProvider with Qwen2.5-3B-Instruct")
    print("="*70)

    # Initialize local LLM
    # Using the model from your local cache
    llm = LocalLLMProvider(
        model_name="Qwen/Qwen2.5-3B-Instruct",
        device="auto",        # Will use CUDA if available, else CPU
        load_in_4bit=False,   # Set to True to save memory
        torch_dtype="auto"
    )

    print("\n" + "="*70)
    print("Test 1: Simple question")
    print("="*70)

    response = llm.generate(
        system_prompt="You are a helpful web automation assistant.",
        user_prompt="What is 2+2?",
        max_new_tokens=50,
        temperature=0.1
    )

    print(f"Response: {response.content}")
    print(f"Tokens: {response.total_tokens} (prompt: {response.prompt_tokens}, completion: {response.completion_tokens})")

    print("\n" + "="*70)
    print("Test 2: Action parsing (for agent)")
    print("="*70)

    system_prompt = """You are an AI web automation agent.

GOAL: Click the search box

VISIBLE ELEMENTS (sorted by importance, max 50):
[1] <button> "Sign In" {PRIMARY,CLICKABLE,color:blue} @ (100,50) (Imp:900)
[2] <textbox> "" {CLICKABLE} @ (200,100) (Imp:850)
[3] <link> "Help" {} @ (50,150) (Imp:700)

VISUAL CUES:
- {PRIMARY}: Main call-to-action element
- {CLICKABLE}: Element is clickable
- {color:X}: Background color name

RESPONSE FORMAT (return ONLY the function call):
- CLICK(id) - Click element by ID
- TYPE(id, "text") - Type text into element
- PRESS("key") - Press keyboard key
- FINISH() - Task complete
"""

    user_prompt = "What is the next step to achieve the goal?"

    response = llm.generate(
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        max_new_tokens=20,
        temperature=0.0
    )

    print(f"Agent Response: {response.content}")
    print(f"Tokens: {response.total_tokens}")

    # FIX: the original checked `"CLICK(2)" in content or "click(2)" in
    # content.lower()`; the first test is redundant because the
    # case-insensitive check already covers it.
    if "click(2)" in response.content.lower():
        print("\n✅ SUCCESS: LLM correctly identified textbox (element 2) as search box!")
    else:
        print(f"\n⚠️ Response may need adjustment: {response.content}")

    print("\n" + "="*70)
    print("LocalLLMProvider Test Complete!")
    print("="*70)


if __name__ == "__main__":
    test_local_llm_basic()
Loading