Skip to content

Commit f9d64a0

Browse files
authored
Merge pull request #47 from SentienceAPI/agent_abstraction2
Phase 2/2: Conversational Agent
2 parents 96e88f2 + 7f8f450 commit f9d64a0

File tree

5 files changed

+1275
-4
lines changed

5 files changed

+1275
-4
lines changed

README.md

Lines changed: 57 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -9,16 +9,66 @@ pip install -e .
99

1010
# Install Playwright browsers (required)
1111
playwright install chromium
12+
13+
# For LLM Agent features (optional)
14+
pip install openai # For OpenAI models
15+
pip install anthropic # For Claude models
16+
pip install transformers torch # For local LLMs
17+
```
18+
19+
## Quick Start: Choose Your Abstraction Level
20+
21+
The Sentience SDK offers **three abstraction levels** — use whichever fits your needs:
22+
23+
### 🎯 **Level 3: Natural Language (Easiest)** - For non-technical users
24+
25+
```python
26+
from sentience import SentienceBrowser, ConversationalAgent
27+
from sentience.llm_provider import OpenAIProvider
28+
29+
browser = SentienceBrowser()
30+
llm = OpenAIProvider(api_key="your-key", model="gpt-4o")
31+
agent = ConversationalAgent(browser, llm)
32+
33+
with browser:
34+
response = agent.execute("Search for magic mouse on google.com")
35+
print(response)
36+
# → "I searched for 'magic mouse' and found several results.
37+
# The top result is from amazon.com selling Magic Mouse 2 for $79."
1238
```
1339

14-
## Quick Start
40+
**Best for:** End users, chatbots, no-code platforms
41+
**Code required:** 3-5 lines
42+
**Technical knowledge:** None
43+
44+
### ⚙️ **Level 2: Technical Commands (Recommended)** - For AI developers
45+
46+
```python
47+
from sentience import SentienceBrowser, SentienceAgent
48+
from sentience.llm_provider import OpenAIProvider
49+
50+
browser = SentienceBrowser()
51+
llm = OpenAIProvider(api_key="your-key", model="gpt-4o")
52+
agent = SentienceAgent(browser, llm)
53+
54+
with browser:
55+
browser.page.goto("https://google.com")
56+
agent.act("Click the search box")
57+
agent.act("Type 'magic mouse' into the search field")
58+
agent.act("Press Enter key")
59+
```
60+
61+
**Best for:** Building AI agents, automation scripts
62+
**Code required:** 10-15 lines
63+
**Technical knowledge:** Medium (Python basics)
64+
65+
### 🔧 **Level 1: Direct SDK (Most Control)** - For production automation
1566

1667
```python
1768
from sentience import SentienceBrowser, snapshot, find, click
1869

19-
# Start browser with extension
2070
with SentienceBrowser(headless=False) as browser:
21-
browser.goto("https://example.com", wait_until="domcontentloaded")
71+
browser.page.goto("https://example.com")
2272

2373
# Take snapshot - captures all interactive elements
2474
snap = snapshot(browser)
@@ -31,6 +81,10 @@ with SentienceBrowser(headless=False) as browser:
3181
print(f"Click success: {result.success}")
3282
```
3383

84+
**Best for:** Maximum control, performance-critical apps
85+
**Code required:** 20-50 lines
86+
**Technical knowledge:** High (SDK API, selectors)
87+
3488
## Real-World Example: Amazon Shopping Bot
3589

3690
This example demonstrates navigating Amazon, finding products, and adding items to the cart:

examples/agent_layers_demo.py

Lines changed: 222 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,222 @@
1+
"""
2+
Demonstration of all three abstraction layers in the Sentience SDK
3+
4+
Layer 1: Direct SDK (Full Control)
5+
Layer 2: SentienceAgent (Technical Commands)
6+
Layer 3: ConversationalAgent (Natural Language)
7+
8+
This script shows how the same task can be accomplished at different abstraction levels.
9+
"""
10+
11+
import os
12+
from dotenv import load_dotenv
13+
14+
# Load environment variables
15+
load_dotenv()
16+
17+
def demo_layer1_direct_sdk():
    """
    Layer 1: Direct SDK Usage
    - Full control over every action
    - Requires knowing exact element selectors
    - 20-50 lines of code for typical automation

    Opens google.com in a non-headless browser, looks up the search box in a
    snapshot, clicks it, types "magic mouse" and presses Enter. If no search
    box can be located, the demo reports the failure and returns early
    instead of crashing on a missing element.
    """
    print("\n" + "="*70)
    print("LAYER 1: Direct SDK Usage (Full Control)")
    print("="*70)

    # Imported lazily so the menu and comparison table work without the SDK.
    from sentience import SentienceBrowser, snapshot, find, click, type_text, press

    with SentienceBrowser(headless=False) as browser:
        # Navigate
        browser.page.goto("https://google.com")

        # Get snapshot
        snap = snapshot(browser)

        # Find search box manually, falling back to a generic textbox role
        search_box = find(snap, "role=searchbox") or find(snap, "role=textbox")
        if not search_box:
            # Without this guard the `.id` access below raises AttributeError.
            print("\n❌ Layer 1 Demo Failed: no search box found on the page")
            return

        # Click search box
        click(browser, search_box.id)

        # Type query
        type_text(browser, search_box.id, "magic mouse")

        # Press Enter
        press(browser, "Enter")

    print("\n✅ Layer 1 Demo Complete")
    print(" Code required: ~20 lines")
    print(" Technical knowledge: High")
    print(" Flexibility: Maximum")
55+
56+
57+
def demo_layer2_sentience_agent():
    """
    Layer 2: SentienceAgent (Technical Commands)
    - High-level commands with LLM intelligence
    - No need to know selectors
    - 15 lines of code for typical automation

    Drives a Google search through three short textual commands handed to
    ``SentienceAgent.act`` and then prints the agent's total token usage.
    Reads the OpenAI key from the ``OPENAI_API_KEY`` environment variable.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("LAYER 2: SentienceAgent (Technical Commands)")
    print(rule)

    from sentience import SentienceBrowser, SentienceAgent
    from sentience.llm_provider import OpenAIProvider

    # Wire up browser -> LLM provider -> agent
    session = SentienceBrowser(headless=False)
    provider = OpenAIProvider(
        api_key=os.getenv("OPENAI_API_KEY"),
        model="gpt-4o-mini",
    )
    agent = SentienceAgent(session, provider, verbose=True)

    # The technical commands, issued one at a time in this order
    commands = (
        "Click the search box",
        "Type 'magic mouse' into the search field",
        "Press Enter key",
    )

    with session:
        session.page.goto("https://google.com")
        for command in commands:
            agent.act(command)

    for summary_line in (
        "\n✅ Layer 2 Demo Complete",
        " Code required: ~10 lines",
        " Technical knowledge: Medium",
        " Flexibility: High",
    ):
        print(summary_line)
    # Token stats are queried last, after the static summary has been printed.
    print(f" Tokens used: {agent.get_token_stats()['total_tokens']}")
89+
90+
91+
def demo_layer3_conversational_agent():
    """
    Layer 3: ConversationalAgent (Natural Language)
    - Pure natural language interface
    - Automatic planning and execution
    - 3 lines of code for typical automation

    Hands a single natural-language request to ``ConversationalAgent.execute``
    and prints whatever the agent returns. Reads the OpenAI key from the
    ``OPENAI_API_KEY`` environment variable.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("LAYER 3: ConversationalAgent (Natural Language)")
    print(rule)

    from sentience import SentienceBrowser, ConversationalAgent
    from sentience.llm_provider import OpenAIProvider

    # Wire up browser -> LLM provider -> agent
    session = SentienceBrowser(headless=False)
    provider = OpenAIProvider(
        api_key=os.getenv("OPENAI_API_KEY"),
        model="gpt-4o",
    )
    agent = ConversationalAgent(session, provider, verbose=True)

    request = "Search for magic mouse on google.com"
    with session:
        # One natural-language call; the agent plans and executes on its own
        reply = agent.execute(request)

    for summary_line in (
        "\n✅ Layer 3 Demo Complete",
        " Code required: ~5 lines",
        " Technical knowledge: None",
        " Flexibility: Medium",
        f" Agent Response: {reply}",
    ):
        print(summary_line)
119+
120+
121+
def demo_layer3_with_local_llm():
    """
    Layer 3 with Local LLM (Zero Cost)
    - Uses local Qwen 2.5 3B model
    - No API costs
    - Runs on your hardware

    Same flow as the OpenAI-backed Layer 3 demo, but the agent is backed by
    ``LocalLLMProvider`` loading ``Qwen/Qwen2.5-3B-Instruct``.
    """
    rule = "=" * 70
    print("\n" + rule)
    print("LAYER 3: ConversationalAgent with Local LLM (Zero Cost)")
    print(rule)

    from sentience import SentienceBrowser, ConversationalAgent
    from sentience.llm_provider import LocalLLMProvider

    session = SentienceBrowser(headless=False)

    # Local model settings, gathered in one place before construction
    local_llm_options = {
        "model_name": "Qwen/Qwen2.5-3B-Instruct",
        "device": "auto",  # Use CUDA if available
        "load_in_4bit": True,  # Save memory with quantization
    }
    provider = LocalLLMProvider(**local_llm_options)
    agent = ConversationalAgent(session, provider, verbose=True)

    request = "Go to google.com and search for python tutorials"
    with session:
        # Execute in natural language
        reply = agent.execute(request)

    for summary_line in (
        "\n✅ Layer 3 with Local LLM Demo Complete",
        " API Cost: $0 (runs locally)",
        " Privacy: 100% (no data sent to cloud)",
        f" Agent Response: {reply}",
    ):
        print(summary_line)
152+
153+
154+
def demo_comparison():
    """
    Side-by-side comparison of all layers

    Prints a banner followed by a pipe-delimited table with one column per
    abstraction layer. Rows are rendered from data so that every line is
    padded to the same width; the "Lines of code" figures follow the ranges
    quoted in the project README (20-50 / 10-15 / 3-5).
    """
    header = ("Feature", "Layer 1 (SDK)", "Layer 2 (Agent)", "Layer 3 (Conversational)")
    rows = (
        ("Lines of code", "20-50", "10-15", "3-5"),
        ("Technical knowledge", "High", "Medium", "None"),
        ("Requires selectors?", "Yes", "No", "No"),
        ("LLM required?", "No", "Yes", "Yes"),
        ("Cost per action", "$0", "~$0.005", "~$0.010"),
        ("Speed", "Fastest", "Fast", "Medium"),
        ("Error handling", "Manual", "Auto-retry", "Auto-recovery"),
        ("Multi-step planning", "Manual", "Manual", "Automatic"),
        ("Natural language I/O", "No", "Commands only", "Full conversation"),
        ("Best for", "Production", "AI developers", "End users"),
    )
    # Cell widths (excluding the one-space padding on each side); these
    # reproduce the 26/18/18/26-dash separator row of the hand-written table.
    widths = (24, 16, 16, 24)

    def render(cells):
        # Left-justify each cell to its column width and join with pipes.
        padded = (cell.ljust(width) for cell, width in zip(cells, widths))
        return "| " + " | ".join(padded) + " |"

    separator = "|" + "|".join("-" * (width + 2) for width in widths) + "|"
    table = "\n".join([render(header), separator, *(render(row) for row in rows)])

    print("\n" + "="*70)
    print("COMPARISON: All Three Layers")
    print("="*70)

    # Blank line before and after the table, matching the original layout.
    print("\n" + table + "\n")
178+
179+
180+
def main():
    """Run all demos

    Prints the menu, reads one choice from stdin and runs the matching demo.
    Demos backed by OpenAI are refused with an error message when the
    ``OPENAI_API_KEY`` environment variable is not set. If stdin is closed
    (for example in a non-interactive run) the function prints a short
    notice and returns instead of raising ``EOFError``.
    """
    print("\n" + "="*70)
    print("SENTIENCE SDK: Multi-Layer Abstraction Demo")
    print("="*70)
    print("\nThis demo shows how to use the SDK at different abstraction levels:")
    print(" 1. Layer 1: Direct SDK (maximum control)")
    print(" 2. Layer 2: SentienceAgent (technical commands)")
    print(" 3. Layer 3: ConversationalAgent (natural language)")
    print("\nChoose which demo to run:")
    print(" 1 - Layer 1: Direct SDK")
    print(" 2 - Layer 2: SentienceAgent")
    print(" 3 - Layer 3: ConversationalAgent (OpenAI)")
    print(" 4 - Layer 3: ConversationalAgent (Local LLM)")
    print(" 5 - Show comparison table")
    print(" 0 - Exit")

    try:
        choice = input("\nEnter your choice (0-5): ").strip()
    except EOFError:
        # No interactive stdin available: there is nothing to choose from.
        print("\nNo input available, exiting.")
        return

    if choice == "0":
        print("Goodbye!")
        return

    # choice -> (demo callable, whether it needs OPENAI_API_KEY)
    demos = {
        "1": (demo_layer1_direct_sdk, False),
        "2": (demo_layer2_sentience_agent, True),
        "3": (demo_layer3_conversational_agent, True),
        "4": (demo_layer3_with_local_llm, False),
        "5": (demo_comparison, False),
    }

    entry = demos.get(choice)
    if entry is None:
        print("Invalid choice")
        return

    run_demo, needs_openai_key = entry
    if needs_openai_key and not os.getenv("OPENAI_API_KEY"):
        print("\n❌ Error: OPENAI_API_KEY not set")
        return

    run_demo()
219+
220+
221+
if __name__ == "__main__":
222+
main()

sentience/__init__.py

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
# Agent Layer (Phase 1 & 2)
1919
from .llm_provider import LLMProvider, LLMResponse, OpenAIProvider, AnthropicProvider, LocalLLMProvider
2020
from .agent import SentienceAgent
21+
from .conversational_agent import ConversationalAgent
2122

2223
__version__ = "0.10.7"
2324

@@ -49,12 +50,13 @@
4950
"generate",
5051
"read",
5152
"screenshot",
52-
# Agent Layer
53+
# Agent Layer (Phase 1 & 2)
5354
"LLMProvider",
5455
"LLMResponse",
5556
"OpenAIProvider",
5657
"AnthropicProvider",
5758
"LocalLLMProvider",
5859
"SentienceAgent",
60+
"ConversationalAgent",
5961
]
6062

0 commit comments

Comments
 (0)