Skip to content

Commit 96e88f2

Browse files
committed
Agent abstraction Phase 1
1 parent 8ceb466 commit 96e88f2

File tree

5 files changed

+1320
-1
lines changed

5 files changed

+1320
-1
lines changed

examples/test_local_llm_agent.py

Lines changed: 86 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,86 @@
1+
"""
2+
Test script for LocalLLMProvider with Qwen2.5-3B-Instruct
3+
Demonstrates using a local LLM with SentienceAgent
4+
"""
5+
6+
from sentience.llm_provider import LocalLLMProvider
7+
8+
def test_local_llm_basic():
    """Smoke-test LocalLLMProvider end to end with Qwen2.5-3B-Instruct.

    Loads the model from the local cache, then runs two generations:
    a trivial Q&A sanity check and an agent-style action-selection
    prompt. All results are printed to stdout; nothing is returned.
    """
    separator = "=" * 70

    def banner(title: str, leading_newline: bool = False) -> None:
        # Frame *title* between two separator rules, reproducing the
        # original console layout exactly.
        print(("\n" + separator) if leading_newline else separator)
        print(title)
        print(separator)

    banner("Testing LocalLLMProvider with Qwen2.5-3B-Instruct")

    # Load the model once; both generations below share this provider.
    # NOTE(review): assumes the model is present in the local HF cache —
    # first run may download several GB.
    llm = LocalLLMProvider(
        model_name="Qwen/Qwen2.5-3B-Instruct",
        device="auto",  # CUDA if available, else CPU
        load_in_4bit=False,  # set True to save memory
        torch_dtype="auto",
    )

    banner("Test 1: Simple question", leading_newline=True)

    response = llm.generate(
        system_prompt="You are a helpful web automation assistant.",
        user_prompt="What is 2+2?",
        max_new_tokens=50,
        temperature=0.1,
    )

    print(f"Response: {response.content}")
    print(f"Tokens: {response.total_tokens} (prompt: {response.prompt_tokens}, completion: {response.completion_tokens})")

    banner("Test 2: Action parsing (for agent)", leading_newline=True)

    system_prompt = """You are an AI web automation agent.

GOAL: Click the search box

VISIBLE ELEMENTS (sorted by importance, max 50):
[1] <button> "Sign In" {PRIMARY,CLICKABLE,color:blue} @ (100,50) (Imp:900)
[2] <textbox> "" {CLICKABLE} @ (200,100) (Imp:850)
[3] <link> "Help" {} @ (50,150) (Imp:700)

VISUAL CUES:
- {PRIMARY}: Main call-to-action element
- {CLICKABLE}: Element is clickable
- {color:X}: Background color name

RESPONSE FORMAT (return ONLY the function call):
- CLICK(id) - Click element by ID
- TYPE(id, "text") - Type text into element
- PRESS("key") - Press keyboard key
- FINISH() - Task complete
"""

    user_prompt = "What is the next step to achieve the goal?"

    response = llm.generate(
        system_prompt=system_prompt,
        user_prompt=user_prompt,
        max_new_tokens=20,
        temperature=0.0,
    )

    print(f"Agent Response: {response.content}")
    print(f"Tokens: {response.total_tokens}")

    # The correct action targets element [2] (the empty textbox).
    picked_textbox = (
        "CLICK(2)" in response.content
        or "click(2)" in response.content.lower()
    )
    if picked_textbox:
        print("\n✅ SUCCESS: LLM correctly identified textbox (element 2) as search box!")
    else:
        print(f"\n⚠️ Response may need adjustment: {response.content}")

    banner("LocalLLMProvider Test Complete!", leading_newline=True)
# Manual smoke test: run directly (`python test_local_llm_agent.py`);
# requires the Qwen2.5-3B-Instruct weights available locally.
if __name__ == "__main__":
    test_local_llm_basic()

sentience/__init__.py

Lines changed: 13 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,9 +15,14 @@
1515
from .read import read
1616
from .screenshot import screenshot
1717

18-
__version__ = "0.10.6"
18+
# Agent Layer (Phase 1 & 2)
19+
from .llm_provider import LLMProvider, LLMResponse, OpenAIProvider, AnthropicProvider, LocalLLMProvider
20+
from .agent import SentienceAgent
21+
22+
__version__ = "0.10.7"
1923

2024
__all__ = [
25+
# Core SDK
2126
"SentienceBrowser",
2227
"Snapshot",
2328
"Element",
@@ -44,5 +49,12 @@
4449
"generate",
4550
"read",
4651
"screenshot",
52+
# Agent Layer
53+
"LLMProvider",
54+
"LLMResponse",
55+
"OpenAIProvider",
56+
"AnthropicProvider",
57+
"LocalLLMProvider",
58+
"SentienceAgent",
4759
]
4860

0 commit comments

Comments
 (0)