test-local-chat.py
#!/usr/bin/env python3
"""
Test script for the local llx chat API.

Tests chat functionality with local Ollama models through the llx proxy.
"""
import sys
import time

import requests

HTTP_OK = 200
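
# Usage: run directly once the two services this script checks are up
# (it assumes the llx proxy on localhost:4000 and Ollama on localhost:11434,
# as hard-coded in the helpers below):
#
#   ./test-local-chat.py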

def test_llx_health() -> bool:
    """Test whether the llx API is running."""
    try:
        response = requests.get("http://localhost:4000/health", timeout=5)
        return response.status_code == HTTP_OK
    except requests.RequestException:
        return False


def test_ollama_health() -> bool:
    """Test whether Ollama is running."""
    try:
        response = requests.get("http://localhost:11434/api/tags", timeout=5)
        return response.status_code == HTTP_OK
    except requests.RequestException:
        return False

def get_available_models() -> list[str]:
    """Get the available Ollama models."""
    try:
        response = requests.get("http://localhost:11434/api/tags", timeout=5)
        if response.status_code == HTTP_OK:
            data = response.json()
            return [model["name"] for model in data.get("models", [])]
    except requests.RequestException:
        pass
    return []


def test_llx_models() -> list[str]:
    """Get the models available through the llx API."""
    try:
        response = requests.get("http://localhost:4000/v1/models", timeout=5)
        if response.status_code == HTTP_OK:
            data = response.json()
            return [model["id"] for model in data.get("data", [])]
    except requests.RequestException:
        pass
    return []
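
# Note: the two helpers above infer the response shapes from the fields they
# read; roughly (illustrative sketch, not an API reference — the example model
# ids are assumptions):
#
#   GET http://localhost:11434/api/tags -> {"models": [{"name": "qwen2.5-coder:7b", ...}]}
#   GET http://localhost:4000/v1/models -> {"data": [{"id": "ollama/qwen2.5-coder:7b", ...}]}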

def test_chat_completion(
    model: str = "qwen2.5-coder:7b",
    message: str = "Hello! Can you write a simple Python function?",
) -> bool:
    """Test a chat completion through the llx API."""
    url = "http://localhost:4000/v1/chat/completions"
    payload = {
        "model": model,
        "messages": [
            {"role": "user", "content": message}
        ],
        "temperature": 0.2,
        "max_tokens": 500,
    }
    headers = {
        "Content-Type": "application/json",
        "Authorization": "Bearer sk-proxy-local-dev",
    }

    try:
        print(f"🤖 Testing chat with model: {model}")
        print(f"📝 Message: {message}")
        print(f"🌐 API: {url}")
        print("⏳ Sending request...")

        start_time = time.time()
        response = requests.post(url, json=payload, headers=headers, timeout=30)
        end_time = time.time()

        print(f"⚡ Response time: {end_time - start_time:.2f}s")
        print(f"📊 Status: {response.status_code}")

        if response.status_code == HTTP_OK:
            data = response.json()
            content = data["choices"][0]["message"]["content"]
            tokens = data.get("usage", {})
            print("✅ Success!")
            print(f"💬 Response: {content[:200]}...")
            if tokens:
                print(f"🔢 Tokens used: {tokens}")
            return True
        print(f"❌ Error: {response.text}")
        return False
    except Exception as e:
        print(f"❌ Exception: {e}")
        return False
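
# For reference, the same request as a curl call (endpoint, token, and payload
# fields taken from test_chat_completion above; adjust for your deployment):
#
#   curl http://localhost:4000/v1/chat/completions \
#     -H "Content-Type: application/json" \
#     -H "Authorization: Bearer sk-proxy-local-dev" \
#     -d '{"model": "qwen2.5-coder:7b",
#          "messages": [{"role": "user", "content": "Hello!"}],
#          "temperature": 0.2, "max_tokens": 500}'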

def _check_services() -> tuple[bool, bool]:
    """Check llx and Ollama health and print results."""
    print("\n🔍 Checking services...")
    llx_healthy = test_llx_health()
    ollama_healthy = test_ollama_health()

    print(f"🤖 llx API: {'✅ Running' if llx_healthy else '❌ Not running'}")
    print(f"🦙 Ollama: {'✅ Running' if ollama_healthy else '❌ Not running'}")
    return llx_healthy, ollama_healthy

def _print_models() -> tuple[list[str], list[str]]:
    """Fetch and print available models."""
    print("\n📋 Available models:")
    ollama_models = get_available_models()
    llx_models = test_llx_models()

    print(f"🦙 Ollama models ({len(ollama_models)}):")
    for model in ollama_models[:5]:
        print(f"  • {model}")
    if len(ollama_models) > 5:
        print(f"  ... and {len(ollama_models) - 5} more")

    print(f"\n🤖 llx models ({len(llx_models)}):")
    for model in llx_models:
        print(f"  • {model}")
    return ollama_models, llx_models

def _select_test_models(llx_models: list[str]) -> list[str]:
    """Pick up to 2 models to test."""
    test_models: list[str] = []
    # Prefer ids that look like local/Ollama-backed models; otherwise fall
    # back to the first two models llx exposes.
    local_models = [m for m in llx_models if "ollama" in m.lower() or "local" in m.lower()]
    if local_models:
        test_models.extend(local_models[:2])
    if not test_models and llx_models:
        test_models.extend(llx_models[:2])
    return test_models

def _run_chat_tests(test_models: list[str]) -> int:
    """Run chat-completion tests and return success count."""
    print(f"\n🧪 Testing chat with {len(test_models)} models...")
    success_count = 0
    for i, model in enumerate(test_models, 1):
        print(f"\n--- Test {i}/{len(test_models)} ---")
        if test_chat_completion(model, "Hello! Write a simple 'hello world' function in Python."):
            success_count += 1
        if i < len(test_models):
            print("⏳ Waiting 2 seconds before next test...")
            time.sleep(2)
    return success_count

def _print_summary(success_count: int, total: int, test_models: list[str]) -> bool:
    """Print test summary and return overall success flag."""
    print("\n📊 Test Summary:")
    print(f"✅ Successful: {success_count}/{total}")
    print(f"❌ Failed: {total - success_count}/{total}")

    if success_count > 0:
        print("\n🎉 Local chat is working! You can:")
        print("  • Use VS Code at http://localhost:8080")
        print("  • Configure chat to use http://localhost:4000")
        print(f"  • Use model: {test_models[0]}")
        return True

    print("\n❌ Chat tests failed. Check llx API logs:")
    print("  ./docker-manage.sh logs dev llx-api")
    return False

def main() -> bool:
    print("🚀 llx Local Chat API Test")
    print("=" * 50)

    llx_healthy, ollama_healthy = _check_services()
    if not llx_healthy:
        print("\n❌ llx API is not running. Start it with:")
        print("  ./docker-manage.sh dev")
        return False
    if not ollama_healthy:
        print("\n❌ Ollama is not running. Start it with:")
        print("  ollama serve")
        return False

    _, llx_models = _print_models()
    test_models = _select_test_models(llx_models)
    if not test_models:
        print("\n❌ No models available for testing")
        return False

    success_count = _run_chat_tests(test_models)
    return _print_summary(success_count, len(test_models), test_models)


if __name__ == "__main__":
    success = main()
    sys.exit(0 if success else 1)
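
# The exit code mirrors the overall result (0 = all chat tests passed,
# 1 = a service was down or a test failed), so the script can double as a
# smoke test in CI or shell scripts, e.g.:
#
#   ./test-local-chat.py && echo "local chat OK"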