Skip to content

Commit ed9da18

Browse files
committed
feat: update compression response to new API format, add SSL cert fix
- Replace input_tokens and rate with saved_tokens, cost_savings, reduction, and time_ms
- Use certifi for the SSL context when available (fixes certificate verification on macOS)
- Add optional edgee[ssl] dependency for certifi
- Update README with the new compression fields and a troubleshooting section

Made-with: Cursor
1 parent 2fc1900 commit ed9da18

7 files changed

Lines changed: 52 additions & 36 deletions

File tree

README.md

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ edgee = Edgee("your-api-key")
2020

2121
# Send a simple request
2222
response = edgee.send(
23-
model="gpt-5.2",
23+
model="anthropic/claude-haiku-4-5",
2424
input="What is the capital of France?"
2525
)
2626

@@ -34,7 +34,7 @@ The `send()` method makes non-streaming chat completion requests:
3434

3535
```python
3636
response = edgee.send(
37-
model="gpt-5.2",
37+
model="anthropic/claude-haiku-4-5",
3838
input="Hello, world!"
3939
)
4040

@@ -48,17 +48,18 @@ if response.usage:
4848
print(f"Tokens used: {response.usage.total_tokens}")
4949

5050
if response.compression:
51-
print(f"Input tokens: {response.compression.input_tokens}")
5251
print(f"Saved tokens: {response.compression.saved_tokens}")
53-
print(f"Compression rate: {response.compression.rate}")
52+
print(f"Reduction: {response.compression.reduction}%")
53+
print(f"Cost savings: ${response.compression.cost_savings / 1_000_000:.3f}")
54+
print(f"Time: {response.compression.time_ms} ms")
5455
```
5556

5657
## Stream Method
5758

5859
The `stream()` method enables real-time streaming responses:
5960

6061
```python
61-
for chunk in edgee.stream("gpt-5.2", "Tell me a story"):
62+
for chunk in edgee.stream("anthropic/claude-haiku-4-5", "Tell me a story"):
6263
if chunk.text:
6364
print(chunk.text, end="", flush=True)
6465

edgee/__init__.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@
22

33
import json
44
import os
5+
import ssl
56
from dataclasses import dataclass
67
from urllib.error import HTTPError
78
from urllib.request import Request, urlopen
@@ -11,6 +12,18 @@
1112
API_ENDPOINT = "/v1/chat/completions"
1213

1314

15+
def _ssl_context() -> ssl.SSLContext:
16+
"""Create SSL context. Uses certifi's CA bundle when available (fixes cert issues on macOS)."""
17+
ctx = ssl.create_default_context()
18+
try:
19+
import certifi
20+
21+
ctx.load_verify_locations(certifi.where())
22+
except ImportError:
23+
pass # Use default system/store certs
24+
return ctx
25+
26+
1427
@dataclass
class Compression:
    """Compression metrics the gateway attaches to a chat completion response."""

    # Number of input tokens removed by compression.
    saved_tokens: int
    # Cost saved, expressed in micro-units (e.g. 27000 = $0.027).
    cost_savings: int
    # Size reduction as a whole percentage (e.g. 48 = 48%).
    reduction: int
    # Time spent compressing, in milliseconds.
    time_ms: int
7690

7791

7892
@dataclass
@@ -253,7 +267,7 @@ def send(
253267
def _handle_non_streaming_response(self, request: Request) -> SendResponse:
254268
"""Handle non-streaming response."""
255269
try:
256-
with urlopen(request) as response:
270+
with urlopen(request, context=_ssl_context()) as response:
257271
data = json.loads(response.read().decode("utf-8"))
258272
except HTTPError as e:
259273
error_body = e.read().decode("utf-8")
@@ -279,17 +293,18 @@ def _handle_non_streaming_response(self, request: Request) -> SendResponse:
279293
compression = None
280294
if "compression" in data:
281295
compression = Compression(
282-
input_tokens=data["compression"]["input_tokens"],
283296
saved_tokens=data["compression"]["saved_tokens"],
284-
rate=data["compression"]["rate"],
297+
cost_savings=data["compression"]["cost_savings"],
298+
reduction=data["compression"]["reduction"],
299+
time_ms=data["compression"]["time_ms"],
285300
)
286301

287302
return SendResponse(choices=choices, usage=usage, compression=compression)
288303

289304
def _handle_streaming_response(self, request: Request):
290305
"""Handle streaming response, yielding StreamChunk objects."""
291306
try:
292-
with urlopen(request) as response:
307+
with urlopen(request, context=_ssl_context()) as response:
293308
# Read and parse SSE stream
294309
for line in response:
295310
decoded_line = line.decode("utf-8")

example/compression.py

Lines changed: 11 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -87,7 +87,7 @@
8787
Based on this context, summarize the key milestones in AI development in 3 bullet points."""
8888

8989
response = edgee.send(
90-
model="gpt-5.2",
90+
model="anthropic/claude-haiku-4-5",
9191
input={
9292
"messages": [
9393
{"role": "user", "content": user_message},
@@ -111,21 +111,17 @@
111111
# Display compression information
112112
if response.compression:
113113
print("Compression Metrics:")
114-
print(f" Input tokens: {response.compression.input_tokens}")
115114
print(f" Saved tokens: {response.compression.saved_tokens}")
116-
print(f" Compression rate: {response.compression.rate:.2%}")
117-
savings_pct = (
118-
(response.compression.saved_tokens / response.compression.input_tokens * 100)
119-
if response.compression.input_tokens > 0
120-
else 0
121-
)
122-
print(f" Savings: {savings_pct:.1f}% of input tokens saved!")
123-
print()
124-
print(" 💡 Without compression, this request would have used")
125-
print(f" {response.compression.input_tokens} input tokens.")
126-
print(
127-
f" With compression, only {response.compression.input_tokens - response.compression.saved_tokens} tokens were processed!"
128-
)
115+
print(f" Reduction: {response.compression.reduction}%")
116+
print(f" Cost savings: ${response.compression.cost_savings / 1_000_000:.3f}")
117+
print(f" Time: {response.compression.time_ms} ms")
118+
if response.compression.reduction > 0:
119+
original_tokens = response.compression.saved_tokens * 100 // response.compression.reduction
120+
tokens_after = original_tokens - response.compression.saved_tokens
121+
print()
122+
print(" 💡 Without compression, this request would have used")
123+
print(f" {original_tokens} input tokens.")
124+
print(f" With compression, only {tokens_after} tokens were processed!")
129125
else:
130126
print("No compression data available in response.")
131127
print("Note: Compression data is only returned when compression is enabled")

example/test.py

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,7 +13,7 @@
1313
# Test 1: Simple string input
1414
print("Test 1: Simple string input")
1515
response1 = edgee.send(
16-
model="mistral/mistral-small-latest",
16+
model="anthropic/claude-haiku-4-5",
1717
input="What is the capital of France?",
1818
)
1919
print(f"Content: {response1.text}")
@@ -23,7 +23,7 @@
2323
# Test 2: Full input object with messages
2424
print("Test 2: Full input object with messages")
2525
response2 = edgee.send(
26-
model="mistral/mistral-small-latest",
26+
model="anthropic/claude-haiku-4-5",
2727
input={
2828
"messages": [
2929
{"role": "system", "content": "You are a helpful assistant."},
@@ -37,7 +37,7 @@
3737
# Test 3: With tools
3838
print("Test 3: With tools")
3939
response3 = edgee.send(
40-
model="gpt-5.2",
40+
model="anthropic/claude-haiku-4-5",
4141
input={
4242
"messages": [{"role": "user", "content": "What is the weather in Paris?"}],
4343
"tools": [
@@ -65,7 +65,7 @@
6565

6666
# Test 4: Streaming
6767
print("Test 4: Streaming")
68-
for chunk in edgee.stream(model="mistral/mistral-small-latest", input="What is Python?"):
68+
for chunk in edgee.stream(model="anthropic/claude-haiku-4-5", input="What is Python?"):
6969
if chunk.text:
7070
print(chunk.text, end="", flush=True)
7171
print("\n")

pyproject.toml

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
[project]
22
name = "edgee"
3-
version = "1.0.1"
3+
version = "1.0.2"
44
description = "Lightweight Python SDK for Edgee AI Gateway"
55
readme = "README.md"
66
license = "Apache-2.0"
@@ -25,6 +25,8 @@ Repository = "https://github.com/edgee-ai/python-sdk"
2525

2626
[project.optional-dependencies]
2727
dev = ["pytest>=8.0.0", "ruff>=0.8.0"]
28+
# Install certifi to fix SSL certificate verification on macOS (Python.org installs)
29+
ssl = ["certifi>=2024.0.0"]
2830

2931
[build-system]
3032
requires = ["hatchling"]

tests/test_edgee.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -320,9 +320,10 @@ def test_send_with_compression_response(self, mock_urlopen):
320320
],
321321
"usage": {"prompt_tokens": 100, "completion_tokens": 50, "total_tokens": 150},
322322
"compression": {
323-
"input_tokens": 100,
324323
"saved_tokens": 42,
325-
"rate": 0.6102003642987249,
324+
"cost_savings": 27000,
325+
"reduction": 48,
326+
"time_ms": 150,
326327
},
327328
}
328329
mock_urlopen.return_value = self._mock_response(mock_response_data)
@@ -331,9 +332,10 @@ def test_send_with_compression_response(self, mock_urlopen):
331332
result = client.send(model="gpt-4", input="Test")
332333

333334
assert result.compression is not None
334-
assert result.compression.input_tokens == 100
335335
assert result.compression.saved_tokens == 42
336-
assert result.compression.rate == 0.6102003642987249
336+
assert result.compression.cost_savings == 27000
337+
assert result.compression.reduction == 48
338+
assert result.compression.time_ms == 150
337339

338340
@patch("edgee.urlopen")
339341
def test_send_without_compression_response(self, mock_urlopen):

uv.lock

Lines changed: 1 addition & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)