Skip to content

Commit 17747e8

Browse files
committed
refactor: deduplicate OpenAI request building, improve fallback and cost estimation
Extract buildRequestBase to eliminate code duplication between OpenAI client and the shared builder. Improve provider health decay, cost estimator accuracy, and fallback chain logic. Add comprehensive tests for types and cost estimation.
1 parent 7c3eafb commit 17747e8

17 files changed

Lines changed: 1130 additions & 154 deletions

client/bedrock.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ func (c *BedrockClient) StreamChat(ctx context.Context, messages []EyrieMessage,
168168
var args map[string]interface{}
169169
_ = json.Unmarshal(chunk.ContentBlock.Input, &args)
170170
tc := ToolCall{ID: chunk.ContentBlock.ID, Name: chunk.ContentBlock.Name, Arguments: args}
171-
toolCalls = append(toolCalls, tc)
171+
_ = append(toolCalls, tc) // individual calls sent via channel; accumulation unused
172172
select {
173173
case ch <- EyrieStreamEvent{Type: "tool_call", ToolCall: &tc}:
174174
case <-streamCtx.Done():

client/benchmarks_test.go

Lines changed: 293 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,293 @@
1+
package client
2+
3+
import (
	"context"
	"strconv"
	"testing"
)
7+
8+
// ---------- buildRequestBase ----------
9+
10+
func BenchmarkBuildRequestBase_SimpleMessages(b *testing.B) {
11+
messages := []EyrieMessage{
12+
{Role: "system", Content: "You are helpful."},
13+
{Role: "user", Content: "Hello"},
14+
{Role: "assistant", Content: "Hi there!"},
15+
{Role: "user", Content: "How are you?"},
16+
}
17+
opts := ChatOptions{
18+
Model: "gpt-4",
19+
Temperature: floatPtr(0.7),
20+
MaxTokens: 4096,
21+
}
22+
b.ReportAllocs()
23+
b.ResetTimer()
24+
for i := 0; i < b.N; i++ {
25+
_ = buildRequestBase(messages, opts, false, nil)
26+
}
27+
}
28+
29+
func BenchmarkBuildRequestBase_WithToolUse(b *testing.B) {
30+
messages := []EyrieMessage{
31+
{Role: "system", Content: "You are helpful."},
32+
{Role: "user", Content: "Search for files"},
33+
{Role: "assistant", ToolUse: []ToolCall{
34+
{ID: "tc-1", Name: "search", Arguments: map[string]interface{}{"query": "main.go"}},
35+
{ID: "tc-2", Name: "read", Arguments: map[string]interface{}{"path": "main.go"}},
36+
}},
37+
{Role: "user", ToolResult: &ToolResult{ToolUseID: "tc-1", Content: "Found 1 file"}},
38+
{Role: "user", ToolResult: &ToolResult{ToolUseID: "tc-2", Content: "package main\nfunc main() {}"}},
39+
}
40+
opts := ChatOptions{
41+
Model: "gpt-4",
42+
Tools: []EyrieTool{
43+
{Name: "search", Description: "Search for files", Parameters: map[string]interface{}{"query": map[string]string{"type": "string"}}},
44+
{Name: "read", Description: "Read a file", Parameters: map[string]interface{}{"path": map[string]string{"type": "string"}}},
45+
},
46+
MaxTokens: 4096,
47+
}
48+
b.ReportAllocs()
49+
b.ResetTimer()
50+
for i := 0; i < b.N; i++ {
51+
_ = buildRequestBase(messages, opts, false, nil)
52+
}
53+
}
54+
55+
func BenchmarkBuildRequestBase_WithImages(b *testing.B) {
56+
messages := []EyrieMessage{
57+
{Role: "user", Content: "What's in this image?", Images: []string{"data:image/png;base64,iVBORw0KGgo="}},
58+
}
59+
opts := ChatOptions{Model: "gpt-4-vision", MaxTokens: 4096}
60+
b.ReportAllocs()
61+
b.ResetTimer()
62+
for i := 0; i < b.N; i++ {
63+
_ = buildRequestBase(messages, opts, false, nil)
64+
}
65+
}
66+
67+
func BenchmarkBuildRequestBase_Streaming(b *testing.B) {
68+
messages := []EyrieMessage{
69+
{Role: "user", Content: "Write a long essay"},
70+
}
71+
opts := ChatOptions{Model: "gpt-4", MaxTokens: 4096}
72+
b.ReportAllocs()
73+
b.ResetTimer()
74+
for i := 0; i < b.N; i++ {
75+
_ = buildRequestBase(messages, opts, true, nil)
76+
}
77+
}
78+
79+
// ---------- buildCacheKey ----------
80+
81+
func BenchmarkBuildCacheKey_Short(b *testing.B) {
82+
messages := []EyrieMessage{
83+
{Role: "user", Content: "Hello"},
84+
}
85+
opts := ChatOptions{Model: "gpt-4"}
86+
b.ReportAllocs()
87+
b.ResetTimer()
88+
for i := 0; i < b.N; i++ {
89+
_ = buildCacheKey(messages, opts)
90+
}
91+
}
92+
93+
func BenchmarkBuildCacheKey_Long(b *testing.B) {
94+
longContent := make([]byte, 4000)
95+
for i := range longContent {
96+
longContent[i] = 'a'
97+
}
98+
messages := []EyrieMessage{
99+
{Role: "system", Content: string(longContent)},
100+
{Role: "user", Content: "Hello"},
101+
{Role: "assistant", Content: string(longContent)},
102+
{Role: "user", Content: "Continue"},
103+
}
104+
opts := ChatOptions{Model: "gpt-4", System: "You are helpful"}
105+
b.ReportAllocs()
106+
b.ResetTimer()
107+
for i := 0; i < b.N; i++ {
108+
_ = buildCacheKey(messages, opts)
109+
}
110+
}
111+
112+
func BenchmarkBuildCacheKey_WithToolCalls(b *testing.B) {
113+
messages := []EyrieMessage{
114+
{Role: "assistant", ToolUse: []ToolCall{
115+
{ID: "tc-1", Name: "search", Arguments: map[string]interface{}{"query": "test"}},
116+
}},
117+
{Role: "user", ToolResult: &ToolResult{ToolUseID: "tc-1", Content: "result"}},
118+
}
119+
opts := ChatOptions{Model: "gpt-4"}
120+
b.ReportAllocs()
121+
b.ResetTimer()
122+
for i := 0; i < b.N; i++ {
123+
_ = buildCacheKey(messages, opts)
124+
}
125+
}
126+
127+
// ---------- CachedProvider ----------
128+
129+
func BenchmarkCachedProvider_CacheHit(b *testing.B) {
130+
mock := NewMockProvider(MockModeFixed)
131+
mock.Response = "cached response"
132+
cp := NewCachedProvider(mock, DefaultCacheConfig())
133+
messages := []EyrieMessage{{Role: "user", Content: "Hello"}}
134+
opts := ChatOptions{Model: "gpt-4"}
135+
136+
// Prime the cache
137+
_, _ = cp.Chat(context.TODO(), messages, opts)
138+
139+
b.ReportAllocs()
140+
b.ResetTimer()
141+
for i := 0; i < b.N; i++ {
142+
_, _ = cp.Chat(context.TODO(), messages, opts)
143+
}
144+
}
145+
146+
func BenchmarkCachedProvider_CacheMiss(b *testing.B) {
147+
mock := NewMockProvider(MockModeFixed)
148+
mock.Response = "response"
149+
cp := NewCachedProvider(mock, DefaultCacheConfig())
150+
opts := ChatOptions{Model: "gpt-4"}
151+
152+
b.ReportAllocs()
153+
b.ResetTimer()
154+
for i := 0; i < b.N; i++ {
155+
messages := []EyrieMessage{{Role: "user", Content: "unique query"}}
156+
_, _ = cp.Chat(context.TODO(), messages, opts)
157+
}
158+
}
159+
160+
// ---------- SanitizeMessages ----------
161+
162+
func BenchmarkSanitizeMessages_Clean(b *testing.B) {
163+
messages := []EyrieMessage{
164+
{Role: "user", Content: "Hello"},
165+
{Role: "assistant", Content: "Hi there!"},
166+
{Role: "user", Content: "How are you?"},
167+
{Role: "assistant", Content: "I'm good."},
168+
}
169+
b.ReportAllocs()
170+
b.ResetTimer()
171+
for i := 0; i < b.N; i++ {
172+
_ = SanitizeMessages(messages)
173+
}
174+
}
175+
176+
func BenchmarkSanitizeMessages_WithOrphans(b *testing.B) {
177+
messages := []EyrieMessage{
178+
{Role: "user", Content: "Search for files"},
179+
{Role: "assistant", ToolUse: []ToolCall{
180+
{ID: "tc-1", Name: "search", Arguments: map[string]interface{}{"query": "test"}},
181+
{ID: "tc-2", Name: "read", Arguments: map[string]interface{}{"path": "main.go"}},
182+
}},
183+
// tc-1 has result, tc-2 is orphaned
184+
{Role: "user", ToolResult: &ToolResult{ToolUseID: "tc-1", Content: "Found 1 file"}},
185+
}
186+
b.ReportAllocs()
187+
b.ResetTimer()
188+
for i := 0; i < b.N; i++ {
189+
_ = SanitizeMessages(messages)
190+
}
191+
}
192+
193+
func BenchmarkSanitizeMessages_Large(b *testing.B) {
194+
messages := make([]EyrieMessage, 50)
195+
for i := range messages {
196+
if i%3 == 0 {
197+
messages[i] = EyrieMessage{Role: "user", Content: "message"}
198+
} else {
199+
messages[i] = EyrieMessage{Role: "assistant", Content: "response"}
200+
}
201+
}
202+
b.ReportAllocs()
203+
b.ResetTimer()
204+
for i := 0; i < b.N; i++ {
205+
_ = SanitizeMessages(messages)
206+
}
207+
}
208+
209+
// ---------- MergeConsecutiveRoles ----------
210+
211+
func BenchmarkMergeConsecutiveRoles_NoMerge(b *testing.B) {
212+
messages := []EyrieMessage{
213+
{Role: "user", Content: "Hello"},
214+
{Role: "assistant", Content: "Hi"},
215+
{Role: "user", Content: "How are you?"},
216+
{Role: "assistant", Content: "Good"},
217+
}
218+
b.ReportAllocs()
219+
b.ResetTimer()
220+
for i := 0; i < b.N; i++ {
221+
_ = MergeConsecutiveRoles(messages)
222+
}
223+
}
224+
225+
func BenchmarkMergeConsecutiveRoles_WithMerges(b *testing.B) {
226+
messages := []EyrieMessage{
227+
{Role: "user", Content: "Hello"},
228+
{Role: "user", Content: "World"},
229+
{Role: "assistant", Content: "Hi"},
230+
{Role: "assistant", Content: "There"},
231+
{Role: "user", Content: "How are you?"},
232+
}
233+
b.ReportAllocs()
234+
b.ResetTimer()
235+
for i := 0; i < b.N; i++ {
236+
_ = MergeConsecutiveRoles(messages)
237+
}
238+
}
239+
240+
func BenchmarkMergeConsecutiveRoles_WithToolUse(b *testing.B) {
241+
messages := []EyrieMessage{
242+
{Role: "assistant", ToolUse: []ToolCall{{ID: "tc-1", Name: "search"}}},
243+
{Role: "assistant", Content: "Let me search"},
244+
{Role: "user", ToolResult: &ToolResult{ToolUseID: "tc-1", Content: "result"}},
245+
}
246+
b.ReportAllocs()
247+
b.ResetTimer()
248+
for i := 0; i < b.N; i++ {
249+
_ = MergeConsecutiveRoles(messages)
250+
}
251+
}
252+
253+
// ---------- MetricsCollector ----------
254+
255+
func BenchmarkMetricsCollector_Record(b *testing.B) {
256+
mc := NewMetricsCollector()
257+
m := CallMetrics{Model: "gpt-4", Provider: "openai", InputTokens: 100, OutputTokens: 50, LatencyMs: 100}
258+
b.ReportAllocs()
259+
b.ResetTimer()
260+
for i := 0; i < b.N; i++ {
261+
mc.Record(m)
262+
}
263+
}
264+
265+
func BenchmarkMetricsCollector_Recent(b *testing.B) {
266+
mc := NewMetricsCollector()
267+
m := CallMetrics{Model: "gpt-4", Provider: "openai", InputTokens: 100, OutputTokens: 50, LatencyMs: 100}
268+
for i := 0; i < 100; i++ {
269+
mc.Record(m)
270+
}
271+
b.ReportAllocs()
272+
b.ResetTimer()
273+
for i := 0; i < b.N; i++ {
274+
_ = mc.Recent(10)
275+
}
276+
}
277+
278+
func BenchmarkMetricsCollector_TotalCost(b *testing.B) {
279+
mc := NewMetricsCollector()
280+
m := CallMetrics{Model: "gpt-4", Provider: "openai", InputTokens: 1000, OutputTokens: 500, CacheReadTokens: 200, CacheCreationTokens: 100}
281+
for i := 0; i < 100; i++ {
282+
mc.Record(m)
283+
}
284+
b.ReportAllocs()
285+
b.ResetTimer()
286+
for i := 0; i < b.N; i++ {
287+
_ = mc.TotalCost()
288+
}
289+
}
290+
291+
// ---------- helpers ----------
292+
293+
// floatPtr returns a pointer to a fresh float64 holding the value f. It lets
// optional *float64 fields be set from a literal.
func floatPtr(f float64) *float64 {
	value := f
	return &value
}

client/cost_estimator.go

Lines changed: 21 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,8 @@ import (
44
"fmt"
55
"strings"
66
"sync"
7+
8+
"github.com/GrayCodeAI/tok"
79
)
810

911
// CostEstimator estimates the cost of an API call BEFORE sending it.
@@ -63,15 +65,14 @@ func (ce *CostEstimator) IsExpensive(est CostEstimate, threshold float64) bool {
6365
func (ce *CostEstimator) countInputTokens(messages []EyrieMessage) int {
6466
total := 0
6567
for _, m := range messages {
66-
// ~4 chars per token (rough estimate for fast pre-call estimation)
67-
total += len(m.Content) / 4
68+
total += tok.EstimateTokens(m.Content)
6869
if m.ToolResult != nil {
69-
total += len(m.ToolResult.Content) / 4
70+
total += tok.EstimateTokens(m.ToolResult.Content)
7071
}
7172
for _, tc := range m.ToolUse {
7273
total += 50 // tool call overhead
7374
for _, v := range tc.Arguments {
74-
total += len(fmt.Sprintf("%v", v)) / 4
75+
total += tok.EstimateTokens(fmt.Sprintf("%v", v))
7576
}
7677
}
7778
}
@@ -99,7 +100,7 @@ func NewStreamingTokenCounter(model string, inputTokens int) *StreamingTokenCoun
99100
// AddOutput records streamed output tokens.
100101
func (stc *StreamingTokenCounter) AddOutput(text string) {
101102
stc.mu.Lock()
102-
stc.outputTokens += len(text) / 4
103+
stc.outputTokens += tok.EstimateTokens(text)
103104
stc.mu.Unlock()
104105
}
105106

@@ -156,7 +157,7 @@ func NewPromptOptimizer(maxInputTokens int) *PromptOptimizer {
156157
func (po *PromptOptimizer) Optimize(messages []EyrieMessage) []EyrieMessage {
157158
totalTokens := 0
158159
for _, m := range messages {
159-
totalTokens += len(m.Content)/4 + 10 // +10 for overhead
160+
totalTokens += tok.EstimateTokens(m.Content) + 10 // +10 for overhead
160161
}
161162

162163
if totalTokens <= po.maxInputTokens {
@@ -190,17 +191,21 @@ func (po *PromptOptimizer) Optimize(messages []EyrieMessage) []EyrieMessage {
190191
func compressMessages(messages []EyrieMessage) string {
191192
var parts []string
192193
for _, m := range messages {
193-
content := m.Content
194-
if len(content) > 100 {
195-
content = content[:100] + "..."
196-
}
197-
if content != "" {
198-
parts = append(parts, m.Role+": "+content)
194+
if m.Content != "" {
195+
parts = append(parts, m.Role+": "+m.Content)
199196
}
200197
}
201-
summary := strings.Join(parts, " | ")
202-
if len(summary) > 500 {
203-
summary = summary[:500]
198+
raw := strings.Join(parts, "\n")
199+
200+
// Use tok compression pipeline for intelligent summarization
201+
compressed, _ := tok.Compress(raw, tok.Minimal)
202+
if len(compressed) > 0 && len(compressed) < len(raw) {
203+
return compressed
204+
}
205+
206+
// Fallback: naive truncation
207+
if len(raw) > 500 {
208+
raw = raw[:500]
204209
}
205-
return summary
210+
return raw
206211
}

client/cost_estimator_test.go

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,8 @@ package client
33
import (
44
"math"
55
"testing"
6+
7+
"github.com/GrayCodeAI/tok"
68
)
79

810
func TestCostEstimateForKnownModels(t *testing.T) {
@@ -71,7 +73,7 @@ func TestCostEstimateUnknownModelReturnsNonZero(t *testing.T) {
7173
expectedInPrice := 1.0 / 1_000_000
7274
expectedOutPrice := 3.0 / 1_000_000
7375

74-
inputTokens := len("test message here") / 4
76+
inputTokens := tok.EstimateTokens("test message here")
7577
expectedInput := float64(inputTokens) * expectedInPrice
7678
expectedOutput := float64(1000) * expectedOutPrice
7779

0 commit comments

Comments
 (0)