Skip to content

Commit 338ee4f

Browse files
committed
Add x-session-affinity to fireworks test script
1 parent bb39143 commit 338ee4f

File tree

2 files changed

+12
-1
lines changed

2 files changed

+12
-1
lines changed

scripts/query-usage-stats.ts

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -42,7 +42,9 @@ async function queryUsageStats() {
4242
4343
client_stats AS (
4444
SELECT
45-
ROUND(AVG(cnt)) AS avg_requests_per_client
45+
ROUND(AVG(cnt)) AS avg_requests_per_client,
46+
PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY cnt) AS median_requests_per_client,
47+
MAX(cnt) AS max_requests_per_client
4648
FROM (
4749
SELECT client_id, COUNT(*) AS cnt
4850
FROM recent
@@ -70,6 +72,8 @@ async function queryUsageStats() {
7072
t.avg_cache_rate_pct,
7173
t.avg_output_tokens,
7274
c.avg_requests_per_client,
75+
c.median_requests_per_client,
76+
c.max_requests_per_client,
7377
r.median_rps,
7478
r.peak_rps,
7579
t.total_requests
@@ -90,6 +94,8 @@ async function queryUsageStats() {
9094
console.log(`Median RPS: ${row.median_rps}`)
9195
console.log(`Peak RPS: ${row.peak_rps}`)
9296
console.log(`Avg requests/client: ${row.avg_requests_per_client}`)
97+
console.log(`Median requests/client: ${row.median_requests_per_client}`)
98+
console.log(`Max requests/client: ${row.max_requests_per_client}`)
9399
console.log(`Total requests (7d): ${row.total_requests}`)
94100
}
95101

scripts/test-fireworks-long.ts

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -23,6 +23,9 @@ const OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000
2323

2424
const MAX_TOKENS = 100
2525

26+
// Random session ID generated once per run and held constant across turns, so all turns route to the same machine for prompt caching
27+
const SESSION_ID = `bench-${Math.random().toString(36).slice(2, 10)}`
28+
2629
function computeCost(usage: Record<string, unknown>): { cost: number; breakdown: string } {
2730
const inputTokens = typeof usage.prompt_tokens === 'number' ? usage.prompt_tokens : 0
2831
const outputTokens = typeof usage.completion_tokens === 'number' ? usage.completion_tokens : 0
@@ -175,6 +178,7 @@ async function makeConversationStreamRequest(
175178
headers: {
176179
Authorization: `Bearer ${apiKey}`,
177180
'Content-Type': 'application/json',
181+
'x-session-affinity': SESSION_ID,
178182
},
179183
body: JSON.stringify({
180184
model: FIREWORKS_MODEL,
@@ -277,6 +281,7 @@ async function main() {
277281
console.log(`Max tokens: ${MAX_TOKENS} (low output per turn)`)
278282
console.log(`Turns: ${TURN_PROMPTS.length}`)
279283
console.log(`Pricing: $0.30/M input, $0.03/M cached, $1.20/M output`)
284+
console.log(`Session ID: ${SESSION_ID} (x-session-affinity header)`)
280285
console.log('='.repeat(60))
281286
console.log()
282287

0 commit comments

Comments
 (0)