File tree Expand file tree Collapse file tree 2 files changed +12
-1
lines changed
Expand file tree Collapse file tree 2 files changed +12
-1
lines changed Original file line number Diff line number Diff line change @@ -42,7 +42,9 @@ async function queryUsageStats() {
4242
4343 client_stats AS (
4444 SELECT
45- ROUND(AVG(cnt)) AS avg_requests_per_client
45+ ROUND(AVG(cnt)) AS avg_requests_per_client,
46+ PERCENTILE_CONT(0.5) WITHIN GROUP (ORDER BY cnt) AS median_requests_per_client,
47+ MAX(cnt) AS max_requests_per_client
4648 FROM (
4749 SELECT client_id, COUNT(*) AS cnt
4850 FROM recent
@@ -70,6 +72,8 @@ async function queryUsageStats() {
7072 t.avg_cache_rate_pct,
7173 t.avg_output_tokens,
7274 c.avg_requests_per_client,
75+ c.median_requests_per_client,
76+ c.max_requests_per_client,
7377 r.median_rps,
7478 r.peak_rps,
7579 t.total_requests
@@ -90,6 +94,8 @@ async function queryUsageStats() {
9094 console . log ( `Median RPS: ${ row . median_rps } ` )
9195 console . log ( `Peak RPS: ${ row . peak_rps } ` )
9296 console . log ( `Avg requests/client: ${ row . avg_requests_per_client } ` )
97+ console . log ( `Median requests/client: ${ row . median_requests_per_client } ` )
98+ console . log ( `Max requests/client: ${ row . max_requests_per_client } ` )
9399 console . log ( `Total requests (7d): ${ row . total_requests } ` )
94100}
95101
Original file line number Diff line number Diff line change @@ -23,6 +23,9 @@ const OUTPUT_COST_PER_TOKEN = 1.20 / 1_000_000
2323
2424const MAX_TOKENS = 100
2525
26+ // Stable session ID so all turns route to the same machine for prompt caching
27+ const SESSION_ID = `bench-${ Math . random ( ) . toString ( 36 ) . slice ( 2 , 10 ) } `
28+
2629function computeCost ( usage : Record < string , unknown > ) : { cost : number ; breakdown : string } {
2730 const inputTokens = typeof usage . prompt_tokens === 'number' ? usage . prompt_tokens : 0
2831 const outputTokens = typeof usage . completion_tokens === 'number' ? usage . completion_tokens : 0
@@ -175,6 +178,7 @@ async function makeConversationStreamRequest(
175178 headers : {
176179 Authorization : `Bearer ${ apiKey } ` ,
177180 'Content-Type' : 'application/json' ,
181+ 'x-session-affinity' : SESSION_ID ,
178182 } ,
179183 body : JSON . stringify ( {
180184 model : FIREWORKS_MODEL ,
@@ -277,6 +281,7 @@ async function main() {
277281 console . log ( `Max tokens: ${ MAX_TOKENS } (low output per turn)` )
278282 console . log ( `Turns: ${ TURN_PROMPTS . length } ` )
279283 console . log ( `Pricing: $0.30/M input, $0.03/M cached, $1.20/M output` )
284+ console . log ( `Session ID: ${ SESSION_ID } (x-session-affinity header)` )
280285 console . log ( '=' . repeat ( 60 ) )
281286 console . log ( )
282287
You can’t perform that action at this time.
0 commit comments