// NOTE(review): this span arrived as a scraped commit-diff hunk; it has been
// reconstructed to the post-commit state of the file.
export { }

// Base URL of the Fireworks OpenAI-compatible inference API.
const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1'

// Dedicated deployment under test.
// FIX: the original literal ended with a trailing space
// ('accounts/james-65d217/deployments/lnfid5h9 ') — a paste artifact carried
// through both revisions of the diff. The string is sent verbatim as the
// request's `model` field, where a trailing space would not match any
// deployment ID, so it is trimmed here.
const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/lnfid5h9'
// Alternate target: serverless MiniMax model (swap the comment to switch).
// const FIREWORKS_MODEL = 'accounts/fireworks/models/minimax-m2p5'

// Pricing constants — https://fireworks.ai/pricing
const INPUT_COST_PER_TOKEN = 0.30 / 1_000_000
@@ -224,16 +224,13 @@ async function makeConversationStreamRequest(
224224 const chunk = JSON . parse ( raw )
225225 chunkCount ++
226226 const delta = chunk . choices ?. [ 0 ] ?. delta
227+ if ( delta && firstContentChunkTime === undefined ) {
228+ firstContentChunkTime = Date . now ( )
229+ ttftMs = firstContentChunkTime - startTime
230+ }
227231 if ( delta ?. content ) {
228- if ( firstContentChunkTime === undefined ) {
229- firstContentChunkTime = Date . now ( )
230- ttftMs = firstContentChunkTime - startTime
231- }
232232 streamContent += delta . content
233233 }
234- if ( delta ?. reasoning_content ) {
235- // Skip reasoning content for this test
236- }
237234 if ( chunk . usage ) streamUsage = chunk . usage
238235 } catch {
239236 // skip non-JSON lines
@@ -246,12 +243,9 @@ async function makeConversationStreamRequest(
246243 ? streamUsage . completion_tokens
247244 : 0
248245
249- const generationTimeMs = firstContentChunkTime !== undefined
250- ? Date . now ( ) - firstContentChunkTime
251- : elapsedMs
252- const outputTokensPerSec = generationTimeMs > 0
253- ? ( outputTokens / ( generationTimeMs / 1000 ) )
254- : 0
246+ const outputTokensPerSec = firstContentChunkTime !== undefined
247+ ? ( outputTokens / ( ( Date . now ( ) - firstContentChunkTime ) / 1000 ) )
248+ : undefined
255249
256250 // Print compact per-turn stats
257251 const inputTokens = streamUsage && typeof streamUsage . prompt_tokens === 'number' ? streamUsage . prompt_tokens : 0
@@ -260,7 +254,7 @@ async function makeConversationStreamRequest(
260254 const cacheRate = inputTokens > 0 ? ( ( cachedTokens / inputTokens ) * 100 ) . toFixed ( 1 ) : '0.0'
261255 const cost = streamUsage ? `$${ computeCost ( streamUsage ) . cost . toFixed ( 6 ) } ` : 'err'
262256
263- console . log ( ` ✅ ${ ( elapsedMs / 1000 ) . toFixed ( 2 ) } s | TTFT ${ ttftMs !== undefined ? ( ttftMs / 1000 ) . toFixed ( 2 ) + 's' : 'n/a' } | ${ inputTokens } in (${ cachedTokens } cached, ${ cacheRate } %) | ${ outputTokens } out @ ${ outputTokensPerSec . toFixed ( 1 ) } tok/s | ${ cost } ` )
257+ console . log ( ` ✅ ${ ( elapsedMs / 1000 ) . toFixed ( 2 ) } s | TTFT ${ ttftMs !== undefined ? ( ttftMs / 1000 ) . toFixed ( 2 ) + 's' : 'n/a' } | ${ inputTokens } in (${ cachedTokens } cached, ${ cacheRate } %) | ${ outputTokens } out @ ${ outputTokensPerSec !== undefined ? outputTokensPerSec . toFixed ( 1 ) + ' tok/s' : 'n/a' } | ${ cost } ` )
264258 console . log ( ` Response: ${ streamContent . slice ( 0 , 150 ) } ${ streamContent . length > 150 ? '...' : '' } ` )
265259 console . log ( )
266260
0 commit comments