// NOTE(review): this span arrived as a scraped commit-diff hunk; it has been
// reconstructed to the post-commit state of the file.
export { }

// Base URL of the Fireworks OpenAI-compatible inference API.
const FIREWORKS_BASE_URL = 'https://api.fireworks.ai/inference/v1'

// Dedicated deployment under test.
// FIX: the original literal ended with a trailing space
// ('accounts/james-65d217/deployments/lnfid5h9 ') — a paste artifact carried
// through both revisions of the diff. The string is sent verbatim as the
// request's `model` field, where a trailing space would not match any
// deployment ID, so it is trimmed here.
const FIREWORKS_MODEL = 'accounts/james-65d217/deployments/lnfid5h9'
// Alternate target: serverless MiniMax model (swap the comment to switch).
// const FIREWORKS_MODEL = 'accounts/fireworks/models/minimax-m2p5'

// Pricing constants — https://fireworks.ai/pricing
const INPUT_COST_PER_TOKEN = 0.30 / 1_000_000
@@ -224,16 +224,13 @@ async function makeConversationStreamRequest(
224224 const chunk = JSON . parse ( raw )
225225 chunkCount ++
226226 const delta = chunk . choices ?. [ 0 ] ?. delta
227+ if ( delta && firstContentChunkTime === undefined ) {
228+ firstContentChunkTime = Date . now ( )
229+ ttftMs = firstContentChunkTime - startTime
230+ }
227231 if ( delta ?. content ) {
228- if ( firstContentChunkTime === undefined ) {
229- firstContentChunkTime = Date . now ( )
230- ttftMs = firstContentChunkTime - startTime
231- }
232232 streamContent += delta . content
233233 }
234- if ( delta ?. reasoning_content ) {
235- // Skip reasoning content for this test
236- }
237234 if ( chunk . usage ) streamUsage = chunk . usage
238235 } catch {
239236 // skip non-JSON lines
@@ -246,12 +243,9 @@ async function makeConversationStreamRequest(
246243 ? streamUsage . completion_tokens
247244 : 0
248245
249- const generationTimeMs = firstContentChunkTime !== undefined
250- ? Date . now ( ) - firstContentChunkTime
251- : elapsedMs
252- const outputTokensPerSec = generationTimeMs > 0
253- ? ( outputTokens / ( generationTimeMs / 1000 ) )
254- : 0
246+ const outputTokensPerSec = firstContentChunkTime !== undefined
247+ ? ( outputTokens / ( ( Date . now ( ) - firstContentChunkTime ) / 1000 ) )
248+ : undefined
255249
256250 // Print compact per-turn stats
257251 const inputTokens = streamUsage && typeof streamUsage . prompt_tokens === 'number' ? streamUsage . prompt_tokens : 0
@@ -260,7 +254,7 @@ async function makeConversationStreamRequest(
260254 const cacheRate = inputTokens > 0 ? ( ( cachedTokens / inputTokens ) * 100 ) . toFixed ( 1 ) : '0.0'
261255 const cost = streamUsage ? `$${ computeCost ( streamUsage ) . cost . toFixed ( 6 ) } ` : 'err'
262256
263- console . log ( ` ✅ ${ ( elapsedMs / 1000 ) . toFixed ( 2 ) } s | TTFT ${ ttftMs !== undefined ? ( ttftMs / 1000 ) . toFixed ( 2 ) + 's' : 'n/a' } | ${ inputTokens } in (${ cachedTokens } cached, ${ cacheRate } %) | ${ outputTokens } out @ ${ outputTokensPerSec . toFixed ( 1 ) } tok/s | ${ cost } ` )
257+ console . log ( ` ✅ ${ ( elapsedMs / 1000 ) . toFixed ( 2 ) } s | TTFT ${ ttftMs !== undefined ? ( ttftMs / 1000 ) . toFixed ( 2 ) + 's' : 'n/a' } | ${ inputTokens } in (${ cachedTokens } cached, ${ cacheRate } %) | ${ outputTokens } out @ ${ outputTokensPerSec !== undefined ? outputTokensPerSec . toFixed ( 1 ) + ' tok/s' : 'n/a' } | ${ cost } ` )
264258 console . log ( ` Response: ${ streamContent . slice ( 0 , 150 ) } ${ streamContent . length > 150 ? '...' : '' } ` )
265259 console . log ( )
266260
0 commit comments