Skip to content

Commit 150a5f8

Browse files
jahooma and claude committed
Route Kimi K2.6 requests through CanopyWave
Backend-only wiring. No agent or freebuff-model changes — current behavior is unchanged because nothing in the codebase requests moonshotai/kimi-k2.6 yet. Sets the stage for switching the freebuff "smart" model in a follow-up PR.

- Add moonshotai/kimi-k2.6 to CANOPYWAVE_MODEL_MAP so isCanopyWaveModel picks it up.
- Refactor canopywave pricing into a per-model map and add Kimi pricing ($0.60/$0.15/$2.50 per 1M in/cache/out, approximate Moonshot rates).
- Flip useCanopyWave from `false` to isCanopyWaveModel(...) in _post.ts (stream + non-stream). For models not in the map this is a no-op — only minimax-m2.5 and kimi-k2.6 are affected, neither of which is currently used.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
1 parent 69a3225 commit 150a5f8

2 files changed

Lines changed: 39 additions & 17 deletions

File tree

web/src/app/api/v1/chat/completions/_post.ts

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -532,9 +532,10 @@ export async function postChatCompletions(params: {
532532
if (bodyStream) {
533533
// Streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models
534534
const useSiliconFlow = false // isSiliconFlowModel(typedBody.model)
535-
const useCanopyWave = false // isCanopyWaveModel(typedBody.model)
536-
const useFireworks = isFireworksModel(typedBody.model)
537-
const useOpenAIDirect = !useFireworks && isOpenAIDirectModel(typedBody.model)
535+
const useCanopyWave = isCanopyWaveModel(typedBody.model)
536+
const useFireworks = !useCanopyWave && isFireworksModel(typedBody.model)
537+
const useOpenAIDirect =
538+
!useCanopyWave && !useFireworks && isOpenAIDirectModel(typedBody.model)
538539
const stream = useSiliconFlow
539540
? await handleSiliconFlowStream({
540541
body: typedBody,
@@ -606,12 +607,12 @@ export async function postChatCompletions(params: {
606607
})
607608
} else {
608609
// Non-streaming request — route to SiliconFlow/CanopyWave/Fireworks for supported models
609-
// TEMPORARILY DISABLED: route through OpenRouter
610610
const model = typedBody.model
611611
const useSiliconFlow = false // isSiliconFlowModel(model)
612-
const useCanopyWave = false // isCanopyWaveModel(model)
613-
const useFireworks = isFireworksModel(model)
614-
const shouldUseOpenAIEndpoint = !useFireworks && isOpenAIDirectModel(model)
612+
const useCanopyWave = isCanopyWaveModel(model)
613+
const useFireworks = !useCanopyWave && isFireworksModel(model)
614+
const shouldUseOpenAIEndpoint =
615+
!useCanopyWave && !useFireworks && isOpenAIDirectModel(model)
615616

616617
const nonStreamRequest = useSiliconFlow
617618
? handleSiliconFlowNonStream({

web/src/llm-api/canopywave.ts

Lines changed: 31 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@ const canopywaveAgent = new Agent({
2929
/** Map from OpenRouter model IDs to CanopyWave model IDs */
3030
const CANOPYWAVE_MODEL_MAP: Record<string, string> = {
3131
'minimax/minimax-m2.5': 'minimax/minimax-m2.5',
32+
'moonshotai/kimi-k2.6': 'moonshotai/kimi-k2.6',
3233
}
3334

3435
export function isCanopyWaveModel(model: string): boolean {
@@ -85,12 +86,31 @@ function createCanopyWaveRequest(params: {
8586
})
8687
}
8788

88-
// CanopyWave per-token pricing (dollars per token) for MiniMax M2.5
89-
const CANOPYWAVE_INPUT_COST_PER_TOKEN = 0.27 / 1_000_000
90-
const CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN = 0.03 / 1_000_000
91-
const CANOPYWAVE_OUTPUT_COST_PER_TOKEN = 1.08 / 1_000_000
89+
// CanopyWave per-token pricing (dollars per token), keyed by OpenRouter model ID
90+
interface CanopyWavePricing {
91+
inputCostPerToken: number
92+
cachedInputCostPerToken: number
93+
outputCostPerToken: number
94+
}
95+
96+
const CANOPYWAVE_PRICING_MAP: Record<string, CanopyWavePricing> = {
97+
'minimax/minimax-m2.5': {
98+
inputCostPerToken: 0.27 / 1_000_000,
99+
cachedInputCostPerToken: 0.03 / 1_000_000,
100+
outputCostPerToken: 1.08 / 1_000_000,
101+
},
102+
'moonshotai/kimi-k2.6': {
103+
inputCostPerToken: 0.60 / 1_000_000,
104+
cachedInputCostPerToken: 0.15 / 1_000_000,
105+
outputCostPerToken: 2.50 / 1_000_000,
106+
},
107+
}
108+
109+
function getCanopyWavePricing(model: string): CanopyWavePricing {
110+
return CANOPYWAVE_PRICING_MAP[model] ?? CANOPYWAVE_PRICING_MAP['moonshotai/kimi-k2.6']
111+
}
92112

93-
function extractUsageAndCost(usage: Record<string, unknown> | undefined | null): UsageData {
113+
function extractUsageAndCost(usage: Record<string, unknown> | undefined | null, model: string): UsageData {
94114
if (!usage) return { inputTokens: 0, outputTokens: 0, cacheReadInputTokens: 0, reasoningTokens: 0, cost: 0 }
95115
const promptDetails = usage.prompt_tokens_details as Record<string, unknown> | undefined | null
96116
const completionDetails = usage.completion_tokens_details as Record<string, unknown> | undefined | null
@@ -100,11 +120,12 @@ function extractUsageAndCost(usage: Record<string, unknown> | undefined | null):
100120
const cacheReadInputTokens = typeof promptDetails?.cached_tokens === 'number' ? promptDetails.cached_tokens : 0
101121
const reasoningTokens = typeof completionDetails?.reasoning_tokens === 'number' ? completionDetails.reasoning_tokens : 0
102122

123+
const pricing = getCanopyWavePricing(model)
103124
const nonCachedInputTokens = Math.max(0, inputTokens - cacheReadInputTokens)
104125
const cost =
105-
nonCachedInputTokens * CANOPYWAVE_INPUT_COST_PER_TOKEN +
106-
cacheReadInputTokens * CANOPYWAVE_CACHED_INPUT_COST_PER_TOKEN +
107-
outputTokens * CANOPYWAVE_OUTPUT_COST_PER_TOKEN
126+
nonCachedInputTokens * pricing.inputCostPerToken +
127+
cacheReadInputTokens * pricing.cachedInputCostPerToken +
128+
outputTokens * pricing.outputCostPerToken
108129

109130
return { inputTokens, outputTokens, cacheReadInputTokens, reasoningTokens, cost }
110131
}
@@ -139,7 +160,7 @@ export async function handleCanopyWaveNonStream({
139160
const data = await response.json()
140161
const content = data.choices?.[0]?.message?.content ?? ''
141162
const reasoningText = data.choices?.[0]?.message?.reasoning_content ?? data.choices?.[0]?.message?.reasoning ?? ''
142-
const usageData = extractUsageAndCost(data.usage)
163+
const usageData = extractUsageAndCost(data.usage, originalModel)
143164

144165
insertMessageToBigQuery({
145166
messageId: data.id,
@@ -453,7 +474,7 @@ async function handleResponse({
453474
return { state }
454475
}
455476

456-
const usageData = extractUsageAndCost(data.usage as Record<string, unknown>)
477+
const usageData = extractUsageAndCost(data.usage as Record<string, unknown>, originalModel)
457478
const messageId = typeof data.id === 'string' ? data.id : 'unknown'
458479

459480
state.billedAlready = true

0 commit comments

Comments (0)