Commit 6f4f56b

do not add reasoning tokens to message history

1 parent f692391 · commit 6f4f56b

3 files changed: +69 -37 lines changed

backend/src/llm-apis/vercel-ai-sdk/ai-sdk.ts

Lines changed: 28 additions & 22 deletions
@@ -5,11 +5,6 @@ import {
   geminiModels,
   openaiModels,
 } from '@codebuff/common/old-constants'
-import {
-  endToolTag,
-  startToolTag,
-  toolNameParam,
-} from '@codebuff/common/tools/constants'
 import { buildArray } from '@codebuff/common/util/array'
 import { convertCbToModelMessages } from '@codebuff/common/util/messages'
 import { errorToObject } from '@codebuff/common/util/object'
@@ -36,6 +31,17 @@ import type {
 import type { LanguageModel } from 'ai'
 import type { z } from 'zod/v4'
 
+export type StreamChunk =
+  | {
+      type: 'text'
+      text: string
+    }
+  | {
+      type: 'reasoning'
+      text: string
+    }
+  | { type: 'error'; message: string }
+
 // TODO: We'll want to add all our models here!
 const modelToAiSDKModel = (model: Model): LanguageModel => {
   if (
@@ -77,7 +83,7 @@ export const promptAiSdkStream = async function* (
     includeCacheControl?: boolean
     resolveMessageId?: (messageId: string) => unknown
   } & Omit<Parameters<typeof streamText>[0], 'model' | 'messages'>,
-) {
+): AsyncGenerator<StreamChunk> {
   if (
     !checkLiveUserInput(
       options.userId,
@@ -93,7 +99,10 @@ export const promptAiSdkStream = async function* (
       },
       'Skipping stream due to canceled user input',
     )
-    yield ''
+    yield {
+      type: 'text',
+      text: '',
+    }
     return
   }
   const startTime = Date.now()
@@ -108,7 +117,6 @@ export const promptAiSdkStream = async function* (
   })
 
   let content = ''
-  let reasoning = false
 
   for await (const chunk of response.fullStream) {
     if (chunk.type === 'error') {
@@ -131,9 +139,11 @@ export const promptAiSdkStream = async function* (
           ? chunk.error
          : JSON.stringify(chunk.error)
       const errorMessage = `Error from AI SDK (model ${options.model}): ${buildArray([mainErrorMessage, errorBody]).join('\n')}`
-      throw new Error(errorMessage, {
-        cause: chunk.error,
-      })
+      yield {
+        type: 'error',
+        message: errorMessage,
+      }
+      return
     }
     if (chunk.type === 'reasoning-delta') {
       if (
@@ -145,21 +155,17 @@ export const promptAiSdkStream = async function* (
       ) {
         continue
       }
-      if (!reasoning) {
-        reasoning = true
-        yield `${startToolTag}{
-${JSON.stringify(toolNameParam)}: "think_deeply",
-"thought": "`
+      yield {
+        type: 'reasoning',
+        text: chunk.text,
       }
-      yield JSON.stringify(chunk.text).slice(1, -1)
     }
     if (chunk.type === 'text-delta') {
-      if (reasoning) {
-        reasoning = false
-        yield `"\n}${endToolTag}\n\n`
-      }
       content += chunk.text
-      yield chunk.text
+      yield {
+        type: 'text',
+        text: chunk.text,
+      }
    }
  }

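The visible effect in this file: `promptAiSdkStream` no longer splices `think_deeply` tool tags into a raw string stream, and stream errors are yielded as values instead of thrown. Callers now discriminate on `chunk.type`. A minimal consumer sketch, assuming only the `StreamChunk` union above and a sibling import path; `collectText` is a hypothetical helper, not part of this commit:

  import type { StreamChunk } from './ai-sdk'

  // Accumulate only text chunks, mirroring how `content` is built inside
  // promptAiSdkStream; reasoning chunks are observed but never stored.
  async function collectText(
    stream: AsyncGenerator<StreamChunk>,
  ): Promise<string> {
    let content = ''
    for await (const chunk of stream) {
      switch (chunk.type) {
        case 'text':
          content += chunk.text
          break
        case 'reasoning':
          break // surfaced to the client elsewhere; kept out of history
        case 'error':
          throw new Error(chunk.message)
        default:
          chunk satisfies never // compile-time exhaustiveness check
      }
    }
    return content
  }
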
backend/src/tools/stream-parser.ts

Lines changed: 30 additions & 5 deletions
@@ -1,4 +1,9 @@
-import { toolNames } from '@codebuff/common/tools/constants'
+import {
+  endToolTag,
+  startToolTag,
+  toolNameParam,
+  toolNames,
+} from '@codebuff/common/tools/constants'
 import { buildArray } from '@codebuff/common/util/array'
 import { generateCompactId } from '@codebuff/common/util/string'
 
@@ -8,6 +13,7 @@ import { processStreamWithTags } from '../xml-stream-parser'
 import { executeCustomToolCall, executeToolCall } from './tool-executor'
 
 import type { CustomToolCall } from './tool-executor'
+import type { StreamChunk } from '../llm-apis/vercel-ai-sdk/ai-sdk'
 import type { AgentTemplate } from '../templates/types'
 import type { ToolName } from '@codebuff/common/tools/constants'
 import type { CodebuffToolCall } from '@codebuff/common/tools/list'
@@ -25,8 +31,8 @@ export type ToolCallError = {
   error: string
 } & Omit<ToolCallPart, 'type'>
 
-export async function processStreamWithTools<T extends string>(options: {
-  stream: AsyncGenerator<T> | ReadableStream<T>
+export async function processStreamWithTools(options: {
+  stream: AsyncGenerator<StreamChunk>
   ws: WebSocket
   agentStepId: string
   clientSessionId: string
@@ -169,9 +175,28 @@ export async function processStreamWithTools<T extends string>(options: {
     },
   )
 
+  let reasoning = false
   for await (const chunk of streamWithTags) {
-    onResponseChunk(chunk)
-    fullResponseChunks.push(chunk)
+    if (chunk.type === 'reasoning') {
+      if (!reasoning) {
+        reasoning = true
+        onResponseChunk(`\n\n${startToolTag}{
+${JSON.stringify(toolNameParam)}: "think_deeply",
+"thought": "`)
+      }
+      onResponseChunk(JSON.stringify(chunk.text).slice(1, -1))
+    } else if (chunk.type === 'text') {
+      if (reasoning) {
+        reasoning = false
+        onResponseChunk(`"\n}${endToolTag}\n\n`)
+      }
+      onResponseChunk(chunk.text)
+      fullResponseChunks.push(chunk.text)
+    } else if (chunk.type === 'error') {
+      onResponseChunk(chunk)
+    } else {
+      chunk satisfies never
+    }
   }
 
   state.messages = buildArray<Message>([

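This is the heart of the commit: the `think_deeply` wrapping that used to live in `promptAiSdkStream` now happens here, and only on the client-facing `onResponseChunk` path. `fullResponseChunks`, which goes on to rebuild `state.messages`, receives `chunk.text` from text chunks alone, so reasoning tokens never reach message history. Note the `JSON.stringify(chunk.text).slice(1, -1)` trick: it JSON-escapes each reasoning delta while stripping the surrounding quotes, so consecutive deltas concatenate into one valid JSON string literal inside the synthesized tool call. A standalone sketch of that state machine, with placeholder tag constants standing in for the real values from `@codebuff/common/tools/constants`:

  // Placeholder values; the real constants live in @codebuff/common/tools/constants.
  const startToolTag = '<cb_tool_call>'
  const endToolTag = '</cb_tool_call>'
  const toolNameParam = 'cb_tool_name'

  type Chunk = { type: 'text' | 'reasoning'; text: string }

  function* wrapReasoning(chunks: Iterable<Chunk>): Generator<string> {
    let reasoning = false
    for (const chunk of chunks) {
      if (chunk.type === 'reasoning') {
        if (!reasoning) {
          reasoning = true // open a synthetic think_deeply tool call
          yield `\n\n${startToolTag}{\n${JSON.stringify(toolNameParam)}: "think_deeply",\n"thought": "`
        }
        // Escape the delta without its surrounding quotes so successive
        // deltas concatenate into a single valid JSON string literal.
        yield JSON.stringify(chunk.text).slice(1, -1)
      } else {
        if (reasoning) {
          reasoning = false // close the open thought before emitting text
          yield `"\n}${endToolTag}\n\n`
        }
        yield chunk.text
      }
    }
  }

The `chunk satisfies never` branch in the real loop is a compile-time guard: if a new `StreamChunk` variant is added later, TypeScript will flag this loop until it is handled.
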
backend/src/xml-stream-parser.ts

Lines changed: 11 additions & 10 deletions
@@ -7,6 +7,7 @@ import {
   toolNameParam,
 } from '@codebuff/common/tools/constants'
 
+import type { StreamChunk } from './llm-apis/vercel-ai-sdk/ai-sdk'
 import type { Model } from '@codebuff/common/old-constants'
 import type {
   PrintModeError,
@@ -22,7 +23,7 @@ const toolExtractionPattern = new RegExp(
 const completionSuffix = `${JSON.stringify(endsAgentStepParam)}: true\n}${endToolTag}`
 
 export async function* processStreamWithTags(
-  stream: AsyncGenerator<string> | ReadableStream<string>,
+  stream: AsyncGenerator<StreamChunk>,
   processors: Record<
     string,
     {
@@ -39,7 +40,7 @@ export async function* processStreamWithTags(
     model?: Model
     agentName?: string
   },
-): AsyncGenerator<string> {
+): AsyncGenerator<StreamChunk> {
   let streamCompleted = false
   let buffer = ''
   let autocompleted = false
@@ -131,17 +132,20 @@ export async function* processStreamWithTags(
     matches.forEach(processToolCallContents)
   }
 
-  function* processChunk(chunk: string | undefined) {
-    if (chunk !== undefined) {
-      buffer += chunk
+  function* processChunk(chunk: StreamChunk | undefined) {
+    if (chunk !== undefined && chunk.type === 'text') {
+      buffer += chunk.text
     }
     extractToolsFromBufferAndProcess()
 
     if (chunk === undefined) {
      streamCompleted = true
       if (buffer.includes(startToolTag)) {
         buffer += completionSuffix
-        chunk = completionSuffix
+        chunk = {
+          type: 'text',
+          text: completionSuffix,
+        }
         autocompleted = true
       }
       extractToolsFromBufferAndProcess()
@@ -152,7 +156,7 @@ export async function* processStreamWithTags(
     }
   }
 
-  for await (const chunk of stream as AsyncIterable<string>) {
+  for await (const chunk of stream) {
     if (streamCompleted) {
       break
     }
@@ -163,7 +167,4 @@ export async function* processStreamWithTags(
   // After the stream ends, try parsing one last time in case there's leftover text
   yield* processChunk(undefined)
   }
-
-  for await (const chunk of stream as AsyncIterable<string>) {
-  }
 }

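In the parser, only `text` chunks feed the tool-extraction buffer; `reasoning` and `error` chunks pass through untouched, and the typed stream also lets the commit drop the `as AsyncIterable<string>` casts and a leftover empty `for await` loop. The end-of-stream call `processChunk(undefined)` still autocompletes an unterminated tool call, now by synthesizing a text chunk. A simplified sketch of that contract; the local state and constant values here are assumptions, not the real module:

  type StreamChunk =
    | { type: 'text'; text: string }
    | { type: 'reasoning'; text: string }
    | { type: 'error'; message: string }

  // Placeholder values; the real strings come from @codebuff/common/tools/constants.
  const startToolTag = '<cb_tool_call>'
  const completionSuffix = `"cb_ends_agent_step": true\n}</cb_tool_call>`

  let buffer = ''
  let streamCompleted = false

  function* processChunk(chunk: StreamChunk | undefined): Generator<StreamChunk> {
    // Only text can contain tool tags; reasoning/error chunks skip the buffer.
    if (chunk !== undefined && chunk.type === 'text') {
      buffer += chunk.text
    }
    if (chunk === undefined) {
      streamCompleted = true
      if (buffer.includes(startToolTag)) {
        // The model stopped mid tool call: append the closing suffix so the
        // buffered JSON still parses, and forward it downstream as text.
        buffer += completionSuffix
        chunk = { type: 'text', text: completionSuffix }
      }
    }
    if (chunk !== undefined) {
      yield chunk
    }
  }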