Fix orphan tool result!

jahooma · jahooma · commit c38865cef0f7 · 2025-12-29T19:32:04.000-08:00
diff --git a/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts b/packages/agent-runtime/src/__tests__/tool-validation-error.test.ts
@@ -0,0 +1,214 @@
+import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime'
+import { getInitialSessionState } from '@codebuff/common/types/session-state'
+import { beforeEach, describe, expect, it } from 'bun:test'
+
+import { disableLiveUserInputCheck } from '../live-user-inputs'
+import { processStream } from '../tools/stream-parser'
+import { mockFileContext } from './test-utils'
+
+import type { AgentTemplate } from '../templates/types'
+import type {
+  AgentRuntimeDeps,
+  AgentRuntimeScopedDeps,
+} from '@codebuff/common/types/contracts/agent-runtime'
+import type { StreamChunk } from '@codebuff/common/types/contracts/llm'
+import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
+
+describe('tool validation error handling', () => {
+  let agentRuntimeImpl: AgentRuntimeDeps & AgentRuntimeScopedDeps
+
+  beforeEach(() => {
+    disableLiveUserInputCheck()
+    agentRuntimeImpl = { ...TEST_AGENT_RUNTIME_IMPL, sendAction: () => {} }
+  })
+
+  const testAgentTemplate: AgentTemplate = {
+    id: 'test-agent',
+    displayName: 'Test Agent',
+    spawnerPrompt: 'Test agent',
+    model: 'claude-3-5-sonnet-20241022',
+    inputSchema: {},
+    outputMode: 'structured_output',
+    includeMessageHistory: true,
+    inheritParentSystemPrompt: false,
+    mcpServers: {},
+    toolNames: ['spawn_agents', 'end_turn'],
+    spawnableAgents: [],
+    systemPrompt: 'Test system prompt',
+    instructionsPrompt: 'Test instructions',
+    stepPrompt: 'Test step prompt',
+  }
+
+  it('should emit error event instead of tool result when spawn_agents receives invalid parameters', async () => {
+    // This simulates what happens when the LLM passes a string instead of an array to spawn_agents
+    // The error from Anthropic was: "Invalid parameters for spawn_agents: expected array, received string"
+    const invalidToolCallChunk: StreamChunk = {
+      type: 'tool-call',
+      toolName: 'spawn_agents',
+      toolCallId: 'test-tool-call-id',
+      input: {
+        agents: 'this should be an array not a string', // Invalid - should be array
+      },
+    }
+
+    async function* mockStream(): AsyncGenerator<StreamChunk, string | null> {
+      yield invalidToolCallChunk
+      return 'mock-message-id'
+    }
+
+    const sessionState = getInitialSessionState(mockFileContext)
+    const agentState = sessionState.mainAgentState
+
+    const responseChunks: (string | PrintModeEvent)[] = []
+
+    await processStream({
+      ...agentRuntimeImpl,
+      agentContext: {},
+      agentState,
+      agentStepId: 'test-step-id',
+      agentTemplate: testAgentTemplate,
+      ancestorRunIds: [],
+      clientSessionId: 'test-session',
+      fileContext: mockFileContext,
+      fingerprintId: 'test-fingerprint',
+      fullResponse: '',
+      localAgentTemplates: { 'test-agent': testAgentTemplate },
+      messages: [],
+      prompt: 'test prompt',
+      repoId: undefined,
+      repoUrl: undefined,
+      runId: 'test-run-id',
+      signal: new AbortController().signal,
+      stream: mockStream(),
+      system: 'test system',
+      tools: {},
+      userId: 'test-user',
+      userInputId: 'test-input-id',
+      onCostCalculated: async () => {},
+      onResponseChunk: (chunk) => {
+        responseChunks.push(chunk)
+      },
+    })
+
+    // Verify an error event was emitted (not a tool result)
+    const errorEvents = responseChunks.filter(
+      (chunk): chunk is Extract<PrintModeEvent, { type: 'error' }> =>
+        typeof chunk !== 'string' && chunk.type === 'error',
+    )
+    expect(errorEvents.length).toBe(1)
+    expect(errorEvents[0].message).toContain('Invalid parameters for spawn_agents')
+
+    // Verify NO tool_call event was emitted (since validation failed before that point)
+    const toolCallEvents = responseChunks.filter(
+      (chunk): chunk is Extract<PrintModeEvent, { type: 'tool_call' }> =>
+        typeof chunk !== 'string' && chunk.type === 'tool_call',
+    )
+    expect(toolCallEvents.length).toBe(0)
+
+    // Verify NO tool_result event was emitted
+    const toolResultEvents = responseChunks.filter(
+      (chunk): chunk is Extract<PrintModeEvent, { type: 'tool_result' }> =>
+        typeof chunk !== 'string' && chunk.type === 'tool_result',
+    )
+    expect(toolResultEvents.length).toBe(0)
+
+    // Verify the message history doesn't contain orphan tool results
+    // It should NOT have any tool messages since no tool call was made
+    const toolMessages = agentState.messageHistory.filter(
+      (m) => m.role === 'tool',
+    )
+    const assistantToolCalls = agentState.messageHistory.filter(
+      (m) =>
+        m.role === 'assistant' &&
+        m.content.some((c) => c.type === 'tool-call'),
+    )
+
+    // There should be no tool messages at all (the key fix!)
+    expect(toolMessages.length).toBe(0)
+    // And no assistant tool calls either
+    expect(assistantToolCalls.length).toBe(0)
+  })
+
+  it('should still emit tool_call and tool_result for valid tool calls', async () => {
+    // Create an agent that has read_files tool
+    const agentWithReadFiles: AgentTemplate = {
+      ...testAgentTemplate,
+      toolNames: ['read_files', 'end_turn'],
+    }
+
+    const validToolCallChunk: StreamChunk = {
+      type: 'tool-call',
+      toolName: 'read_files',
+      toolCallId: 'valid-tool-call-id',
+      input: {
+        paths: ['test.ts'], // Valid array parameter
+      },
+    }
+
+    async function* mockStream(): AsyncGenerator<StreamChunk, string | null> {
+      yield validToolCallChunk
+      return 'mock-message-id'
+    }
+
+    const sessionState = getInitialSessionState(mockFileContext)
+    const agentState = sessionState.mainAgentState
+
+    // Mock requestFiles to return a file
+    agentRuntimeImpl.requestFiles = async () => ({
+      'test.ts': 'console.log("test")',
+    })
+
+    const responseChunks: (string | PrintModeEvent)[] = []
+
+    await processStream({
+      ...agentRuntimeImpl,
+      agentContext: {},
+      agentState,
+      agentStepId: 'test-step-id',
+      agentTemplate: agentWithReadFiles,
+      ancestorRunIds: [],
+      clientSessionId: 'test-session',
+      fileContext: mockFileContext,
+      fingerprintId: 'test-fingerprint',
+      fullResponse: '',
+      localAgentTemplates: { 'test-agent': agentWithReadFiles },
+      messages: [],
+      prompt: 'test prompt',
+      repoId: undefined,
+      repoUrl: undefined,
+      runId: 'test-run-id',
+      signal: new AbortController().signal,
+      stream: mockStream(),
+      system: 'test system',
+      tools: {},
+      userId: 'test-user',
+      userInputId: 'test-input-id',
+      onCostCalculated: async () => {},
+      onResponseChunk: (chunk) => {
+        responseChunks.push(chunk)
+      },
+    })
+
+    // Verify tool_call event was emitted
+    const toolCallEvents = responseChunks.filter(
+      (chunk): chunk is Extract<PrintModeEvent, { type: 'tool_call' }> =>
+        typeof chunk !== 'string' && chunk.type === 'tool_call',
+    )
+    expect(toolCallEvents.length).toBe(1)
+    expect(toolCallEvents[0].toolName).toBe('read_files')
+
+    // Verify tool_result event was emitted
+    const toolResultEvents = responseChunks.filter(
+      (chunk): chunk is Extract<PrintModeEvent, { type: 'tool_result' }> =>
+        typeof chunk !== 'string' && chunk.type === 'tool_result',
+    )
+    expect(toolResultEvents.length).toBe(1)
+
+    // Verify NO error events
+    const errorEvents = responseChunks.filter(
+      (chunk): chunk is Extract<PrintModeEvent, { type: 'error' }> =>
+        typeof chunk !== 'string' && chunk.type === 'error',
+    )
+    expect(errorEvents.length).toBe(0)
+  })
+})
diff --git a/packages/agent-runtime/src/tools/tool-executor.ts b/packages/agent-runtime/src/tools/tool-executor.ts
@@ -1,6 +1,5 @@
 import { endsAgentStepParam } from '@codebuff/common/tools/constants'
 import { toolParams } from '@codebuff/common/tools/list'
-import { jsonToolResult } from '@codebuff/common/util/messages'
 import { generateCompactId } from '@codebuff/common/util/string'
 import { cloneDeep } from 'lodash'
 
@@ -177,16 +176,10 @@ export function executeToolCall<T extends ToolName>(
   }
 
   if ('error' in toolCall) {
-    const toolResult: ToolMessage = {
-      role: 'tool',
-      toolName,
-      toolCallId: toolCall.toolCallId,
-      content: jsonToolResult({
-        errorMessage: toolCall.error,
-      }),
-    }
-    toolResults.push(cloneDeep(toolResult))
-    toolResultsToAddAfterStream.push(cloneDeep(toolResult))
+    onResponseChunk({
+      type: 'error',
+      message: toolCall.error,
+    })
     logger.debug(
       { toolCall, error: toolCall.error },
       `${toolName} error: ${toolCall.error}`,
@@ -392,16 +385,10 @@ export async function executeCustomToolCall(
   }
 
   if ('error' in toolCall) {
-    const toolResult: ToolMessage = {
-      role: 'tool',
-      toolName,
-      toolCallId: toolCall.toolCallId,
-      content: jsonToolResult({
-        errorMessage: toolCall.error,
-      }),
-    }
-    toolResults.push(cloneDeep(toolResult))
-    toolResultsToAddAfterStream.push(cloneDeep(toolResult))
+    onResponseChunk({
+      type: 'error',
+      message: toolCall.error,
+    })
     logger.debug(
       { toolCall, error: toolCall.error },
       `${toolName} error: ${toolCall.error}`,