Skip to content

Commit c38865c

Browse files
committed
Fix orphan tool result!
1 parent 08c08aa commit c38865c

File tree

2 files changed

+222
-21
lines changed

2 files changed

+222
-21
lines changed
Lines changed: 214 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,214 @@
1+
import { TEST_AGENT_RUNTIME_IMPL } from '@codebuff/common/testing/impl/agent-runtime'
2+
import { getInitialSessionState } from '@codebuff/common/types/session-state'
3+
import { beforeEach, describe, expect, it } from 'bun:test'
4+
5+
import { disableLiveUserInputCheck } from '../live-user-inputs'
6+
import { processStream } from '../tools/stream-parser'
7+
import { mockFileContext } from './test-utils'
8+
9+
import type { AgentTemplate } from '../templates/types'
10+
import type {
11+
AgentRuntimeDeps,
12+
AgentRuntimeScopedDeps,
13+
} from '@codebuff/common/types/contracts/agent-runtime'
14+
import type { StreamChunk } from '@codebuff/common/types/contracts/llm'
15+
import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
16+
17+
describe('tool validation error handling', () => {
18+
let agentRuntimeImpl: AgentRuntimeDeps & AgentRuntimeScopedDeps
19+
20+
// Per-test setup: calls disableLiveUserInputCheck() and rebuilds the runtime
// dependencies that each test spreads into its processStream() call.
beforeEach(() => {
21+
disableLiveUserInputCheck()
22+
// The spread creates a new object for every test, so reassigning a property on
// `agentRuntimeImpl` inside a test (as the read_files test does with
// `requestFiles`) does not modify the shared TEST_AGENT_RUNTIME_IMPL object.
// `sendAction` is overridden with a no-op.
agentRuntimeImpl = { ...TEST_AGENT_RUNTIME_IMPL, sendAction: () => {} }
23+
})
24+
25+
// Agent template fixture shared by the tests in this suite. The first test uses
// it unchanged; the second test spreads it and overrides only `toolNames`.
const testAgentTemplate: AgentTemplate = {
26+
id: 'test-agent',
27+
displayName: 'Test Agent',
28+
spawnerPrompt: 'Test agent',
29+
model: 'claude-3-5-sonnet-20241022',
30+
inputSchema: {},
31+
outputMode: 'structured_output',
32+
includeMessageHistory: true,
33+
inheritParentSystemPrompt: false,
34+
mcpServers: {},
35+
// Lists spawn_agents, the tool that the invalid-parameter test calls.
toolNames: ['spawn_agents', 'end_turn'],
36+
spawnableAgents: [],
37+
systemPrompt: 'Test system prompt',
38+
instructionsPrompt: 'Test instructions',
39+
stepPrompt: 'Test step prompt',
40+
}
41+
42+
// Invalid-parameter case: feeds processStream() a single spawn_agents tool-call
// chunk whose `agents` input is a string, then asserts that
//   - exactly one 'error' event reaches onResponseChunk, and its message contains
//     'Invalid parameters for spawn_agents';
//   - no 'tool_call' and no 'tool_result' events are emitted;
//   - agentState.messageHistory holds no role === 'tool' messages and no
//     assistant messages with a 'tool-call' content part.
it('should emit error event instead of tool result when spawn_agents receives invalid parameters', async () => {
43+
// This simulates what happens when the LLM passes a string instead of an array to spawn_agents
44+
// The error from Anthropic was: "Invalid parameters for spawn_agents: expected array, received string"
45+
const invalidToolCallChunk: StreamChunk = {
46+
type: 'tool-call',
47+
toolName: 'spawn_agents',
48+
toolCallId: 'test-tool-call-id',
49+
input: {
50+
agents: 'this should be an array not a string', // Invalid - should be array
51+
},
52+
}
53+
54+
// One-chunk stream: yields the invalid tool call, then returns a message id
// as the generator's return value.
async function* mockStream(): AsyncGenerator<StreamChunk, string | null> {
55+
yield invalidToolCallChunk
56+
return 'mock-message-id'
57+
}
58+
59+
const sessionState = getInitialSessionState(mockFileContext)
60+
const agentState = sessionState.mainAgentState
61+
62+
// Collects every chunk processStream() hands to onResponseChunk so the
// assertions below can filter it by event type.
const responseChunks: (string | PrintModeEvent)[] = []
63+
64+
await processStream({
65+
...agentRuntimeImpl,
66+
agentContext: {},
67+
agentState,
68+
agentStepId: 'test-step-id',
69+
agentTemplate: testAgentTemplate,
70+
ancestorRunIds: [],
71+
clientSessionId: 'test-session',
72+
fileContext: mockFileContext,
73+
fingerprintId: 'test-fingerprint',
74+
fullResponse: '',
75+
localAgentTemplates: { 'test-agent': testAgentTemplate },
76+
messages: [],
77+
prompt: 'test prompt',
78+
repoId: undefined,
79+
repoUrl: undefined,
80+
runId: 'test-run-id',
81+
signal: new AbortController().signal,
82+
stream: mockStream(),
83+
system: 'test system',
84+
tools: {},
85+
userId: 'test-user',
86+
userInputId: 'test-input-id',
87+
onCostCalculated: async () => {},
88+
onResponseChunk: (chunk) => {
89+
responseChunks.push(chunk)
90+
},
91+
})
92+
93+
// Verify an error event was emitted (not a tool result)
94+
const errorEvents = responseChunks.filter(
95+
(chunk): chunk is Extract<PrintModeEvent, { type: 'error' }> =>
96+
typeof chunk !== 'string' && chunk.type === 'error',
97+
)
98+
expect(errorEvents.length).toBe(1)
99+
expect(errorEvents[0].message).toContain('Invalid parameters for spawn_agents')
100+
101+
// Verify NO tool_call event was emitted (since validation failed before that point)
102+
const toolCallEvents = responseChunks.filter(
103+
(chunk): chunk is Extract<PrintModeEvent, { type: 'tool_call' }> =>
104+
typeof chunk !== 'string' && chunk.type === 'tool_call',
105+
)
106+
expect(toolCallEvents.length).toBe(0)
107+
108+
// Verify NO tool_result event was emitted
109+
const toolResultEvents = responseChunks.filter(
110+
(chunk): chunk is Extract<PrintModeEvent, { type: 'tool_result' }> =>
111+
typeof chunk !== 'string' && chunk.type === 'tool_result',
112+
)
113+
expect(toolResultEvents.length).toBe(0)
114+
115+
// Verify the message history doesn't contain orphan tool results
116+
// It should NOT have any tool messages since no tool call was made
117+
const toolMessages = agentState.messageHistory.filter(
118+
(m) => m.role === 'tool',
119+
)
120+
const assistantToolCalls = agentState.messageHistory.filter(
121+
(m) =>
122+
m.role === 'assistant' &&
123+
m.content.some((c) => c.type === 'tool-call'),
124+
)
125+
126+
// There should be no tool messages at all (the key fix!)
127+
expect(toolMessages.length).toBe(0)
128+
// And no assistant tool calls either
129+
expect(assistantToolCalls.length).toBe(0)
130+
})
131+
132+
// Valid-call case: a well-formed read_files call is streamed through
// processStream() and must produce exactly one 'tool_call' event (for
// 'read_files'), exactly one 'tool_result' event, and no 'error' events.
it('should still emit tool_call and tool_result for valid tool calls', async () => {
133+
// Create an agent that has read_files tool
134+
const agentWithReadFiles: AgentTemplate = {
135+
...testAgentTemplate,
136+
toolNames: ['read_files', 'end_turn'],
137+
}
138+
139+
const validToolCallChunk: StreamChunk = {
140+
type: 'tool-call',
141+
toolName: 'read_files',
142+
toolCallId: 'valid-tool-call-id',
143+
input: {
144+
paths: ['test.ts'], // Valid array parameter
145+
},
146+
}
147+
148+
// One-chunk stream: yields the valid tool call, then returns a message id
// as the generator's return value.
async function* mockStream(): AsyncGenerator<StreamChunk, string | null> {
149+
yield validToolCallChunk
150+
return 'mock-message-id'
151+
}
152+
153+
const sessionState = getInitialSessionState(mockFileContext)
154+
const agentState = sessionState.mainAgentState
155+
156+
// Mock requestFiles to return a file
// (assigned on the per-test copy created in beforeEach, keyed by the same
// path the tool call asks for)
157+
agentRuntimeImpl.requestFiles = async () => ({
158+
'test.ts': 'console.log("test")',
159+
})
160+
161+
// Collects every chunk processStream() hands to onResponseChunk.
const responseChunks: (string | PrintModeEvent)[] = []
162+
163+
await processStream({
164+
...agentRuntimeImpl,
165+
agentContext: {},
166+
agentState,
167+
agentStepId: 'test-step-id',
168+
agentTemplate: agentWithReadFiles,
169+
ancestorRunIds: [],
170+
clientSessionId: 'test-session',
171+
fileContext: mockFileContext,
172+
fingerprintId: 'test-fingerprint',
173+
fullResponse: '',
// The template's id is still 'test-agent' (inherited through the spread
// above), so it is registered under that key.
174+
localAgentTemplates: { 'test-agent': agentWithReadFiles },
175+
messages: [],
176+
prompt: 'test prompt',
177+
repoId: undefined,
178+
repoUrl: undefined,
179+
runId: 'test-run-id',
180+
signal: new AbortController().signal,
181+
stream: mockStream(),
182+
system: 'test system',
183+
tools: {},
184+
userId: 'test-user',
185+
userInputId: 'test-input-id',
186+
onCostCalculated: async () => {},
187+
onResponseChunk: (chunk) => {
188+
responseChunks.push(chunk)
189+
},
190+
})
191+
192+
// Verify tool_call event was emitted
193+
const toolCallEvents = responseChunks.filter(
194+
(chunk): chunk is Extract<PrintModeEvent, { type: 'tool_call' }> =>
195+
typeof chunk !== 'string' && chunk.type === 'tool_call',
196+
)
197+
expect(toolCallEvents.length).toBe(1)
198+
expect(toolCallEvents[0].toolName).toBe('read_files')
199+
200+
// Verify tool_result event was emitted
201+
const toolResultEvents = responseChunks.filter(
202+
(chunk): chunk is Extract<PrintModeEvent, { type: 'tool_result' }> =>
203+
typeof chunk !== 'string' && chunk.type === 'tool_result',
204+
)
205+
expect(toolResultEvents.length).toBe(1)
206+
207+
// Verify NO error events
208+
const errorEvents = responseChunks.filter(
209+
(chunk): chunk is Extract<PrintModeEvent, { type: 'error' }> =>
210+
typeof chunk !== 'string' && chunk.type === 'error',
211+
)
212+
expect(errorEvents.length).toBe(0)
213+
})
214+
})

packages/agent-runtime/src/tools/tool-executor.ts

Lines changed: 8 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,5 @@
11
import { endsAgentStepParam } from '@codebuff/common/tools/constants'
22
import { toolParams } from '@codebuff/common/tools/list'
3-
import { jsonToolResult } from '@codebuff/common/util/messages'
43
import { generateCompactId } from '@codebuff/common/util/string'
54
import { cloneDeep } from 'lodash'
65

@@ -177,16 +176,10 @@ export function executeToolCall<T extends ToolName>(
177176
}
178177

179178
if ('error' in toolCall) {
180-
const toolResult: ToolMessage = {
181-
role: 'tool',
182-
toolName,
183-
toolCallId: toolCall.toolCallId,
184-
content: jsonToolResult({
185-
errorMessage: toolCall.error,
186-
}),
187-
}
188-
toolResults.push(cloneDeep(toolResult))
189-
toolResultsToAddAfterStream.push(cloneDeep(toolResult))
179+
onResponseChunk({
180+
type: 'error',
181+
message: toolCall.error,
182+
})
190183
logger.debug(
191184
{ toolCall, error: toolCall.error },
192185
`${toolName} error: ${toolCall.error}`,
@@ -392,16 +385,10 @@ export async function executeCustomToolCall(
392385
}
393386

394387
if ('error' in toolCall) {
395-
const toolResult: ToolMessage = {
396-
role: 'tool',
397-
toolName,
398-
toolCallId: toolCall.toolCallId,
399-
content: jsonToolResult({
400-
errorMessage: toolCall.error,
401-
}),
402-
}
403-
toolResults.push(cloneDeep(toolResult))
404-
toolResultsToAddAfterStream.push(cloneDeep(toolResult))
388+
onResponseChunk({
389+
type: 'error',
390+
message: toolCall.error,
391+
})
405392
logger.debug(
406393
{ toolCall, error: toolCall.error },
407394
`${toolName} error: ${toolCall.error}`,

0 commit comments

Comments
 (0)