Implement STEP_TEXT with custom parsing in run-programmatic-step. Get best-of-n editor working!

jahooma · jahooma · commit 72c617a441ad · 2025-11-26T16:38:16.000-08:00
diff --git a/.agents/editor/best-of-n/editor-best-of-n.ts b/.agents/editor/best-of-n/editor-best-of-n.ts
@@ -314,7 +314,7 @@ function* handleStepsMax({
   )
 
   // Spawn selector with implementations as params
-  const { toolResult: selectorResult } = yield {
+  const { toolResult: selectorResult, agentState: selectorAgentState } = yield {
     toolName: 'spawn_agents',
     input: {
       agents: [
@@ -353,27 +353,19 @@ function* handleStepsMax({
     return
   }
 
+  const numMessagesBeforeStepText = selectorAgentState.messageHistory.length
+
   const { agentState: postEditsAgentState } = yield {
     type: 'STEP_TEXT',
     text: chosenImplementation.content,
   } as StepText
   const { messageHistory } = postEditsAgentState
-  const lastAssistantMessageIndex = messageHistory.findLastIndex(
-    (message) => message.role === 'assistant',
-  )
-  const editToolResults = messageHistory
-    .slice(lastAssistantMessageIndex)
-    .filter((message) => message.role === 'tool')
-    .flatMap((message) => message.content)
-    .filter((output) => output.type === 'json')
-    .map((output) => output.value)
 
-  // Set output with the chosen implementation and reasoning
+  // Set output with the messages from running the step text of the chosen implementation
   yield {
     toolName: 'set_output',
     input: {
-      response: chosenImplementation.content,
-      toolResults: editToolResults,
+      messages: messageHistory.slice(numMessagesBeforeStepText),
     },
     includeToolCall: false,
   } satisfies ToolCall<'set_output'>
diff --git a/.agents/editor/best-of-n/editor-implementor.ts b/.agents/editor/best-of-n/editor-implementor.ts
@@ -37,7 +37,7 @@ export const createBestOfNImplementor = (options: {
     
 Your task is to write out ALL the code changes needed to complete the user's request in a single comprehensive response.
 
-Important: You can not make any other tool calls besides editing files. You cannot read more files, write todos, or spawn agents. Do not call any of these tools!
+Important: You can not make any other tool calls besides editing files. You cannot read more files, write todos, spawn agents, or set output. Do not call any of these tools!
 
 Write out what changes you would make using the tool call format below. Use this exact format for each file change:
 
diff --git a/evals/scaffolding.ts b/evals/scaffolding.ts
@@ -231,7 +231,6 @@ export async function runAgentStepScaffolding(
     signal: new AbortController().signal,
     spawnParams: undefined,
     system: 'Test system prompt',
-    textOverride: null,
     tools: {},
     userId: TEST_USER_ID,
     userInputId: generateCompactId(),
diff --git a/packages/agent-runtime/src/__tests__/n-parameter.test.ts b/packages/agent-runtime/src/__tests__/n-parameter.test.ts
@@ -104,7 +104,6 @@ describe('n parameter and GENERATE_N functionality', () => {
     runAgentStepBaseParams = {
       ...agentRuntimeImpl,
       additionalToolDefinitions: () => Promise.resolve({}),
-      textOverride: null,
       runId: 'test-run-id',
       ancestorRunIds: [],
       repoId: undefined,
diff --git a/packages/agent-runtime/src/__tests__/read-docs-tool.test.ts b/packages/agent-runtime/src/__tests__/read-docs-tool.test.ts
@@ -75,7 +75,6 @@ describe('read_docs tool with researcher agent (via web API facade)', () => {
     runAgentStepBaseParams = {
       ...agentRuntimeImpl,
       additionalToolDefinitions: () => Promise.resolve({}),
-      textOverride: null,
       runId: 'test-run-id',
       ancestorRunIds: [],
       repoId: undefined,
@@ -215,7 +214,6 @@ describe('read_docs tool with researcher agent (via web API facade)', () => {
 
     const { agentState: newAgentState } = await runAgentStep({
       ...runAgentStepBaseParams,
-      textOverride: null,
       fileContext: mockFileContextWithAgents,
       localAgentTemplates: agentTemplates,
       agentState,
diff --git a/packages/agent-runtime/src/__tests__/run-agent-step-tools.test.ts b/packages/agent-runtime/src/__tests__/run-agent-step-tools.test.ts
@@ -128,7 +128,6 @@ describe('runAgentStep - set_output tool', () => {
       signal: new AbortController().signal,
       spawnParams: undefined,
       system: 'Test system prompt',
-      textOverride: null,
       tools: {},
       userId: TEST_USER_ID,
       userInputId: 'test-input',
diff --git a/packages/agent-runtime/src/__tests__/run-programmatic-step.test.ts b/packages/agent-runtime/src/__tests__/run-programmatic-step.test.ts
@@ -1542,7 +1542,6 @@ describe('runProgrammaticStep', () => {
       const result = await runProgrammaticStep(mockParams)
 
       expect(result.endTurn).toBe(false)
-      expect(result.textOverride).toBe('Custom response text')
       expect(result.agentState.output?.error).toBeUndefined()
     })
 
diff --git a/packages/agent-runtime/src/__tests__/web-search-tool.test.ts b/packages/agent-runtime/src/__tests__/web-search-tool.test.ts
@@ -74,7 +74,6 @@ describe('web_search tool with researcher agent (via web API facade)', () => {
       signal: new AbortController().signal,
       spawnParams: undefined,
       system: 'Test system prompt',
-      textOverride: null,
       tools: {},
       userId: TEST_USER_ID,
       userInputId: 'test-input',
diff --git a/packages/agent-runtime/src/prompt-agent-stream.ts b/packages/agent-runtime/src/prompt-agent-stream.ts
@@ -26,7 +26,6 @@ export const getAgentStreamFromTemplate = (params: {
   runId: string
   sessionConnections: SessionRecord
   template: AgentTemplate
-  textOverride: string | null
   tools: ToolSet
   userId: string | undefined
   userInputId: string
@@ -48,7 +47,6 @@ export const getAgentStreamFromTemplate = (params: {
     runId,
     sessionConnections,
     template,
-    textOverride,
     tools,
     userId,
     userInputId,
@@ -59,14 +57,6 @@ export const getAgentStreamFromTemplate = (params: {
     trackEvent,
   } = params
 
-  if (textOverride !== null) {
-    async function* stream(): ReturnType<PromptAiSdkStreamFn> {
-      yield { type: 'text', text: textOverride!, agentId }
-      return crypto.randomUUID()
-    }
-    return stream()
-  }
-
   if (!template) {
     throw new Error('Agent template is null/undefined')
   }
diff --git a/packages/agent-runtime/src/run-agent-step.ts b/packages/agent-runtime/src/run-agent-step.ts
@@ -526,7 +526,6 @@ export async function loopAgentSteps(
       | 'runId'
       | 'spawnParams'
       | 'system'
-      | 'textOverride'
       | 'tools'
     > &
     ParamsExcluding<
@@ -716,7 +715,6 @@ export async function loopAgentSteps(
       const startTime = new Date()
 
       // 1. Run programmatic step first if it exists
-      let textOverride = null
       let n: number | undefined = undefined
 
       if (agentTemplate.handleSteps) {
@@ -744,7 +742,6 @@ export async function loopAgentSteps(
           stepNumber,
           generateN,
         } = programmaticResult
-        textOverride = programmaticResult.textOverride
         n = generateN
 
         currentAgentState = programmaticAgentState
@@ -808,7 +805,6 @@ export async function loopAgentSteps(
         runId,
         spawnParams: currentParams,
         system,
-        textOverride: textOverride,
         tools,
 
         additionalToolDefinitions: async () => {
diff --git a/packages/agent-runtime/src/run-programmatic-step.ts b/packages/agent-runtime/src/run-programmatic-step.ts
diff --git a/packages/agent-runtime/src/util/__tests__/parse-tool-calls-from-text.test.ts b/packages/agent-runtime/src/util/__tests__/parse-tool-calls-from-text.test.ts
diff --git a/packages/agent-runtime/src/util/parse-tool-calls-from-text.ts b/packages/agent-runtime/src/util/parse-tool-calls-from-text.ts