base2-gpt-5!

jahooma · jahooma · commit 1a835a8308e9 · 2025-10-31T17:46:58.000-07:00
diff --git a/.agents/base2/base2-gpt-5.ts b/.agents/base2/base2-gpt-5.ts
@@ -1,12 +1,11 @@
 import { createBase2 } from './base2'
 import type { SecretAgentDefinition } from '../types/secret-agent-definition'
 
-const base2 = createBase2('fast')
+const base2 = createBase2('fast', { isGpt5: true })
 
 const definition: SecretAgentDefinition = {
   ...base2,
   id: 'base2-gpt-5',
-  model: 'openai/gpt-5',
 }
 
 export default definition
diff --git a/.agents/base2/base2.ts b/.agents/base2/base2.ts
@@ -10,15 +10,16 @@ export const createBase2: (
   mode: 'fast' | 'max',
   options?: {
     hasNoValidation?: boolean
+    isGpt5?: boolean
   },
 ) => Omit<SecretAgentDefinition, 'id'> = (mode, options) => {
-  const { hasNoValidation = false } = options ?? {}
+  const { hasNoValidation = false, isGpt5 = false } = options ?? {}
   const isFast = mode === 'fast'
   const isMax = mode === 'max'
 
   return {
     publisher,
-    model: 'anthropic/claude-sonnet-4.5',
+    model: isGpt5 ? 'openai/gpt-5' : 'anthropic/claude-sonnet-4.5',
     displayName: 'Buffy the Orchestrator',
     spawnerPrompt:
       'Advanced base agent that orchestrates planning, editing, and reviewing for complex coding tasks',
@@ -48,15 +49,18 @@ export const createBase2: (
       'write_file',
     ),
     spawnableAgents: buildArray(
-      'file-researcher',
+      !isGpt5 && 'file-researcher',
       'file-picker-max',
       'code-searcher',
       'directory-lister',
       'glob-matcher',
       'researcher-web',
       'researcher-docs',
       'commander',
-      isFast && 'best-of-n-orchestrator-fast',
+      isFast &&
+        (isGpt5
+          ? 'best-of-n-orchestrator-gpt-5'
+          : 'best-of-n-orchestrator-fast'),
       isMax && 'base2-gpt-5-worker',
       'context-pruner',
     ),
@@ -74,13 +78,14 @@ Continue to spawn layers of agents until have completed the user's request or re
 ## Spawning agents guidelines
 
 - **Sequence agents properly:** Keep in mind dependencies when spawning different agents. Don't spawn agents in parallel that depend on each other. Be conservative sequencing agents so they can build on each other's insights:
-  - Spawn file pickers, code-searcher, directory-lister, glob-matcher, commanders, and researchers before making edits.
+  - Spawn ${isGpt5 ? 'file pickers, code-searcher, directory-lister, glob-matcher, commanders, and researchers' : 'the file researcher and optionally the web researcher and docs researcher'} before making edits.${isGpt5 ? '' : ' After that, spawn further agents to gather context as needed (e.g. the code-searcher, directory-lister, glob-matcher, commanders, and researchers).'}
   ${buildArray(
     isFast &&
-      '- Spawn a best-of-n-orchestrator-fast agent to implement the changes after you have gathered all the context you need (and not before!).',
+      `- Spawn a ${isGpt5 ? 'best-of-n-orchestrator-gpt-5' : 'best-of-n-orchestrator-fast'} agent to implement the changes after you have gathered all the context you need (and not before!).`,
     isMax &&
       '- Spawn a base2-gpt-5-worker agent inline after you have gathered all the context you need (and not before!).',
   ).join('\n  ')}
+- **Spawn with the correct prompt and/or params:** Each agent has a schema for the input it expects. The prompt is an optional string, and the params is a json object. Note that some agents don't take any input prompt or params.
 - **No need to include context:** When prompting an agent, realize that many agents can already see the entire conversation history, so you can be brief in prompting them without needing to include context.
 
 # Core Mandates
@@ -143,20 +148,33 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT}
 The user asks you to implement a new feature. You respond in multiple steps:
 
 ${buildArray(
-  '- First, you must spawn a file-researcher to find relevant files; consider also spawning a web and/or docs researcher to find relevant information online. (Note: For the first layer, only spawn researchers, not other agents. Do not spawn a code-searcher yet!)',
-  '- Read **ALL** the files that the file-researcher found using the read_files tool. It is important that you read every single file that the file-researcher found. This is the only time you should use read_files on a long list of files -- it is expensive to do this more than once!',
-  `- Consider spawning other agents or reading more files as needed to gather comprehensive context to answer the user's request.`,
+  !isGpt5 &&
+    '- First, for a new task, you must spawn a file-researcher to find relevant files; consider also spawning a web and/or docs researcher to find relevant information online. (Note: For the first layer, only spawn researchers, not other agents. Do not spawn a code-searcher yet!)',
+  !isGpt5 &&
+    '- Read **ALL** the files that the file-researcher found using the read_files tool. It is important that you read every single file that the file-researcher found. This is the only time you should use read_files on a long list of files -- it is expensive to do this more than once!',
+  `- Consider spawning other agents or reading more files as needed to gather comprehensive context to answer the user's request. When in doubt, read more files!`,
+  isGpt5 &&
+    `- Spawn file pickers, code-searcher, directory-lister, glob-matcher, commanders, and researchers to gather context as needed. Read all the relevant files using the read_files tool. Read as many files as possible so that you have a comprehensive context on the user's request.`,
   isFast &&
     `- Use the write_todos tool to write out your step-by-step implementation plan.${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'}`,
   isFast &&
-    `- You must spawn the best-of-n-orchestrator-fast agent to implement the code changes, since it will generate the best code changes from multiple implementation proposals, which the user wants you to do.`,
+    `- You must spawn the ${isGpt5 ? 'best-of-n-orchestrator-gpt-5' : 'best-of-n-orchestrator-fast'} agent to implement the code changes, since it will generate the best code changes from multiple implementation proposals, which the user wants you to do.`,
   isMax &&
     `- IMPORTANT: You must spawn a base2-gpt-5-worker agent inline (with spawn_agent_inline tool) to do the planning and editing.`,
   !hasNoValidation &&
     `- Test your changes${isFast ? ' briefly' : ''} by running appropriate validation commands for the project (e.g. typechecks, tests, lints, etc.). You may have to explore the project to find the appropriate commands. Don't skip this step!`,
   `- Inform the user that you have completed the task in one sentence or a few short bullet points. Don't create any markdown summary files or example documentation files, unless asked by the user. If you already finished the user request and said you're done, then don't say anything else.`,
 ).join('\n')}`,
-    stepPrompt: `${isMax ? "Keep working until the user's request is completely satisfied. " : ''}${isFast ? "You must spawn the best-of-n-orchestrator-fast agent to implement any code changes. Don't forget to do this! " : ''}After completing the user request, summarize your changes in a sentence or a few short bullet points. Do not create any summary markdown files or example documentation files, unless asked by the user. If you already summarized your changes, then end turn and don't say anything else.`,
+    stepPrompt: buildArray(
+      (isMax || isGpt5) &&
+        `Keep working until the user's request is completely satisfied${!hasNoValidation ? ' and validated' : ''}. `,
+      isFast &&
+        `You must spawn the ${isGpt5 ? 'best-of-n-orchestrator-gpt-5' : 'best-of-n-orchestrator-fast'} agent to implement any code changes. Don't forget to do this! `,
+      `After completing the user request, summarize your changes in a sentence or a few short bullet points. Do not create any summary markdown files or example documentation files, unless asked by the user. If you already summarized your changes, then end turn and don't say anything else.`,
+      isGpt5 &&
+        `IMPORTANT: every response *must* include at least one tool call (using "<codebuff_tool_call>" tags), unless you are done with the task. If you don't include at least one tool call, your response will be cut off and the task will be ended prematurely, which is very bad for fulfilling the user's request. When completely done, you can respond without a tool call.`,
+    ).join('\n'),
+
     handleSteps: function* ({ params }) {
       let steps = 0
       while (true) {
diff --git a/.agents/base2/best-of-n/best-of-n-implementor.ts b/.agents/base2/best-of-n/best-of-n-implementor.ts
@@ -60,7 +60,10 @@ OR for new files or major rewrites:
   "content": "Complete file content or edit snippet"
 }
 </codebuff_tool_call>
-
+${
+  isGpt5
+    ? ``
+    : `
 You can also use <think> tags interspersed between tool calls to think about the best way to implement the changes. Keep these thoughts very brief. You may not need to use think tags at all.
 
 <example>
@@ -85,7 +88,8 @@ You can also use <think> tags interspersed between tool calls to think about the
 [ Third tool call to implement the feature ]
 </codebuff_tool_call>
 
-</example>
+</example>`
+}
 
 Your implementation should:
 - Be complete and comprehensive
diff --git a/.agents/base2/best-of-n/best-of-n-orchestrator-fast.ts b/.agents/base2/best-of-n/best-of-n-orchestrator-fast.ts
@@ -8,7 +8,7 @@ const definition: SecretAgentDefinition = {
   model: 'anthropic/claude-sonnet-4.5',
   displayName: 'Best-of-N Fast Implementation Orchestrator',
   spawnerPrompt:
-    'Orchestrates multiple implementor agents to generate implementation proposals and selects the best one',
+    'Orchestrates multiple implementor agents to generate implementation proposals, selects the best one, and applies the changes.',
 
   includeMessageHistory: true,
   inheritParentSystemPrompt: true,
diff --git a/.agents/base2/best-of-n/best-of-n-orchestrator-gpt-5.ts b/.agents/base2/best-of-n/best-of-n-orchestrator-gpt-5.ts
@@ -0,0 +1,150 @@
+import type { SecretAgentDefinition } from '../../types/secret-agent-definition'
+import { publisher } from '../../constants'
+import { StepText, ToolCall } from 'types/agent-definition'
+
+const definition: SecretAgentDefinition = {
+  id: 'best-of-n-orchestrator-gpt-5',
+  publisher,
+  model: 'openai/gpt-5',
+  displayName: 'Best-of-N GPT-5 Implementation Orchestrator',
+  spawnerPrompt:
+    'Orchestrates multiple implementor agents to generate implementation proposals, selects the best one, and applies the changes (no need to make the edits yourself).',
+
+  includeMessageHistory: true,
+  inheritParentSystemPrompt: true,
+
+  toolNames: [
+    'spawn_agents',
+    'str_replace',
+    'write_file',
+    'set_messages',
+    'set_output',
+  ],
+  spawnableAgents: ['best-of-n-implementor-gpt-5', 'best-of-n-selector-gpt-5'],
+
+  inputSchema: {},
+  outputMode: 'structured_output',
+
+  handleSteps: function* ({ agentState }) {
+    // Remove the userInstruction message for this agent: copy the history and drop its last entry (assumed to be that message).
+    const messages = agentState.messageHistory.concat()
+    messages.pop()
+    yield {
+      toolName: 'set_messages',
+      input: {
+        messages,
+      },
+      includeToolCall: false,
+    } satisfies ToolCall<'set_messages'>
+
+    // Spawn five 'best-of-n-implementor-gpt-5' agents in a single spawn_agents call (all entries use the same agent type).
+    const { toolResult: implementorsResult1 } = yield {
+      toolName: 'spawn_agents',
+      input: {
+        agents: [
+          { agent_type: 'best-of-n-implementor-gpt-5' },
+          { agent_type: 'best-of-n-implementor-gpt-5' },
+          { agent_type: 'best-of-n-implementor-gpt-5' },
+          { agent_type: 'best-of-n-implementor-gpt-5' },
+          { agent_type: 'best-of-n-implementor-gpt-5' },
+        ],
+      },
+      includeToolCall: false,
+    }
+
+    const implementorsResult = extractSpawnResults<string>(implementorsResult1)
+
+    // Label each implementor result with a letter id (A, B, C, ...) in spawn-result order.
+    const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+    // NOTE(review): results that came back as { errorMessage } objects are not filtered out here, so `content` is not always a string.
+    const implementations = implementorsResult.map((content, index) => ({
+      id: letters[index],
+      content,
+    }))
+
+    // Spawn selector with implementations as params
+    const { toolResult: selectorResult } = yield {
+      toolName: 'spawn_agents',
+      input: {
+        agents: [
+          {
+            agent_type: 'best-of-n-selector-gpt-5',
+            params: { implementations },
+          },
+        ],
+      },
+      includeToolCall: false,
+    } satisfies ToolCall<'spawn_agents'>
+
+    const selectorOutput = extractSpawnResults<{
+      implementationId: string
+      reasoning: string
+    }>(selectorResult)[0] // NOTE(review): undefined when no results come back; the `in` check below would then throw a TypeError
+
+    if ('errorMessage' in selectorOutput) {
+      yield {
+        toolName: 'set_output',
+        input: { error: selectorOutput.errorMessage },
+      } satisfies ToolCall<'set_output'>
+      return
+    }
+    const { implementationId } = selectorOutput
+    const chosenImplementation = implementations.find(
+      (implementation) => implementation.id === implementationId,
+    )
+    if (!chosenImplementation) {
+      yield {
+        toolName: 'set_output',
+        input: { error: 'Failed to find chosen implementation.' },
+      } satisfies ToolCall<'set_output'>
+      return
+    }
+
+    // Apply the chosen implementation by yielding its content as a STEP_TEXT step
+    const { agentState: postEditsAgentState } = yield {
+      type: 'STEP_TEXT',
+      text: chosenImplementation.content,
+    } as StepText
+    const { messageHistory } = postEditsAgentState
+    const lastAssistantMessageIndex = messageHistory.findLastIndex(
+      (message) => message.role === 'assistant',
+    )
+    const editToolResults = messageHistory // collect the 'json' outputs of tool messages from the last assistant message onward
+      .slice(lastAssistantMessageIndex)
+      .filter((message) => message.role === 'tool')
+      .flatMap((message) => message.content.output)
+      .filter((output) => output.type === 'json')
+      .map((output) => output.value)
+
+    // Set output with the chosen implementation's content and the tool results collected above (the selector's reasoning is not included)
+    yield {
+      toolName: 'set_output',
+      input: {
+        response: chosenImplementation.content,
+        toolResults: editToolResults,
+      },
+    } satisfies ToolCall<'set_output'>
+
+    function extractSpawnResults<T>( // hoisted function declaration, so the calls earlier in this generator are valid
+      results: any[] | undefined,
+    ): (T | { errorMessage: string })[] {
+      if (!results) return [] // no tool result at all -> no spawn results
+      const spawnedResults = results
+        .filter((result) => result.type === 'json')
+        .map((result) => result.value)
+        .flat() as {
+        agentType: string
+        value: { value?: T; errorMessage?: string }
+      }[]
+      return spawnedResults.map(
+        (result) =>
+          result.value.value ?? {
+            errorMessage:
+              result.value.errorMessage ?? 'Error extracting spawn results',
+          },
+      )
+    }
+  },
+}
+
+export default definition
diff --git a/.agents/base2/best-of-n/best-of-n-orchestrator.ts b/.agents/base2/best-of-n/best-of-n-orchestrator.ts
@@ -8,7 +8,7 @@ const definition: SecretAgentDefinition = {
   model: 'anthropic/claude-sonnet-4.5',
   displayName: 'Best-of-N Implementation Orchestrator',
   spawnerPrompt:
-    'Orchestrates multiple implementor agents to generate implementation proposals and selects the best one',
+    'Orchestrates multiple implementor agents to generate implementation proposals, selects the best one, and applies the changes.',
 
   includeMessageHistory: true,
   inheritParentSystemPrompt: true,