CodebuffAI
diff --git a/.agents/base2/base2-with-code-reviewer-best-of-n.ts b/.agents/base2/base2-with-code-reviewer-best-of-n.ts
Lines changed: 8 additions & 0 deletions
diff --git a/.agents/base2/base2.ts b/.agents/base2/base2.ts
Lines changed: 11 additions & 1 deletion
diff --git a/.agents/reviewer/code-reviewer-best-of-n.ts b/.agents/reviewer/code-reviewer-best-of-n.ts
Lines changed: 266 additions & 0 deletions
@@ -0,0 +1,8 @@
+import { createBase2 } from './base2'
+
+// Default-mode base2 configuration with the `hasCodeReviewerBestOfN` option
+// switched on; the id and display name are attached below.
+const baseWithBestOfNReviewer = createBase2('default', {
+  hasCodeReviewerBestOfN: true,
+})
+
+/**
+ * Agent definition exported by this module: the base2 configuration above,
+ * published under its own id and display name.
+ */
+const definition = {
+  ...baseWithBestOfNReviewer,
+  id: 'base2-with-code-reviewer-best-of-n',
+  displayName: 'Buffy the Code Reviewing Best-of-N Orchestrator',
+}
+
+export default definition
@@ -12,12 +12,14 @@ export function createBase2(
     hasNoValidation?: boolean
     planOnly?: boolean
     hasCodeReviewer?: boolean
+    hasCodeReviewerBestOfN?: boolean
   },
 ): Omit<SecretAgentDefinition, 'id'> {
   const {
     hasNoValidation = false,
     planOnly = false,
     hasCodeReviewer = false,
+    hasCodeReviewerBestOfN = false,
   } = options ?? {}
   const isDefault = mode === 'default'
   const isFast = mode === 'fast'
@@ -80,6 +82,7 @@ export function createBase2(
       isDefault && 'thinker-best-of-n',
       isGpt5 && 'thinker-best-of-n-gpt-5',
       hasCodeReviewer && 'code-reviewer',
+      hasCodeReviewerBestOfN && 'code-reviewer-best-of-n',
       'context-pruner',
     ),
 
@@ -133,6 +136,8 @@ Use the spawn_agents tool to spawn specialized agents to help you complete the u
     '- Spawn commanders sequentially if the second command depends on the the first.',
     hasCodeReviewer &&
       '- Spawn a code-reviewer agent to review the code changes after you have made them.',
+    hasCodeReviewerBestOfN &&
+      '- Spawn a code-reviewer-best-of-n agent to review the code changes after you have made them.',
   ).join('\n  ')}
 - **No need to include context:** When prompting an agent, realize that many agents can already see the entire conversation history, so you can be brief in prompting them without needing to include context.
 
@@ -179,6 +184,7 @@ ${PLACEHOLDER.GIT_CHANGES_PROMPT}
           isMax,
           hasNoValidation,
           hasCodeReviewer,
+          hasCodeReviewerBestOfN,
         }),
     stepPrompt: planOnly
       ? buildPlanOnlyStepPrompt({})
@@ -220,6 +226,7 @@ function buildImplementationInstructionsPrompt({
   isMax,
   hasNoValidation,
   hasCodeReviewer,
+  hasCodeReviewerBestOfN,
 }: {
   isSonnet: boolean
   isGpt5: boolean
@@ -228,6 +235,7 @@ function buildImplementationInstructionsPrompt({
   isMax: boolean
   hasNoValidation: boolean
   hasCodeReviewer: boolean
+  hasCodeReviewerBestOfN: boolean
 }) {
   return `Act as a helpful assistant and freely respond to the user's request however would be most helpful to the user. Use your judgement to orchestrate the completion of the user's request using your specialized sub-agents and tools as needed. Take your time and be comprehensive.
 
@@ -238,11 +246,13 @@ The user asks you to implement a new feature. You respond in multiple steps:
 ${buildArray(
   EXPLORE_PROMPT,
   `- Important: Read as many files as could possibly be relevant to the task over several steps to improve your understanding of the user's request and produce the best possible code changes. Find more examples within the codebase similar to the user's request, dependencies that help with understanding how things work, tests, etc. This is frequently 12-20 files, depending on the task.`,
-  `- For any task requiring 3+ steps, use the write_todos tool to write out your step-by-step implementation plan. Include ALL of the applicable tasks in the list.${hasCodeReviewer ? ' Include a step to review the code changes with the code-reviewer agent after you have made them.' : ''}${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'} Skip write_todos for simple tasks like quick edits or answering questions.`,
+  `- For any task requiring 3+ steps, use the write_todos tool to write out your step-by-step implementation plan. Include ALL of the applicable tasks in the list.${hasCodeReviewer ? ' Include a step to review the code changes with the code-reviewer agent after you have made them.' : ''}${hasCodeReviewerBestOfN ? ' Include a step to review the code changes with the code-reviewer-best-of-n agent after you have made them.' : ''}${hasNoValidation ? '' : ' You should include at least one step to validate/test your changes: be specific about whether to typecheck, run tests, run lints, etc.'} Skip write_todos for simple tasks like quick edits or answering questions.`,
   !isFast &&
     `- You must spawn the ${isGpt5 ? 'editor-best-of-n-gpt-5' : 'editor-best-of-n'} agent to implement non-trivial code changes, since it will generate the best code changes from multiple implementation proposals. This is the best way to make high quality code changes -- strongly prefer using this agent over the str_replace or write_file tools, unless the change is very straightforward and obvious.`,
   hasCodeReviewer &&
     `- Spawn a code-reviewer agent to review the code changes after you have made them. You can skip this step for small changes that are obvious and don't require a review.`,
+  hasCodeReviewerBestOfN &&
+    `- Spawn a code-reviewer-best-of-n agent to review the code changes after you have made them. You can skip this step for small changes that are obvious and don't require a review.`,
   !hasNoValidation &&
     `- Test your changes${isMax ? '' : ' briefly'} by running appropriate validation commands for the project (e.g. typechecks, tests, lints, etc.).${isMax ? ' Start by type checking the specific area of the project that you are editing and then test the entire project if necessary.' : ' If you can, only typecheck/test the area of the project that you are editing, rather than the entire project.'} You may have to explore the project to find the appropriate commands. Don't skip this step!`,
   `- Inform the user that you have completed the task in one sentence or a few short bullet points.${isSonnet ? " Don't create any markdown summary files or example documentation files, unless asked by the user." : ''}`,
 
@@ -0,0 +1,266 @@
+import { publisher } from '../constants'
+
+import type { AgentStepContext, ToolCall } from '../types/agent-definition'
+import type { SecretAgentDefinition } from '../types/secret-agent-definition'
+
+/**
+ * Builds the definition (without an `id`) of the best-of-N code reviewer
+ * agent.
+ *
+ * The `model` argument decides four things: the underlying model, the display
+ * name, the pair of implementor/selector agents that may be spawned, and the
+ * step handler that drives the workflow. Everything else is shared between
+ * the two variants.
+ *
+ * @param model - `'gpt-5'` selects the GPT-5 variant; any other value selects
+ *   the Sonnet variant.
+ * @returns The agent definition minus `id`; callers attach their own id.
+ */
+export function createCodeReviewerBestOfN(
+  model: 'sonnet' | 'gpt-5',
+): Omit<SecretAgentDefinition, 'id'> {
+  const useGpt5 = model === 'gpt-5'
+
+  // Resolve every variant-specific value up front so the returned object
+  // below reads as a flat list of fields.
+  const modelId = useGpt5 ? 'openai/gpt-5' : 'anthropic/claude-sonnet-4.5'
+  const displayName = useGpt5
+    ? 'Best-of-N GPT-5 Code Reviewer'
+    : 'Best-of-N Fast Code Reviewer'
+  const spawnableAgents = useGpt5
+    ? ['code-reviewer-implementor-gpt-5', 'code-reviewer-selector-gpt-5']
+    : ['code-reviewer-implementor', 'code-reviewer-selector']
+  const handleSteps = useGpt5 ? handleStepsGpt5 : handleStepsSonnet
+
+  return {
+    publisher,
+    model: modelId,
+    displayName,
+    spawnerPrompt:
+      'Reviews code by orchestrating multiple reviewer agents to generate review proposals, selects the best one, and provides the final review. Do not specify an input prompt for this agent; it reads the context from the message history.',
+
+    includeMessageHistory: true,
+    inheritParentSystemPrompt: true,
+
+    toolNames: ['spawn_agents', 'set_messages', 'set_output'],
+    spawnableAgents,
+
+    // Only one optional parameter is accepted: how many reviewers to spawn.
+    inputSchema: {
+      params: {
+        type: 'object',
+        properties: {
+          n: {
+            type: 'number',
+            description:
+              'Number of parallel reviewer agents to spawn. Defaults to 5. Use fewer for simple reviews and max of 10 for complex reviews.',
+          },
+        },
+      },
+    },
+    outputMode: 'structured_output',
+
+    handleSteps,
+  }
+}
+
+/**
+ * Step handler for the Sonnet best-of-N code reviewer.
+ *
+ * Workflow, expressed through the yielded tool calls:
+ *   1. `set_messages` with a copy of the message history minus its last
+ *      entry (the userInstruction message).
+ *   2. `spawn_agents` with `n` `code-reviewer-implementor` agents.
+ *   3. Label every successful review with a letter and hand the labelled
+ *      list to a single `code-reviewer-selector` agent via `params.reviews`.
+ *   4. `set_output` with the chosen review and the selector's reasoning, or
+ *      with an `error` field when any stage fails.
+ *
+ * @param agentState - Only `messageHistory` is read; it is copied, never
+ *   mutated.
+ * @param params - Optional `n`: number of reviewers to spawn. Clamped to
+ *   1..10; a missing or non-numeric value falls back to 5.
+ */
+function* handleStepsSonnet({
+  agentState,
+  params,
+}: AgentStepContext): ReturnType<
+  NonNullable<SecretAgentDefinition['handleSteps']>
+> {
+  const implementorAgent = 'code-reviewer-implementor'
+  const selectorAgent = 'code-reviewer-selector'
+
+  // Math.min/Math.max propagate NaN, which would produce an empty agent list,
+  // so anything that is not a finite number falls back to the default of 5.
+  const requestedN = Number(params?.n ?? 5)
+  const n = Number.isFinite(requestedN)
+    ? Math.min(10, Math.max(1, Math.floor(requestedN)))
+    : 5
+
+  // Remove userInstruction message for this agent.
+  const messages = agentState.messageHistory.concat()
+  messages.pop()
+  yield {
+    toolName: 'set_messages',
+    input: {
+      messages,
+    },
+    includeToolCall: false,
+  } satisfies ToolCall<'set_messages'>
+
+  const { toolResult: implementorsResult1 } = yield {
+    toolName: 'spawn_agents',
+    input: {
+      agents: Array.from({ length: n }, () => ({
+        agent_type: implementorAgent,
+      })),
+    },
+    includeToolCall: false,
+  } satisfies ToolCall<'spawn_agents'>
+
+  const implementorsResult = extractSpawnResults<string>(implementorsResult1)
+
+  // Failed reviewers come back as `{ errorMessage }` objects. They are not
+  // reviews, so they are kept out of the list offered to the selector.
+  const failures = implementorsResult.filter(isSpawnError)
+
+  // Label the remaining reviews A, B, C, ... so the selector can refer to
+  // one by id. `n` is capped at 10, so the alphabet is never exhausted.
+  const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+  const reviews = implementorsResult
+    .filter((content): content is string => !isSpawnError(content))
+    .map((content, index) => ({
+      id: letters[index],
+      content,
+    }))
+
+  if (reviews.length === 0) {
+    // Nothing to choose from; surface the first reviewer failure if any.
+    yield {
+      toolName: 'set_output',
+      input: {
+        error: failures[0]?.errorMessage ?? 'No reviews were produced.',
+      },
+    } satisfies ToolCall<'set_output'>
+    return
+  }
+
+  // Spawn selector with reviews as params
+  const { toolResult: selectorResult } = yield {
+    toolName: 'spawn_agents',
+    input: {
+      agents: [
+        {
+          agent_type: selectorAgent,
+          params: { reviews },
+        },
+      ],
+    },
+    includeToolCall: false,
+  } satisfies ToolCall<'spawn_agents'>
+
+  const selectorOutput = extractSpawnResults<{
+    reviewId: string
+    reasoning: string
+  }>(selectorResult)[0]
+
+  // The extracted list is empty when the tool result is missing or holds no
+  // json entries; guard before inspecting the first element.
+  if (!selectorOutput) {
+    yield {
+      toolName: 'set_output',
+      input: { error: 'Selector agent returned no result.' },
+    } satisfies ToolCall<'set_output'>
+    return
+  }
+  if (isSpawnError(selectorOutput)) {
+    yield {
+      toolName: 'set_output',
+      input: { error: selectorOutput.errorMessage },
+    } satisfies ToolCall<'set_output'>
+    return
+  }
+  const { reviewId } = selectorOutput
+  const chosenReview = reviews.find((review) => review.id === reviewId)
+  if (!chosenReview) {
+    yield {
+      toolName: 'set_output',
+      input: { error: 'Failed to find chosen review.' },
+    } satisfies ToolCall<'set_output'>
+    return
+  }
+
+  // Set output with the chosen review and reasoning
+  yield {
+    toolName: 'set_output',
+    input: {
+      response: chosenReview.content,
+      reasoning: selectorOutput.reasoning,
+    },
+    includeToolCall: false,
+  } satisfies ToolCall<'set_output'>
+
+  /**
+   * True when `result` is the `{ errorMessage }` placeholder that
+   * `extractSpawnResults` emits for a spawned agent without a value.
+   * Non-object values (e.g. strings) are never errors, which also keeps the
+   * `in` operator from throwing on them.
+   */
+  function isSpawnError(result: unknown): result is { errorMessage: string } {
+    return (
+      typeof result === 'object' && result !== null && 'errorMessage' in result
+    )
+  }
+
+  /**
+   * Flattens a `spawn_agents` tool result into one entry per spawned agent:
+   * the agent's `value.value` when present, otherwise an `{ errorMessage }`
+   * object. Returns an empty array when `results` is undefined.
+   */
+  function extractSpawnResults<T>(
+    results: any[] | undefined,
+  ): (T | { errorMessage: string })[] {
+    if (!results) return []
+    const spawnedResults = results
+      .filter((result) => result.type === 'json')
+      .map((result) => result.value)
+      .flat() as {
+      agentType: string
+      value?: { value?: T; errorMessage?: string }
+    }[]
+    return spawnedResults.map(
+      (result) =>
+        result.value?.value ?? {
+          errorMessage:
+            result.value?.errorMessage ?? 'Error extracting spawn results',
+        },
+    )
+  }
+}
+
+/**
+ * Step handler for the GPT-5 best-of-N code reviewer.
+ *
+ * Workflow, expressed through the yielded tool calls:
+ *   1. `set_messages` with a copy of the message history minus its last
+ *      entry (the userInstruction message).
+ *   2. `spawn_agents` with `n` `code-reviewer-implementor-gpt-5` agents.
+ *   3. Label every successful review with a letter and hand the labelled
+ *      list to a single `code-reviewer-selector-gpt-5` agent via
+ *      `params.reviews`.
+ *   4. `set_output` with the chosen review and the selector's reasoning, or
+ *      with an `error` field when any stage fails.
+ *
+ * @param agentState - Only `messageHistory` is read; it is copied, never
+ *   mutated.
+ * @param params - Optional `n`: number of reviewers to spawn. Clamped to
+ *   1..10; a missing or non-numeric value falls back to 5.
+ */
+function* handleStepsGpt5({
+  agentState,
+  params,
+}: AgentStepContext): ReturnType<
+  NonNullable<SecretAgentDefinition['handleSteps']>
+> {
+  const implementorAgent = 'code-reviewer-implementor-gpt-5'
+  const selectorAgent = 'code-reviewer-selector-gpt-5'
+
+  // Math.min/Math.max propagate NaN, which would produce an empty agent list,
+  // so anything that is not a finite number falls back to the default of 5.
+  const requestedN = Number(params?.n ?? 5)
+  const n = Number.isFinite(requestedN)
+    ? Math.min(10, Math.max(1, Math.floor(requestedN)))
+    : 5
+
+  // Remove userInstruction message for this agent.
+  const messages = agentState.messageHistory.concat()
+  messages.pop()
+  yield {
+    toolName: 'set_messages',
+    input: {
+      messages,
+    },
+    includeToolCall: false,
+  } satisfies ToolCall<'set_messages'>
+
+  const { toolResult: implementorsResult1 } = yield {
+    toolName: 'spawn_agents',
+    input: {
+      agents: Array.from({ length: n }, () => ({
+        agent_type: implementorAgent,
+      })),
+    },
+    includeToolCall: false,
+  } satisfies ToolCall<'spawn_agents'>
+
+  const implementorsResult = extractSpawnResults<string>(implementorsResult1)
+
+  // Failed reviewers come back as `{ errorMessage }` objects. They are not
+  // reviews, so they are kept out of the list offered to the selector.
+  const failures = implementorsResult.filter(isSpawnError)
+
+  // Label the remaining reviews A, B, C, ... so the selector can refer to
+  // one by id. `n` is capped at 10, so the alphabet is never exhausted.
+  const letters = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
+  const reviews = implementorsResult
+    .filter((content): content is string => !isSpawnError(content))
+    .map((content, index) => ({
+      id: letters[index],
+      content,
+    }))
+
+  if (reviews.length === 0) {
+    // Nothing to choose from; surface the first reviewer failure if any.
+    yield {
+      toolName: 'set_output',
+      input: {
+        error: failures[0]?.errorMessage ?? 'No reviews were produced.',
+      },
+    } satisfies ToolCall<'set_output'>
+    return
+  }
+
+  // Spawn selector with reviews as params
+  const { toolResult: selectorResult } = yield {
+    toolName: 'spawn_agents',
+    input: {
+      agents: [
+        {
+          agent_type: selectorAgent,
+          params: { reviews },
+        },
+      ],
+    },
+    includeToolCall: false,
+  } satisfies ToolCall<'spawn_agents'>
+
+  const selectorOutput = extractSpawnResults<{
+    reviewId: string
+    reasoning: string
+  }>(selectorResult)[0]
+
+  // The extracted list is empty when the tool result is missing or holds no
+  // json entries; guard before inspecting the first element.
+  if (!selectorOutput) {
+    yield {
+      toolName: 'set_output',
+      input: { error: 'Selector agent returned no result.' },
+    } satisfies ToolCall<'set_output'>
+    return
+  }
+  if (isSpawnError(selectorOutput)) {
+    yield {
+      toolName: 'set_output',
+      input: { error: selectorOutput.errorMessage },
+    } satisfies ToolCall<'set_output'>
+    return
+  }
+  const { reviewId } = selectorOutput
+  const chosenReview = reviews.find((review) => review.id === reviewId)
+  if (!chosenReview) {
+    yield {
+      toolName: 'set_output',
+      input: { error: 'Failed to find chosen review.' },
+    } satisfies ToolCall<'set_output'>
+    return
+  }
+
+  // Set output with the chosen review and reasoning
+  yield {
+    toolName: 'set_output',
+    input: {
+      response: chosenReview.content,
+      reasoning: selectorOutput.reasoning,
+    },
+    includeToolCall: false,
+  } satisfies ToolCall<'set_output'>
+
+  /**
+   * True when `result` is the `{ errorMessage }` placeholder that
+   * `extractSpawnResults` emits for a spawned agent without a value.
+   * Non-object values (e.g. strings) are never errors, which also keeps the
+   * `in` operator from throwing on them.
+   */
+  function isSpawnError(result: unknown): result is { errorMessage: string } {
+    return (
+      typeof result === 'object' && result !== null && 'errorMessage' in result
+    )
+  }
+
+  /**
+   * Flattens a `spawn_agents` tool result into one entry per spawned agent:
+   * the agent's `value.value` when present, otherwise an `{ errorMessage }`
+   * object. Returns an empty array when `results` is undefined.
+   */
+  function extractSpawnResults<T>(
+    results: any[] | undefined,
+  ): (T | { errorMessage: string })[] {
+    if (!results) return []
+    const spawnedResults = results
+      .filter((result) => result.type === 'json')
+      .map((result) => result.value)
+      .flat() as {
+      agentType: string
+      value?: { value?: T; errorMessage?: string }
+    }[]
+    return spawnedResults.map(
+      (result) =>
+        result.value?.value ?? {
+          errorMessage:
+            result.value?.errorMessage ?? 'Error extracting spawn results',
+        },
+    )
+  }
+}
+
+// Sonnet flavour of the best-of-N reviewer, as produced by the factory above.
+const sonnetReviewer = createCodeReviewerBestOfN('sonnet')
+
+/**
+ * Agent definition exported by this module: the Sonnet reviewer with its id
+ * attached.
+ */
+const definition = {
+  ...sonnetReviewer,
+  id: 'code-reviewer-best-of-n',
+}
+
+export default definition