base2-with-task-researcher (and context restarts!)

jahooma · jahooma · commit ebe87a27884a · 2025-10-19T00:40:57.000-07:00
diff --git a/.agents/base2/base2-implementor-gpt-5.ts b/.agents/base2/base2-implementor-gpt-5.ts
diff --git a/.agents/base2/base2-with-task-researcher.ts b/.agents/base2/base2-with-task-researcher.ts
@@ -0,0 +1,205 @@
+import { buildArray } from '@codebuff/common/util/array'
+
+import { publisher } from '../constants'
+import {
+  PLACEHOLDER,
+  type SecretAgentDefinition,
+} from '../types/secret-agent-definition'
+import { ToolCall } from 'types/agent-definition'
+
+export const createBase2WithTaskResearcher: () => Omit<
+  SecretAgentDefinition,
+  'id'
+> = () => {
+  return {
+    publisher,
+    model: 'anthropic/claude-sonnet-4.5',
+    displayName: 'Buffy the Orchestrator',
+    spawnerPrompt:
+      'Advanced base agent that orchestrates planning, editing, and reviewing for complex coding tasks',
+    inputSchema: {
+      prompt: {
+        type: 'string',
+        description: 'A coding task to complete',
+      },
+      params: {
+        type: 'object',
+        properties: {
+          maxContextLength: {
+            type: 'number',
+          },
+        },
+        required: [],
+      },
+    },
+    outputMode: 'last_message',
+    includeMessageHistory: false,
+    toolNames: ['spawn_agents', 'read_files', 'str_replace', 'write_file'],
+    spawnableAgents: buildArray(
+      'task-researcher',
+      'file-picker-max',
+      'code-searcher',
+      'directory-lister',
+      'glob-matcher',
+      'researcher-web',
+      'researcher-docs',
+      'commander',
+      'code-reviewer',
+      'validator',
+      'context-pruner',
+    ),
+
+    systemPrompt: `You are Buffy, a strategic coding assistant that orchestrates complex coding tasks through specialized sub-agents.
+
+# Layers
+
+You spawn agents in "layers". Each layer is one spawn_agents tool call composed of multiple agents that answer your questions, do research, edit, and review.
+
+In between layers, you are encouraged to use the read_files tool to read files that you think are relevant to the user's request. It's good to read as many files as possible in between layers as this will give you more context on the user request.
+
+Continue to spawn layers of agents until have completed the user's request or require more information from the user.
+
+## Spawning agents guidelines
+
+
+- **Sequence agents properly:** Keep in mind dependencies when spawning different agents. Don't spawn agents in parallel that depend on each other. Be conservative sequencing agents so they can build on each other's insights:
+  - **Task researcher:** For medium to complex requests, you should first spawn a task-researcher agent by itself to gather context about the user's request. Spawn this before any other agents.
+  - Spawn file pickers, code-searcher, directory-lister, glob-matcher, commanders, and researchers before making edits.
+  - Spawn generate-plan agent after you have gathered all the context you need (and not before!).
+  - Only make edits after generating a plan.
+  - Code reviewers/validators should be spawned after you have made your edits.
+- **No need to include context:** When prompting an agent, realize that many agents can already see the entire conversation history, so you can be brief in prompting them without needing to include context.
+- **Don't spawn code reviewers/validators for trivial changes or quick follow-ups:** You should spawn the code reviewer/validator for most changes, but not for little changes or simple follow-ups.
+
+# Core Mandates
+
+- **Tone:** Adopt a professional, direct, and concise tone suitable for a CLI environment.
+- **Understand first, act second:** Always gather context and read relevant files BEFORE editing files.
+- **Quality over speed:** Prioritize correctness over appearing productive. Fewer, well-informed agents are better than many rushed ones.
+- **Validate assumptions:** Use researchers, file pickers, and the read_files tool to verify assumptions about libraries and APIs before implementing.
+- **Proactiveness:** Fulfill the user's request thoroughly, including reasonable, directly implied follow-up actions.
+- **Be careful about terminal commands:** Be careful about instructing subagents to run terminal commands that could be destructive or have effects that are hard to undo (e.g. git push, running scripts that could alter production environments, installing packages globally, etc). Don't do any of these unless the user explicitly asks you to.
+- **Do what the user asks:** If the user asks you to do something, even running a risky terminal command, do it.
+- **Make at least one tool call in every step:** You *must* make at least one tool call (with "<codebuff_tool_call>" tags) in every step unless you are done with the task. If you don't, you will be cut off by the system and the task will be incomplete.
+
+# Code Editing Mandates
+
+- **Conventions:** Rigorously adhere to existing project conventions when reading or modifying code. Analyze surrounding code, tests, and configuration first.
+- **Libraries/Frameworks:** NEVER assume a library/framework is available or appropriate. Verify its established usage within the project (check imports, configuration files like 'package.json', 'Cargo.toml', 'requirements.txt', 'build.gradle', etc., or observe neighboring files) before employing it.
+- **Style & Structure:** Mimic the style (formatting, naming), structure, framework choices, typing, and architectural patterns of existing code in the project.
+- **Idiomatic Changes:** When editing, understand the local context (imports, functions/classes) to ensure your changes integrate naturally and idiomatically.
+- **No new code comments:** Do not add any new comments while writing code, unless they were preexisting comments (keep those!) or unless the user asks you to add comments!
+- **Minimal Changes:** Make as few changes as possible to satisfy the user request! Don't go beyond what the user has asked for.
+- **Code Reuse:** Always reuse helper functions, components, classes, etc., whenever possible! Don't reimplement what already exists elsewhere in the codebase.
+- **Front end development** We want to make the UI look as good as possible. Don't hold back. Give it your all.
+    - Include as many relevant features and interactions as possible
+    - Add thoughtful details like hover states, transitions, and micro-interactions
+    - Apply design principles: hierarchy, contrast, balance, and movement
+    - Create an impressive demonstration showcasing web development capabilities
+-  **Refactoring Awareness:** Whenever you modify an exported symbol like a function or class or variable, you should find and update all the references to it appropriately.
+-  **Package Management:** When adding new packages, use the run_terminal_command tool to install the package rather than editing the package.json file with a guess at the version number to use (or similar for other languages). This way, you will be sure to have the latest version of the package. Do not install packages globally unless asked by the user (e.g. Don't run \`npm install -g <package-name>\`). Always try to use the package manager associated with the project (e.g. it might be \`pnpm\` or \`bun\` or \`yarn\` instead of \`npm\`, or similar for other languages).
+-  **Code Hygiene:** Make sure to leave things in a good state:
+    - Don't forget to add any imports that might be needed
+    - Remove unused variables, functions, and files as a result of your changes.
+    - If you added files or functions meant to replace existing code, then you should also remove the previous code.
+- **Edit multiple files at once:** When you edit files, you must make as many tool calls as possible in a single message. This is faster and much more efficient than making all the tool calls in separate messages. It saves users thousands of dollars in credits if you do this!
+
+${PLACEHOLDER.FILE_TREE_PROMPT_SMALL}
+${PLACEHOLDER.KNOWLEDGE_FILES_CONTENTS}
+
+# Initial Git Changes
+
+The following is the state of the git repository at the start of the conversation. Note that it is not updated to reflect any subsequent changes made by the user or the agents.
+
+${PLACEHOLDER.GIT_CHANGES_PROMPT}
+`,
+
+    instructionsPrompt: `Orchestrate the completion of the user's request using your specialized sub-agents. Take your time and be comprehensive.
+    
+## Example workflow
+
+The user asks you to implement a new feature. You respond in multiple steps:
+
+1. Spawn a task-researcher agent to research the task and get key facts and insights.
+2. Use the str_replace or write_file tool to make the changes.
+3. Spawn a code-reviewer to review the changes. Consider making changes suggested by the code-reviewer.
+4. Spawn a validator to run validation checks (tests, typechecks, etc.) to ensure the changes are correct.
+
+You may not need to spawn the task-researcher if the user's request is trivial or if you have already gathered all the information you need from the conversation history.
+`,
+
+    stepPrompt: `Don't forget to spawn agents that could help, especially: the task-researcher to research the task, code-reviewer to review changes, and the validator to run validation commands.`,
+
+    handleSteps: function* ({ params, logger }) {
+      let steps = 0
+      while (true) {
+        steps++
+        // Run context-pruner before each step
+        yield {
+          toolName: 'spawn_agent_inline',
+          input: {
+            agent_type: 'context-pruner',
+            params: params ?? {},
+          },
+          includeToolCall: false,
+        } as any
+
+        const { stepsComplete, agentState } = yield 'STEP'
+        if (stepsComplete) break
+
+        // Check tool results for spawning of a task researcher...
+        // If found, reset messages to only include the task researcher's result and read the relevant files!
+        const spawnAgentsToolResults = agentState.messageHistory
+          .filter((message) => message.role === 'tool')
+          .filter((message) => message.content.toolName === 'spawn_agents')
+          .map((message) => message.content.output)
+          .flat()
+          .filter((result) => result.type === 'json')
+          .map((result) => result.value)[0] as {
+          agentType: string
+          value: any
+        }[]
+
+        const taskResearcherResult = spawnAgentsToolResults?.find(
+          (result) => result.agentType === 'task-researcher',
+        )
+        if (taskResearcherResult) {
+          const taskResearcherOutput = taskResearcherResult.value.value as {
+            analysis: string
+            keyFacts: string[]
+            relevantFiles: string[]
+            userPrompt: string
+          }
+          const initialMessage = `<research>${taskResearcherOutput.keyFacts.join('\n')}</research>${taskResearcherOutput.userPrompt}`
+          const message = {
+            role: 'user',
+            content: initialMessage,
+          }
+          const instructionsMessage = agentState.messageHistory.findLast(
+            (message) =>
+              message.role === 'user' &&
+              message.keepLastTags?.[0] === 'INSTRUCTIONS_PROMPT',
+          )
+          yield {
+            toolName: 'set_messages',
+            input: {
+              messages: [message, instructionsMessage],
+            },
+            includeToolCall: false,
+          } satisfies ToolCall<'set_messages'>
+          yield {
+            toolName: 'read_files',
+            input: { paths: taskResearcherOutput.relevantFiles },
+          } satisfies ToolCall<'read_files'>
+        }
+        // Continue loop!
+      }
+    },
+  }
+}
+
+const definition = {
+  ...createBase2WithTaskResearcher(),
+  id: 'base2-with-task-researcher',
+}
+export default definition
diff --git a/.agents/researcher/task-researcher.ts b/.agents/researcher/task-researcher.ts
@@ -5,6 +5,7 @@ import {
   PLACEHOLDER,
   type SecretAgentDefinition,
 } from '../types/secret-agent-definition'
+import { ToolCall, AgentState } from '../types/agent-definition'
 
 export const createTaskResearcher: () => Omit<
   SecretAgentDefinition,
@@ -21,16 +22,8 @@ export const createTaskResearcher: () => Omit<
         type: 'string',
         description: 'A coding task to research',
       },
-      params: {
-        type: 'object',
-        properties: {
-          maxContextLength: {
-            type: 'number',
-          },
-        },
-        required: [],
-      },
     },
+    includeMessageHistory: true,
     outputMode: 'structured_output',
     outputSchema: {
       type: 'object',
@@ -52,9 +45,8 @@ export const createTaskResearcher: () => Omit<
             'A comprehensive list of the paths of files that are relevant to the coding task.',
         },
       },
-      required: ['report'],
+      required: ['analysis', 'keyFacts', 'relevantFiles'],
     },
-    includeMessageHistory: false,
     toolNames: ['spawn_agents', 'read_files', 'set_output'],
     spawnableAgents: buildArray(
       'file-picker-max',
@@ -93,11 +85,50 @@ You recieve a coding task to implement a new feature. You do research in multipl
 2a. Read all the relevant files using the read_files tool.
 3. Spawn a decomposing-thinker agent to help figure out key facts and insights about the coding task.
 3a. Read any remaining relevant files using the read_files tool.
-4. Now the most important part: use the set_output tool to compile the information into a final report. Start with the analysis, and then put the most effort into the key facts list, which should be comprehensive. Finally, include ALL the relevant files in the report.`,
+4. Now the most important part: use the set_output tool to compile the information into a final report. Start with the analysis, and then put the most effort into the key facts list, which should be comprehensive. Finally, include ALL the relevant files in the report.
+Important: the report should only include the analysis, key facts, and relevant files. It should not include a plan or recommendations or any other information.
+5. End your turn.
+`,
 
     stepPrompt: `Don't forget to spawn agents that could help, especially: the file-picker-max and find-all-referencer to get codebase context, and the decomposing-thinker agent to help figure out key facts and insights.`,
 
-    handleSteps: function* ({ prompt, params }) {
+    handleSteps: function* ({ prompt, agentState, logger }) {
+      // Reset the message history to the initial user message and the input prompt.
+      const { messageHistory } = agentState
+      const instructionsMessage = messageHistory[messageHistory.length - 1]
+      const userMessages = messageHistory.filter(
+        (message) =>
+          message.role === 'user' &&
+          (typeof message.content === 'string'
+            ? message.content.includes('<user_message>')
+            : message.content[0].type === 'text' &&
+              message.content[0].text.includes('<user_message>')),
+      )
+      const lastUserMessage = userMessages[userMessages.length - 2]
+      let userPrompt = prompt
+      if (lastUserMessage) {
+        const userMessageBlock =
+          typeof lastUserMessage.content === 'string'
+            ? lastUserMessage.content
+            : (lastUserMessage.content as any)[0].text || ''
+        const userMessage = userMessageBlock
+          .split('<user_message>')[1]
+          .split('</user_message>')[0]
+          .trim()
+        userPrompt = `<user_message>${userMessage}<generated_summary_of_task>${prompt}</generated_summary_of_task></user_message>`
+        yield {
+          toolName: 'set_messages',
+          input: {
+            messages: [
+              { role: 'user', content: userPrompt },
+              instructionsMessage,
+            ],
+          },
+          includeToolCall: false,
+        } satisfies ToolCall
+      }
+
+      let lastAgentState: AgentState
       let steps = 0
       while (true) {
         steps++
@@ -106,14 +137,24 @@ You recieve a coding task to implement a new feature. You do research in multipl
           toolName: 'spawn_agent_inline',
           input: {
             agent_type: 'context-pruner',
-            params: params ?? {},
+            params: {},
           },
           includeToolCall: false,
         } as any
 
-        const { stepsComplete } = yield 'STEP'
+        const { stepsComplete, agentState } = yield 'STEP'
+        lastAgentState = agentState
         if (stepsComplete) break
       }
+
+      yield {
+        toolName: 'set_output',
+        input: {
+          ...lastAgentState.output,
+          userPrompt,
+        },
+        includeToolCall: false,
+      } satisfies ToolCall<'set_output'>
     },
   }
 }