refactor(agents): simplify CLI agent modes to work/review, add customization options

brandonkachen · brandonkachen · commit 5b7b14905d54 · 2026-01-20T12:24:14.000-08:00
- Remove test mode (redundant with work mode for e2e scenarios)
- Add defaultMode config option for agents to set their own default
- Add workModeInstructions config override (alongside the existing reviewModeInstructions)
- Add CliAgentMode type and CLI_AGENT_MODES constant for type safety
- Extract getTestModeInstructions into separate function (then removed)
- Use CLI_AGENT_MODES constant instead of hardcoded arrays (DRY)
- Update prompts and schemas to reflect two-mode system
diff --git a/.agents/lib/cli-agent-prompts.ts b/.agents/lib/cli-agent-prompts.ts
@@ -1,4 +1,5 @@
 import type { CliAgentConfig } from './cli-agent-types'
+import { CLI_AGENT_MODES } from './cli-agent-types'
 
 const TMUX_SESSION_DOCS = `## Session Logs (Paper Trail)
 
@@ -71,33 +72,42 @@ The review should focus on these key areas:
    - Missing or incomplete type definitions`
 
 export function getSpawnerPrompt(config: CliAgentConfig): string {
-  const base = `Expert at testing ${config.cliName} CLI functionality using tmux, or performing code reviews via ${config.cliName}.
+  const defaultMode = config.defaultMode ?? 'work'
+  const modeDescriptions = {
+    work: `Use ${config.cliName} to implement features, fix bugs, refactor code, or complete other coding tasks.`,
+    review: `Uses ${config.cliName} CLI to perform code reviews on specified files or directories.`,
+  }
+  const modeLines = CLI_AGENT_MODES.map(mode => {
+    const isDefault = mode === defaultMode
+    return `- \`${mode}\`${isDefault ? ' (default)' : ''}: ${modeDescriptions[mode]}`
+  }).join('\n')
+
+  const base = `Expert at using ${config.cliName} CLI via tmux for implementation work or code reviews.
 
 **Modes:**
-- \`test\` (default): Spawns tmux sessions, sends input to ${config.cliName} CLI, captures terminal output, and validates behavior.
-- \`review\`: Uses ${config.cliName} CLI to perform code reviews on specified files or directories.
+${modeLines}
 
 **Paper trail:** Session logs are saved to \`debug/tmux-sessions/{session}/\`. Use \`read_files\` to view captures.
 
 **Your responsibilities as the parent agent:**
 1. If \`scriptIssues\` is not empty, fix the scripts in \`scripts/tmux/\` based on the suggested fixes
 2. Use \`read_files\` on the capture paths to see what the CLI displayed
-3. Re-run the test after fixing any script issues`
+3. Re-run the agent after fixing any script issues`
 
   return config.spawnerPromptExtras ? `${base}\n\n${config.spawnerPromptExtras}` : base
 }
 
 export function getSystemPrompt(config: CliAgentConfig): string {
   const cliSpecificSection = config.cliSpecificDocs ? `\n${config.cliSpecificDocs}\n` : '\n'
 
-  return `You are an expert at testing ${config.cliName} CLI using tmux. You have access to helper scripts that handle the complexities of tmux communication with TUI apps.
+  return `You are an expert at using ${config.cliName} CLI via tmux for implementation work and code reviews. You have access to helper scripts that handle the complexities of tmux communication with TUI apps.
 
 ## ${config.cliName} Startup
 
-For testing ${config.cliName}, use the \`--command\` flag with permission bypass:
+To start ${config.cliName}, use the \`--command\` flag with permission bypass:
 
 \`\`\`bash
-# Start ${config.cliName} CLI (with permission bypass for testing)
+# Start ${config.cliName} CLI (with permission bypass)
 SESSION=$(./scripts/tmux/tmux-cli.sh start --command "${config.startCommand}")
 
 # Or with specific options
@@ -108,12 +118,12 @@ SESSION=$(./scripts/tmux/tmux-cli.sh start --command "${config.startCommand} --h
 ${cliSpecificSection}
 ## Helper Scripts
 
-Use these scripts in \`scripts/tmux/\` for reliable CLI testing:
+Use these scripts in \`scripts/tmux/\` for reliable CLI interaction:
 
 ### Unified Script (Recommended)
 
 \`\`\`bash
-# Start a ${config.cliName} test session (with permission bypass)
+# Start a ${config.cliName} session (with permission bypass)
 SESSION=$(./scripts/tmux/tmux-cli.sh start --command "${config.startCommand}")
 
 # Send input to the CLI
@@ -162,7 +172,8 @@ ${TMUX_DEBUG_TIPS}`
 }
 
 export function getDefaultReviewModeInstructions(config: CliAgentConfig): string {
-  return `## Review Mode Instructions
+  const isDefault = config.defaultMode === 'review'
+  return `## Review Mode Instructions${isDefault ? ' (Default)' : ''}
 
 In review mode, you send a detailed review prompt to ${config.cliName}. The prompt MUST start with the word "review" and include specific areas of concern.
 
@@ -216,60 +227,98 @@ ${REVIEW_CRITERIA}
    \`\`\``
 }
 
-export function getInstructionsPrompt(config: CliAgentConfig): string {
-  const reviewModeInstructions = config.reviewModeInstructions ?? getDefaultReviewModeInstructions(config)
+export function getWorkModeInstructions(config: CliAgentConfig): string {
+  const isDefault = (config.defaultMode ?? 'work') === 'work'
+  return `## Work Mode Instructions${isDefault ? ' (Default)' : ''}
 
-  return `Instructions:
+Use ${config.cliName} to complete implementation tasks like building features, fixing bugs, or refactoring code.
 
-Check the \`mode\` parameter to determine your operation:
-- If \`mode\` is "review": follow **Review Mode** instructions
-- Otherwise: follow **Test Mode** instructions (default)
+### Workflow
 
----
+1. **Start ${config.cliName}** with permission bypass:
+   \`\`\`bash
+   SESSION=$(./scripts/tmux/tmux-cli.sh start --command "${config.startCommand}")
+   \`\`\`
 
-## Test Mode Instructions
+2. **Wait for CLI to initialize**, then capture:
+   \`\`\`bash
+   sleep 3
+   ./scripts/tmux/tmux-cli.sh capture "$SESSION" --label "initial-state"
+   \`\`\`
+
+3. **Send your task** (from the prompt you received) to the CLI:
+   \`\`\`bash
+   ./scripts/tmux/tmux-cli.sh send "$SESSION" "<the task from your prompt parameter>"
+   \`\`\`
 
-1. **Use the helper scripts** in \`scripts/tmux/\` - they handle bracketed paste mode automatically
+   Use the exact task description from the prompt the parent agent gave you.
 
-2. **Start a ${config.cliName} test session** with permission bypass:
+4. **Wait for completion and capture output** (implementation tasks may take a while):
    \`\`\`bash
-   SESSION=$(./scripts/tmux/tmux-cli.sh start --command "${config.startCommand}")
+   ./scripts/tmux/tmux-cli.sh capture "$SESSION" --label "work-in-progress" --wait 30
    \`\`\`
 
-3. **Verify the CLI started** by capturing initial output:
+   If the work is still in progress, wait and capture again:
    \`\`\`bash
-   ./scripts/tmux/tmux-cli.sh capture "$SESSION"
+   ./scripts/tmux/tmux-cli.sh capture "$SESSION" --label "work-continued" --wait 30
    \`\`\`
 
-4. **Send commands** and capture responses:
+5. **Send follow-up prompts** if needed to refine or continue the work:
    \`\`\`bash
-   ./scripts/tmux/tmux-cli.sh send "$SESSION" "your command here"
-   ./scripts/tmux/tmux-cli.sh capture "$SESSION" --wait 3
+   ./scripts/tmux/tmux-cli.sh send "$SESSION" "<follow-up instructions>"
+   ./scripts/tmux/tmux-cli.sh capture "$SESSION" --label "follow-up" --wait 30
    \`\`\`
 
-5. **Always clean up** when done:
+6. **Verify the changes** by checking files or running commands:
    \`\`\`bash
-   ./scripts/tmux/tmux-cli.sh stop "$SESSION"
+   ./scripts/tmux/tmux-cli.sh send "$SESSION" "run the tests to verify the changes"
+   ./scripts/tmux/tmux-cli.sh capture "$SESSION" --label "verification" --wait 60
    \`\`\`
 
-6. **Use labels when capturing** to create a clear paper trail:
+7. **Clean up** when done:
    \`\`\`bash
-   ./scripts/tmux/tmux-cli.sh capture "$SESSION" --label "initial-state"
-   ./scripts/tmux/tmux-cli.sh capture "$SESSION" --label "after-help-command" --wait 2
+   ./scripts/tmux/tmux-cli.sh stop "$SESSION"
    \`\`\`
 
+### Tips
+
+- Break complex tasks into smaller prompts
+- Capture frequently to track progress
+- Use descriptive labels for captures
+- Check intermediate results before moving on`
+}
+
+export function getInstructionsPrompt(config: CliAgentConfig): string {
+  const defaultMode = config.defaultMode ?? 'work'
+  const workModeInstructions = config.workModeInstructions ?? getWorkModeInstructions(config)
+  const reviewModeInstructions = config.reviewModeInstructions ?? getDefaultReviewModeInstructions(config)
+
+  const modeNames = { work: 'Work Mode', review: 'Review Mode' }
+  const nonDefaultModes = CLI_AGENT_MODES.filter(m => m !== defaultMode)
+  const modeChecks = nonDefaultModes.map(m => `- If \`mode\` is "${m}": follow **${modeNames[m]}** instructions`).join('\n')
+
+  return `Instructions:
+
+Check the \`mode\` parameter to determine your operation:
+${modeChecks}
+- Otherwise: follow **${modeNames[defaultMode]}** instructions (default)
+
+---
+
+${workModeInstructions}
+
 ---
 
 ${reviewModeInstructions}
 
 ---
 
-## Output (Both Modes)
+## Output (All Modes)
 
 **Report results using set_output** - You MUST call set_output with structured results:
 - \`overallStatus\`: "success", "failure", or "partial"
-- \`summary\`: Brief description of what was tested/reviewed
-- \`testResults\`: Array of test outcomes (for test mode)
+- \`summary\`: Brief description of what was done
+- \`results\`: Array of task outcomes (for work mode)
 - \`scriptIssues\`: Array of any problems with the helper scripts
 - \`captures\`: Array of capture paths with labels
 - \`reviewFindings\`: Array of code review findings (for review mode)
@@ -278,7 +327,7 @@ ${reviewModeInstructions}
 - \`script\`: Which script failed
 - \`issue\`: What went wrong
 - \`errorOutput\`: The actual error message
-- \`suggestedFix\`: How the parent agent should fix the script
+- \`suggestedFix\`: How to fix the script
 
 **Always include captures** in your output so the parent agent can see what you saw.
 
diff --git a/.agents/lib/cli-agent-schemas.ts b/.agents/lib/cli-agent-schemas.ts
@@ -1,29 +1,29 @@
-// Shared output schema for CLI tester agents. testResults for test mode, reviewFindings for review mode.
+// Shared output schema for CLI agents. results for work mode, reviewFindings for review mode.
 export const outputSchema = {
   type: 'object' as const,
   properties: {
     overallStatus: {
       type: 'string' as const,
       enum: ['success', 'failure', 'partial'],
-      description: 'Overall test outcome',
+      description: 'Overall outcome',
     },
     summary: {
       type: 'string' as const,
-      description: 'Brief summary of what was tested and the outcome',
+      description: 'Brief summary of what was done and the outcome',
     },
-    testResults: {
+    results: {
       type: 'array' as const,
       items: {
         type: 'object' as const,
         properties: {
-          testName: { type: 'string' as const, description: 'Name/description of the test' },
-          passed: { type: 'boolean' as const, description: 'Whether the test passed' },
+          name: { type: 'string' as const, description: 'Name/description of the task' },
+          passed: { type: 'boolean' as const, description: 'Whether the task succeeded' },
           details: { type: 'string' as const, description: 'Details about what happened' },
           capturedOutput: { type: 'string' as const, description: 'Relevant output captured from the CLI' },
         },
-        required: ['testName', 'passed'],
+        required: ['name', 'passed'],
       },
-      description: 'Array of individual test results',
+      description: 'Array of individual task results',
     },
     scriptIssues: {
       type: 'array' as const,
@@ -37,7 +37,7 @@ export const outputSchema = {
         },
         required: ['script', 'issue', 'suggestedFix'],
       },
-      description: 'Issues encountered with the helper scripts that the parent agent should fix',
+      description: 'Issues encountered with the helper scripts that should be fixed',
     },
     captures: {
       type: 'array' as const,
diff --git a/.agents/lib/cli-agent-types.ts b/.agents/lib/cli-agent-types.ts
@@ -1,11 +1,20 @@
+export type CliAgentMode = 'work' | 'review'
+
+export const CLI_AGENT_MODES: readonly CliAgentMode[] = ['work', 'review'] as const
+
 export interface InputParamDefinition {
   type: 'string' | 'number' | 'boolean' | 'array' | 'object'
   description?: string
   enum?: string[]
 }
 
-// Prevent extraInputParams from overriding 'mode' at compile time
-export type ExtraInputParams = Omit<Record<string, InputParamDefinition>, 'mode'>
+/** Extra input params that can be added to CLI agent configs. */
+export type ExtraInputParams = {
+  [key: string]: InputParamDefinition | undefined
+  // A string index signature cannot subtract a key (remapping `K in string` is a
+  // no-op), so 'mode' is rejected at compile time by typing it as `never`.
+  mode?: never
+}
 
 export interface CliAgentConfig {
   id: string
@@ -16,8 +25,13 @@ export interface CliAgentConfig {
   startCommand: string
   permissionNote: string
   model: string
+  /** Default mode when mode param is not specified. Defaults to 'work' */
+  defaultMode?: CliAgentMode
   spawnerPromptExtras?: string
   extraInputParams?: ExtraInputParams
+  /** Custom instructions for work mode. If not provided, uses getWorkModeInstructions() */
+  workModeInstructions?: string
+  /** Custom instructions for review mode. If not provided, uses getDefaultReviewModeInstructions() */
   reviewModeInstructions?: string
   cliSpecificDocs?: string
 }
diff --git a/.agents/lib/create-cli-agent.ts b/.agents/lib/create-cli-agent.ts
@@ -1,5 +1,6 @@
 import type { AgentDefinition } from '../types/agent-definition'
 import type { CliAgentConfig } from './cli-agent-types'
+import { CLI_AGENT_MODES } from './cli-agent-types'
 import { outputSchema } from './cli-agent-schemas'
 import {
   getSpawnerPrompt,
@@ -15,11 +16,21 @@ export function createCliAgent(config: CliAgentConfig): AgentDefinition {
     )
   }
 
+  const defaultMode = config.defaultMode ?? 'work'
+  const modeDescriptions = {
+    work: 'implementation tasks',
+    review: `code review via ${config.cliName}`,
+  }
+  const modeDescParts = CLI_AGENT_MODES.map(mode => {
+    const isDefault = mode === defaultMode
+    return `"${mode}" for ${modeDescriptions[mode]}${isDefault ? ' (default)' : ''}`
+  })
+
   const baseInputParams = {
     mode: {
       type: 'string' as const,
-      enum: ['test', 'review'],
-      description: `Operation mode - "test" for CLI testing (default), "review" for code review via ${config.cliName}`,
+      enum: [...CLI_AGENT_MODES],
+      description: `Operation mode - ${modeDescParts.join(', ')}`,
     },
   }
 
@@ -38,7 +49,7 @@ export function createCliAgent(config: CliAgentConfig): AgentDefinition {
       prompt: {
         type: 'string' as const,
         description:
-          'Description of what to do. For test mode: what CLI functionality to test. For review mode: what code to review and any specific concerns.',
+          'Description of what to do. For work mode: implementation task to complete. For review mode: code to review.',
       },
       params: {
         type: 'object' as const,