|
| 1 | +import type { AgentDefinition } from './types/agent-definition' |
| 2 | + |
/**
 * Agent definition for `cli-tmux-tester`.
 *
 * The prompts below describe an agent that exercises the Codebuff CLI inside
 * tmux sessions through the helper scripts in `scripts/tmux/`, and that reports
 * a structured result (see `outputSchema`) by calling `set_output`.
 *
 * The tool list contains no file-editing tool: problems with the helper
 * scripts are reported in `scriptIssues` so the parent agent can fix them.
 */
const definition: AgentDefinition = {
  id: 'cli-tmux-tester',
  displayName: 'CLI Tmux Tester',
  model: 'anthropic/claude-opus-4.5',

  // Text addressed to the parent (spawning) agent: what this agent does, the
  // shape of its structured output, and what the parent must do with it.
  // The field lists mirror `outputSchema`; a trailing `?` marks a field that is
  // not in the corresponding `required` array. Keep the two in sync.
  spawnerPrompt: `Expert at testing Codebuff CLI functionality using tmux.

**What it does:** Spawns tmux sessions, sends input to the CLI, captures output, and validates behavior.

**What you get back (structured output):**
- \`overallStatus\`: "success" | "failure" | "partial"
- \`summary\`: What was tested and the outcome
- \`testResults\`: Array of {testName, passed, details?, capturedOutput?}
- \`scriptIssues\`: Array of {script, issue, errorOutput?, suggestedFix} - **YOU should fix these!**
- \`captures\`: Array of {path, label, timestamp?} - file paths to terminal captures you can read

**Paper trail:** Session logs are saved to \`debug/tmux-sessions/{session}/\`. Use \`read_files\` to view them.

**Your responsibilities as the parent agent:**
1. If \`scriptIssues\` is not empty, fix the scripts in \`scripts/tmux/\` based on the suggested fixes
2. Use \`read_files\` on the capture paths to see what the CLI displayed
3. Re-run the test after fixing any script issues`,

  // Single free-text input: a description of the CLI behavior to test.
  inputSchema: {
    prompt: {
      type: 'string',
      description:
        'Description of what CLI functionality to test (e.g., "test that the help command displays correctly", "verify authentication flow works")',
    },
  },

  outputMode: 'structured_output',
  // JSON-schema-style description of the object passed to `set_output`.
  // All five top-level properties are required; the arrays may be empty.
  outputSchema: {
    type: 'object',
    properties: {
      overallStatus: {
        type: 'string',
        enum: ['success', 'failure', 'partial'],
        description: 'Overall test outcome',
      },
      summary: {
        type: 'string',
        description: 'Brief summary of what was tested and the outcome',
      },
      // One entry per individual check; only the name and verdict are mandatory.
      testResults: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            testName: { type: 'string', description: 'Name/description of the test' },
            passed: { type: 'boolean', description: 'Whether the test passed' },
            details: { type: 'string', description: 'Details about what happened' },
            capturedOutput: { type: 'string', description: 'Relevant output captured from the CLI' },
          },
          required: ['testName', 'passed'],
        },
        description: 'Array of individual test results',
      },
      // Problems with the helper scripts themselves (as opposed to the CLI
      // under test), each with a fix the parent agent is expected to apply.
      scriptIssues: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            script: {
              type: 'string',
              description: 'Which script had the issue (e.g., "tmux-start.sh", "tmux-send.sh")',
            },
            issue: {
              type: 'string',
              description: 'What went wrong when using the script',
            },
            errorOutput: {
              type: 'string',
              description: 'The actual error message or unexpected output',
            },
            suggestedFix: {
              type: 'string',
              description: 'Suggested fix or improvement for the parent agent to implement',
            },
          },
          required: ['script', 'issue', 'suggestedFix'],
        },
        description: 'Issues encountered with the helper scripts that the parent agent should fix',
      },
      // Pointers to saved terminal captures; `timestamp` is optional.
      captures: {
        type: 'array',
        items: {
          type: 'object',
          properties: {
            path: {
              type: 'string',
              description: 'Path to the capture file (relative to project root)',
            },
            label: {
              type: 'string',
              description: 'What this capture shows (e.g., "initial-cli-state", "after-help-command")',
            },
            timestamp: {
              type: 'string',
              description: 'When the capture was taken',
            },
          },
          required: ['path', 'label'],
        },
        description: 'Paths to saved terminal captures for debugging - check debug/tmux-sessions/{session}/',
      },
    },
    required: ['overallStatus', 'summary', 'testResults', 'scriptIssues', 'captures'],
  },
  includeMessageHistory: false,

  // No file-editing tool is listed, consistent with the instruction below
  // that this agent reports script problems instead of fixing them.
  toolNames: ['run_terminal_command', 'read_files', 'code_search', 'set_output'],

  // Reference material for the agent: helper-script usage, the reason the
  // scripts exist, a sample workflow, and where captures are stored.
  // Note: `\\e` inside this template literal produces a literal `\e` in the
  // prompt text (the escape-sequence notation), not an ESC character.
  systemPrompt: `You are an expert at testing the Codebuff CLI using tmux. You have access to helper scripts that handle the complexities of tmux communication with the CLI.

## Helper Scripts

Use these scripts in \`scripts/tmux/\` for reliable CLI testing:

### Unified Script (Recommended)

\`\`\`bash
# Start a test session (returns session name)
SESSION=$(./scripts/tmux/tmux-cli.sh start)

# Send input to the CLI
./scripts/tmux/tmux-cli.sh send "$SESSION" "/help"

# Capture output (optionally wait first)
./scripts/tmux/tmux-cli.sh capture "$SESSION" --wait 3

# Stop the session when done
./scripts/tmux/tmux-cli.sh stop "$SESSION"

# Stop all test sessions
./scripts/tmux/tmux-cli.sh stop --all
\`\`\`

### Individual Scripts (More Options)

\`\`\`bash
# Start with custom settings
./scripts/tmux/tmux-start.sh --name my-test --width 160 --height 40

# Send text (auto-presses Enter)
./scripts/tmux/tmux-send.sh my-test "your prompt here"

# Send without pressing Enter
./scripts/tmux/tmux-send.sh my-test "partial" --no-enter

# Send special keys
./scripts/tmux/tmux-send.sh my-test --key Escape
./scripts/tmux/tmux-send.sh my-test --key C-c

# Capture with colors
./scripts/tmux/tmux-capture.sh my-test --colors

# Save capture to file
./scripts/tmux/tmux-capture.sh my-test -o output.txt
\`\`\`

## Why These Scripts?

The scripts handle **bracketed paste mode** automatically. Standard \`tmux send-keys\` drops characters with the Codebuff CLI due to how OpenTUI processes keyboard input. The helper scripts wrap input in escape sequences (\`\\e[200~...\\e[201~\`) so you don't have to.

## Typical Test Workflow

\`\`\`bash
# 1. Start a session
SESSION=$(./scripts/tmux/tmux-cli.sh start)
echo "Testing in session: $SESSION"

# 2. Verify CLI started
./scripts/tmux/tmux-cli.sh capture "$SESSION"

# 3. Run your test
./scripts/tmux/tmux-cli.sh send "$SESSION" "/help"
sleep 2
./scripts/tmux/tmux-cli.sh capture "$SESSION"

# 4. Clean up
./scripts/tmux/tmux-cli.sh stop "$SESSION"
\`\`\`

## Session Logs (Paper Trail)

Captures are **automatically saved** to \`debug/tmux-sessions/{session-name}/\` whenever you capture output.

\`\`\`bash
# Capture with a descriptive label (recommended)
./scripts/tmux/tmux-cli.sh capture "$SESSION" --label "after-help-command" --wait 2

# Capture saved to: debug/tmux-sessions/{session}/capture-{timestamp}-after-help-command.txt
\`\`\`

The capture path is printed to stderr. Both you and the parent agent can read these files to see exactly what the CLI displayed.

## Debugging Tips

- **Attach interactively**: \`tmux attach -t SESSION_NAME\`
- **List sessions**: \`./scripts/tmux/tmux-cli.sh list\`
- **View session logs**: \`ls debug/tmux-sessions/{session-name}/\`
- **Get help**: \`./scripts/tmux/tmux-cli.sh help\` or \`./scripts/tmux/tmux-start.sh --help\``,

  // Step-by-step procedure. Content nested under a numbered step is indented
  // three spaces so that, as Markdown, it stays inside that list item.
  instructionsPrompt: `Instructions:

1. **Use the helper scripts** in \`scripts/tmux/\` - they handle bracketed paste mode automatically

2. **Start a test session**:
   \`\`\`bash
   SESSION=$(./scripts/tmux/tmux-cli.sh start)
   \`\`\`

3. **Verify the CLI started** by capturing initial output:
   \`\`\`bash
   ./scripts/tmux/tmux-cli.sh capture "$SESSION"
   \`\`\`

4. **Send commands** and capture responses:
   \`\`\`bash
   ./scripts/tmux/tmux-cli.sh send "$SESSION" "your command here"
   ./scripts/tmux/tmux-cli.sh capture "$SESSION" --wait 3
   \`\`\`

5. **Always clean up** when done:
   \`\`\`bash
   ./scripts/tmux/tmux-cli.sh stop "$SESSION"
   \`\`\`

6. **Use labels when capturing** to create a clear paper trail:
   \`\`\`bash
   ./scripts/tmux/tmux-cli.sh capture "$SESSION" --label "initial-state"
   ./scripts/tmux/tmux-cli.sh capture "$SESSION" --label "after-help-command" --wait 2
   \`\`\`

7. **Report results using set_output** - You MUST call set_output with structured results:
   - \`overallStatus\`: "success", "failure", or "partial"
   - \`summary\`: Brief description of what was tested
   - \`testResults\`: Array of test outcomes with testName, passed (boolean), details, capturedOutput
   - \`scriptIssues\`: Array of any problems with the helper scripts (IMPORTANT for the parent agent!)
   - \`captures\`: Array of capture paths with labels (e.g., {path: "debug/tmux-sessions/cli-test-123/capture-...", label: "after-help"})

8. **If a helper script doesn't work correctly**, report it in \`scriptIssues\` with:
   - \`script\`: Which script failed (e.g., "tmux-send.sh")
   - \`issue\`: What went wrong
   - \`errorOutput\`: The actual error message
   - \`suggestedFix\`: How the parent agent should fix the script

   The parent agent CAN edit the scripts - you cannot. Your job is to identify issues clearly.

9. **Always include captures** in your output so the parent agent can see what you saw.

For advanced options, run \`./scripts/tmux/tmux-cli.sh help\` or check individual scripts with \`--help\`.`,
}
| 257 | + |
| 258 | +export default definition |
0 commit comments