CodebuffAI
diff --git a/‎.agents/codebase-commands-explorer.ts‎
Lines changed: 227 additions & 0 deletions b/‎.agents/codebase-commands-explorer.ts‎
Lines changed: 227 additions & 0 deletions
diff --git a/‎.agents/git-committer.ts‎
Lines changed: 0 additions & 4 deletions b/‎.agents/git-committer.ts‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎.agents/read-only-commander-lite.ts‎
Lines changed: 18 additions & 0 deletions b/‎.agents/read-only-commander-lite.ts‎
Lines changed: 18 additions & 0 deletions
diff --git a/‎.agents/base2/read-only-commander.ts‎ ‎.agents/read-only-commander.ts‎.agents/base2/read-only-commander.ts renamed to .agents/read-only-commander.ts
Lines changed: 3 additions & 3 deletions b/‎.agents/base2/read-only-commander.ts‎ ‎.agents/read-only-commander.ts‎.agents/base2/read-only-commander.ts renamed to .agents/read-only-commander.ts
Lines changed: 3 additions & 3 deletions
diff --git a/‎.agents/simple-code-reviewer.ts‎
Lines changed: 26 additions & 0 deletions b/‎.agents/simple-code-reviewer.ts‎
Lines changed: 26 additions & 0 deletions
diff --git a/‎.github/workflows/nightly-evals.yml‎
Lines changed: 2 additions & 2 deletions b/‎.github/workflows/nightly-evals.yml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎README.md‎
Lines changed: 4 additions & 0 deletions b/‎README.md‎
Lines changed: 4 additions & 0 deletions
@@ -0,0 +1,227 @@
+import type { AgentDefinition } from './types/agent-definition'
+
+/**
+ * Agent definition for the Codebase Commands Explorer.
+ *
+ * The agent itself only has the `spawn_agents` and `set_output` tools: it
+ * delegates project exploration to `file-explorer` and command checks to
+ * `read-only-commander-lite`, then reports through `set_output` in the shape
+ * described by `outputSchema`.
+ */
+const definition: AgentDefinition = {
+  id: 'codebase-commands-explorer',
+  displayName: 'Codebase Commands Explorer',
+  publisher: 'james',
+  model: 'openai/gpt-5',
+  reasoningOptions: {
+    enabled: true,
+    effort: 'low',
+    exclude: true,
+  },
+
+  spawnerPrompt: `Analyzes any project's codebase to comprehensively discover all commands needed to build, test, and run the project. Provides detailed analysis of project structure, tech stack, and working commands with confidence scores.`,
+
+  toolNames: ['spawn_agents', 'set_output'],
+  // instructionsPrompt refers to these agents by name; keep the two in sync.
+  spawnableAgents: [
+    'codebuff/file-explorer@0.0.4',
+    'codebuff/read-only-commander-lite@0.0.1',
+  ],
+
+  inputSchema: {
+    prompt: {
+      type: 'string',
+      description:
+        'Optional specific focus areas or requirements for the codebase analysis (e.g., "focus on test commands" or "include CI/CD analysis")',
+    },
+  },
+
+  outputMode: 'structured_output',
+  outputSchema: {
+    type: 'object',
+    properties: {
+      projectOverview: {
+        type: 'object',
+        properties: {
+          projectType: {
+            type: 'string',
+            description:
+              'Primary project type (e.g., Node.js, Python, Rust, Go, etc.)',
+          },
+          techStack: {
+            type: 'array',
+            items: { type: 'string' },
+            description: 'List of technologies, frameworks, and tools detected',
+          },
+          packageManagers: {
+            type: 'array',
+            items: { type: 'string' },
+            description:
+              'Package managers found (npm, yarn, pnpm, pip, cargo, etc.)',
+          },
+          buildSystems: {
+            type: 'array',
+            items: { type: 'string' },
+            description:
+              'Build systems detected (webpack, vite, make, cmake, etc.)',
+          },
+          keyFiles: {
+            type: 'array',
+            items: { type: 'string' },
+            description: 'Key configuration files found',
+          },
+        },
+        required: [
+          'projectType',
+          'techStack',
+          'packageManagers',
+          'buildSystems',
+          'keyFiles',
+        ],
+      },
+      workingCommands: {
+        type: 'array',
+        items: {
+          type: 'object',
+          properties: {
+            command: { type: 'string', description: 'The working command' },
+            description: {
+              type: 'string',
+              description: 'What this command does',
+            },
+            category: {
+              type: 'string',
+              enum: [
+                'build',
+                'test',
+                'run',
+                'lint',
+                'format',
+                'install',
+                'clean',
+                'dev',
+              ],
+              description: 'Command category',
+            },
+            confidenceScore: {
+              type: 'number',
+              minimum: 0,
+              maximum: 1,
+              description: 'Confidence that this command works (0-1)',
+            },
+            workingDirectory: {
+              type: 'string',
+              description: 'Directory where command should be run',
+            },
+            prerequisites: {
+              type: 'array',
+              items: { type: 'string' },
+              description: 'Commands that should be run first',
+            },
+            environment: {
+              type: 'string',
+              description: 'Required environment or conditions',
+            },
+          },
+          required: ['command', 'description', 'category', 'confidenceScore'],
+        },
+      },
+      setupRequirements: {
+        type: 'array',
+        items: {
+          type: 'object',
+          properties: {
+            requirement: {
+              type: 'string',
+              description: 'Setup requirement description',
+            },
+            commands: {
+              type: 'array',
+              items: { type: 'string' },
+              description: 'Commands to fulfill this requirement',
+            },
+            priority: {
+              type: 'string',
+              enum: ['critical', 'recommended', 'optional'],
+              description: 'Priority level',
+            },
+          },
+          required: ['requirement', 'commands', 'priority'],
+        },
+      },
+      cicdAnalysis: {
+        type: 'object',
+        properties: {
+          ciFilesFound: {
+            type: 'array',
+            items: { type: 'string' },
+            description: 'CI/CD configuration files detected',
+          },
+          officialCommands: {
+            type: 'array',
+            items: { type: 'string' },
+            description: 'Commands found in CI/CD files',
+          },
+          platforms: {
+            type: 'array',
+            items: { type: 'string' },
+            description:
+              'CI/CD platforms detected (GitHub Actions, GitLab CI, etc.)',
+          },
+        },
+        required: ['ciFilesFound', 'officialCommands', 'platforms'],
+      },
+    },
+    required: [
+      'projectOverview',
+      'workingCommands',
+      'setupRequirements',
+      'cicdAnalysis',
+    ],
+  },
+
+  systemPrompt: `You are an expert codebase explorer that comprehensively analyzes any software project to discover all build, test, and run commands. You orchestrate multiple specialized agents to explore the project structure and test commands in parallel for maximum efficiency.`,
+
+  instructionsPrompt: `Your mission is to provide a comprehensive analysis of any codebase to discover all working commands for building, testing, and running the project.
+
+## Analysis Strategy:
+
+1. **Project Structure Exploration**: First spawn file-explorer to understand the project layout, key files, and technology stack.
+  In parallel, spawn a second file-explorer to learn about the build, lint, and testing processes across the codebase.
+
+2. **Massive Parallel Command Testing**: Only after fully completing step 1 and getting back the results, spawn MANY (10-15) read-only-commander-lite agents simultaneously to test different command combinations, including for any relevant sub-directories if this is a monorepo.
+  Look for commands for the following project types:
+   - Web apps: next.js, react, vue, etc. commands (build, test, start, dev, lint, etc.)
+   - Node.js projects: npm/yarn/pnpm commands (build, test, start, dev, lint, etc.)
+   - Python projects: pip, pytest, setup.py, tox commands
+   - Rust projects: cargo commands (build, test, run, check, etc.)
+   ...And so on for all project types
+
+  Include CI/CD Analysis: Have agents examine CI/CD files (.github/workflows, .gitlab-ci.yml, etc.) to discover official build processes
+
+3. **Final Analysis**: Use the set_output tool to output the results of the analysis. Rate each working command based on:
+   - Success rate of execution
+   - Presence in official documentation/CI
+   - Standard conventions for the project type
+   - Output quality and expected behavior
+
+## Command Categories to Test:
+- **install**: Dependency installation commands
+- **build**: Compilation and build commands
+- **test**: All types of testing (unit, integration, e2e)
+- **run**: Application execution commands
+- **dev**: Development server/watch commands
+- **lint**: Code linting and static analysis
+- **format**: Code formatting commands
+- **clean**: Cleanup and reset commands
+
+## Be Extremely Thorough:
+- Try multiple package managers if multiple are detected
+- Test both short and long command forms
+- Check for custom scripts in package.json, Makefile, etc.
+- Test commands with different flags and options
+- Verify commands work from different directories
+- Check for environment-specific requirements
+
+## Special Focus Areas:
+- Look for monorepo structures and workspace commands
+- Detect containerized setups and associated commands
+- Find database setup/migration commands
+- Identify development vs production commands
+- Discover deployment and release commands
+
+Provide a comprehensive, structured output that gives developers everything they need to understand and work with the codebase immediately.`,
+}
+
+export default definition
@@ -38,17 +38,13 @@ const definition: AgentDefinition = {
       toolName: 'run_terminal_command',
       input: {
         command: 'git diff',
-        process_type: 'SYNC',
-        timeout_seconds: 30,
       },
     }
 
     yield {
       toolName: 'run_terminal_command',
       input: {
         command: 'git log --oneline -10',
-        process_type: 'SYNC',
-        timeout_seconds: 30,
       },
     }
 
 
@@ -0,0 +1,18 @@
+import { publisher } from './constants'
+import {
+  PLACEHOLDER,
+  type SecretAgentDefinition,
+} from './types/secret-agent-definition'
+import readOnlyCommander from './read-only-commander'
+
+/**
+ * Variant of `readOnlyCommander` that inherits every field via spread and
+ * overrides only the id, display name, publisher, model and spawner prompt.
+ */
+const readOnlyCommanderLite: SecretAgentDefinition = {
+  ...readOnlyCommander,
+  id: 'read-only-commander-lite',
+  displayName: 'ReadOnly Commander Lite',
+  publisher,
+  model: 'x-ai/grok-code-fast-1',
+  spawnerPrompt:
+    'Can run quick read-only terminal commands and report back on the results. Has a basic understanding of the codebase. Is speedy and low-cost.',
+}
+
+export default readOnlyCommanderLite
@@ -1,8 +1,8 @@
-import { publisher } from '../constants'
+import { publisher } from './constants'
 import {
   PLACEHOLDER,
   type SecretAgentDefinition,
-} from '../types/secret-agent-definition'
+} from './types/secret-agent-definition'
 
 const readOnlyCommander: SecretAgentDefinition = {
   id: 'read-only-commander',
@@ -15,7 +15,7 @@ const readOnlyCommander: SecretAgentDefinition = {
   },
   displayName: 'ReadOnly Commander',
   spawnerPrompt:
-    'Can run quick read-only terminal commands and report back on the results. Has a basic understanding of the codebase.',
+    'Can run quick read-only terminal commands and report back on the results. Has a decent understanding of the codebase.',
   inputSchema: {
     prompt: {
       type: 'string',
 
@@ -0,0 +1,26 @@
+import type { AgentDefinition } from './types/agent-definition'
+
+/**
+ * Agent definition for a reviewer of local code changes.
+ *
+ * The agent can read files, search code, run terminal commands and spawn a
+ * `file-explorer` sub-agent; its instructions walk through a fixed five-step
+ * review procedure.
+ */
+const definition: AgentDefinition = {
+  id: 'simple-code-reviewer',
+  displayName: 'Simple Code Reviewer',
+  publisher: 'james',
+  model: 'anthropic/claude-sonnet-4',
+  toolNames: [
+    'read_files',
+    'code_search',
+    'run_terminal_command',
+    'spawn_agents',
+  ],
+  spawnableAgents: ['codebuff/file-explorer@0.0.2'],
+  spawnerPrompt: 'Spawn when you need to review local code changes',
+  systemPrompt:
+    'You are an expert software developer. Your job is to review local code changes and give helpful feedback.',
+  // One array entry per prompt line; each step keeps its two-space indent so
+  // the joined text matches the multi-line prompt exactly.
+  instructionsPrompt: [
+    'Instructions:',
+    '  1. Use git diff to get the changes, but also get untracked files.',
+    '  2. Read the files that have changed.',
+    '  3. Spawn a file explorer to find all related and relevant files.',
+    '  4. Read all the files that could be relevant to the changes.',
+    '  5. Review the changes and suggest improvements.',
+  ].join('\n'),
+}
+
+export default definition
@@ -9,7 +9,7 @@ on:
 jobs:
   run-nightly-evals:
     runs-on: ubuntu-latest
-    timeout-minutes: 1440
+    timeout-minutes: 360 # GitHub-hosted runners cap each job at 6 hours
     steps:
       - name: Checkout repository
         uses: actions/checkout@v3
@@ -47,7 +47,7 @@ jobs:
           echo "CODEBUFF_GITHUB_TOKEN=${{ secrets.CODEBUFF_GITHUB_TOKEN }}" >> $GITHUB_ENV
 
       - name: Run nightly evals
-        run: cd evals && bun run-eval-set --concurrency 3 --email --title "Nightly Eval Run ($(date '+%Y-%m-%d'))"
+        run: cd evals && bun run-eval-set --concurrency 10 --email --title "Nightly Eval Run ($(date '+%Y-%m-%d'))"
 
       - name: Workflow completed
         run: echo "Nightly evals workflow completed successfully"
@@ -174,3 +174,7 @@ Some ways you can help:
 **Contributing**: [CONTRIBUTING.md](./CONTRIBUTING.md) - Start here to contribute!
 
 **Support**: [support@codebuff.com](mailto:support@codebuff.com)
+
+## Star History
+
+[![Star History Chart](https://api.star-history.com/svg?repos=CodebuffAI/codebuff&type=Date)](https://www.star-history.com/#CodebuffAI/codebuff&Date)