CodebuffAI
diff --git a/.agents/LESSONS.md b/.agents/LESSONS.md
Lines changed: 125 additions & 44 deletions
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
Lines changed: 3 additions & 1 deletion
diff --git a/common/src/mcp/client.ts b/common/src/mcp/client.ts
Lines changed: 4 additions & 3 deletions
diff --git a/evals/buffbench/eval-codebuff.json b/evals/buffbench/eval-codebuff.json
Lines changed: 2 additions & 5 deletions
diff --git a/evals/buffbench/gen-evals.ts b/evals/buffbench/gen-evals.ts
Lines changed: 17 additions & 21 deletions
diff --git a/evals/buffbench/pick-commits.ts b/evals/buffbench/pick-commits.ts
Lines changed: 1 addition & 1 deletion
diff --git a/evals/buffbench/run-buffbench.ts b/evals/buffbench/run-buffbench.ts
Lines changed: 17 additions & 10 deletions
diff --git a/evals/scaffolding.ts b/evals/scaffolding.ts
Lines changed: 1 addition & 3 deletions
diff --git a/evals/subagents/eval-planner.ts b/evals/subagents/eval-planner.ts
Lines changed: 5 additions & 3 deletions
diff --git a/evals/test-setup.ts b/evals/test-setup.ts
Lines changed: 1 addition & 1 deletion
@@ -51,12 +51,14 @@ Before you begin, you'll need to install a few tools:
 5. **Start development services**:
 
    **Option A: All-in-one (recommended)**
+
    ```bash
    bun run dev
    # Starts the web server, builds the SDK, and launches the CLI automatically
    ```
 
    **Option B: Separate terminals (for more control)**
+
    ```bash
    # Terminal 1 - Web server (start first)
    bun run start-web
@@ -201,7 +203,7 @@ cd cli
 bun run test:tmux-poc
 ```
 
-See [cli/src/__tests__/README.md](cli/src/__tests__/README.md) for comprehensive interactive testing documentation.
+See [`cli/src/__tests__/README.md`](cli/src/__tests__/README.md) for comprehensive interactive testing documentation.
 
 ### Commit Messages
 
 
@@ -154,9 +154,10 @@ export async function callMCPTool(
         mediaType: c.resource.mimeType ?? 'text/plain',
       } satisfies ToolResultOutput
     }
-    const fallbackValue = 'uri' in c && typeof (c as { uri: unknown }).uri === 'string' 
-      ? (c as { uri: string }).uri 
-      : JSON.stringify(c)
+    const fallbackValue =
+      'uri' in c && typeof (c as { uri: unknown }).uri === 'string'
+        ? (c as { uri: string }).uri
+        : JSON.stringify(c)
     return {
       type: 'json',
       value: fallbackValue,
 
@@ -9,10 +9,7 @@
     }
   ],
   "initCommand": "bun install && git checkout -- bun.lock",
-  "finalCheckCommands": [
-    "bun run typecheck",
-    "bun run test"
-  ],
+  "finalCheckCommands": ["bun run typecheck", "bun run test"],
   "env": {
     "ANTHROPIC_API_KEY": "test-key",
     "ANTHROPIC_API_KEY2": "test-key-2",
@@ -3193,4 +3190,4 @@
       ]
     }
   ]
-}
+}
@@ -1,15 +1,15 @@
 import { execSync } from 'child_process'
-import { createTwoFilesPatch } from 'diff'
 import fs from 'fs'
 import path from 'path'
-import { mapLimit } from 'async'
 
 import { API_KEY_ENV_VAR } from '@codebuff/common/old-constants'
-
 import { CodebuffClient, getUserCredentials } from '@codebuff/sdk'
+import { mapLimit } from 'async'
+import { createTwoFilesPatch } from 'diff'
+
+import { generateEvalTask } from './eval-task-generator'
 import { extractRepoNameFromUrl } from './setup-test-repo'
 import { withTestRepoAndParent } from '../subagents/test-repo-utils'
-import { generateEvalTask } from './eval-task-generator'
 
 import type { EvalDataV2, EvalCommitV2, FileDiff } from './types'
 
@@ -207,24 +207,20 @@ export async function generateEvalFileV2({
     )
   }
 
-  const batchResults = await mapLimit(
-    commitShas,
-    BATCH_SIZE,
-    async (commitSha: string) => {
-      const result = await processCommit(commitSha)
-      if (result) {
-        evalCommits.push(result)
-
-        const partialEvalData: EvalDataV2 = {
-          repoUrl,
-          generationDate: new Date().toISOString(),
-          evalCommits: [...evalCommits],
-        }
-        savePartialResults(partialOutputPath, partialEvalData)
+  await mapLimit(commitShas, BATCH_SIZE, async (commitSha: string) => {
+    const result = await processCommit(commitSha)
+    if (result) {
+      evalCommits.push(result)
+
+      const partialEvalData: EvalDataV2 = {
+        repoUrl,
+        generationDate: new Date().toISOString(),
+        evalCommits: [...evalCommits],
       }
-      return result
-    },
-  )
+      savePartialResults(partialOutputPath, partialEvalData)
+    }
+    return result
+  })
 
   const evalData: EvalDataV2 = {
     repoUrl,
 
@@ -5,9 +5,9 @@ import fs from 'fs'
 import path from 'path'
 
 import { disableLiveUserInputCheck } from '@codebuff/agent-runtime/live-user-inputs'
-import { promptAiSdkStructured } from '@codebuff/sdk/impl/llm'
 import { models } from '@codebuff/common/old-constants'
 import { userMessage } from '@codebuff/common/util/messages'
+import { promptAiSdkStructured } from '@codebuff/sdk'
 import { mapLimit } from 'async'
 import { z } from 'zod/v4'
 
 
@@ -1,21 +1,26 @@
+import { execSync } from 'child_process'
 import fs from 'fs'
-import path from 'path'
 import os from 'os'
-import { execSync } from 'child_process'
+import path from 'path'
 
 import { API_KEY_ENV_VAR } from '@codebuff/common/old-constants'
+import {
+  CodebuffClient,
+  getUserCredentials,
+  loadLocalAgents,
+} from '@codebuff/sdk'
 import pLimit from 'p-limit'
 
 import { runAgentOnCommit, type ExternalAgentType } from './agent-runner'
 import { formatTaskResults } from './format-output'
 import { judgeCommitResult } from './judge'
-import { analyzeAgentTraces, type AgentTraceData } from './trace-analyzer'
 import { extractAgentLessons, saveAgentLessons } from './lessons-extractor'
-import { CodebuffClient, getUserCredentials, loadLocalAgents } from '@codebuff/sdk'
+import { analyzeAgentTraces, type AgentTraceData } from './trace-analyzer'
 import { logger } from '../logger'
-import type { AgentEvalResults, EvalDataV2, EvalCommitV2 } from './types'
 import { analyzeAllTasks } from './meta-analyzer'
 
+import type { AgentEvalResults, EvalDataV2, EvalCommitV2 } from './types'
+
 function parseAgentId(agent: string): {
   agentId: string
   externalAgentType?: ExternalAgentType
@@ -340,12 +345,12 @@ export async function runBuffBench(options: {
     (f) => f.data.binInstalls ?? [],
   )
   const uniqueBinInstalls = allBinInstalls.filter(
-    (bin, index, self) =>
-      index === self.findIndex((b) => b.name === bin.name),
+    (bin, index, self) => index === self.findIndex((b) => b.name === bin.name),
   )
 
   // Install binaries once at the beginning
-  const { tempDir: binsTempDir, env: binsEnv } = installBinaries(uniqueBinInstalls)
+  const { tempDir: binsTempDir, env: binsEnv } =
+    installBinaries(uniqueBinInstalls)
 
   let commitsToRun: CommitWithSource[]
   if (taskIds && taskIds.length > 0) {
@@ -362,7 +367,9 @@ export async function runBuffBench(options: {
     }
 
     if (notFoundIds.length > 0) {
-      const availableIds = allCommitsWithSource.map((c) => c.commit.id).join(', ')
+      const availableIds = allCommitsWithSource
+        .map((c) => c.commit.id)
+        .join(', ')
       throw new Error(
         `Task ID(s) not found: ${notFoundIds.join(', ')}. Available task IDs: ${availableIds}`,
       )
@@ -473,7 +480,7 @@ export async function runBuffBench(options: {
     }
   }
 
-  for (const [_agentId, agentData] of Object.entries(results)) {
+  for (const agentData of Object.values(results)) {
     // Filter out runs from commits where ANY agent had an error
     const validRuns = agentData.runs.filter(
       (r) => !commitShasWithErrors.has(r.commitSha),
 
@@ -5,8 +5,8 @@ import path from 'path'
 import { runAgentStep } from '@codebuff/agent-runtime/run-agent-step'
 import { assembleLocalAgentTemplates } from '@codebuff/agent-runtime/templates/agent-registry'
 import { getFileTokenScores } from '@codebuff/code-map/parse'
-import { clientToolCallSchema } from '@codebuff/common/tools/list'
 import { API_KEY_ENV_VAR, TEST_USER_ID } from '@codebuff/common/old-constants'
+import { clientToolCallSchema } from '@codebuff/common/tools/list'
 import { generateCompactId } from '@codebuff/common/util/string'
 import { getSystemInfo } from '@codebuff/common/util/system-info'
 import { ToolHelpers } from '@codebuff/sdk'
@@ -297,14 +297,12 @@ export async function runToolCalls(toolCalls: ClientToolCall[]) {
 export async function loopMainPrompt({
   sessionState,
   prompt,
-  projectPath,
   maxIterations,
   stopCondition,
   agentType,
 }: {
   sessionState: SessionState
   prompt: string
-  projectPath: string
   maxIterations: number
   stopCondition?: (sessionState: AgentState) => boolean
   agentType: AgentTemplateType
 
@@ -1,16 +1,18 @@
 import * as fs from 'fs'
 import * as path from 'path'
-import { createTwoFilesPatch } from 'diff'
 
+import { API_KEY_ENV_VAR } from '@codebuff/common/old-constants'
 import {
-  AgentDefinition,
   CodebuffClient,
   getUserCredentials,
   loadLocalAgents,
 } from '@codebuff/sdk'
-import { API_KEY_ENV_VAR } from '@codebuff/common/old-constants'
+import { createTwoFilesPatch } from 'diff'
+
 import { withTestRepo } from './test-repo-utils'
 
+import type { AgentDefinition } from '@codebuff/sdk'
+
 export const evalPlannerAgent = async (params: {
   client: CodebuffClient
   agentId: string
 
@@ -24,7 +24,7 @@ export const SWE_BENCH_PYTHON_PATH = path.join(
 // Mock required environment variables for tests
 export function setupTestEnvironmentVariables() {
   // Set up mock environment variables needed for tests
-    // Add other required environment variables as needed
+  // Add other required environment variables as needed
 }
 
 // Patch the run_docker.py script to add git config command
Original file line number	Diff line number	Diff line change
`@@ -9,10 +9,7 @@`
`9`	`9`	`}`
`10`	`10`	`],`
`11`	`11`	`"initCommand": "bun install && git checkout -- bun.lock",`
`12`		`- "finalCheckCommands": [`
`13`		`- "bun run typecheck",`
`14`		`- "bun run test"`
`15`		`- ],`
	`12`	`+ "finalCheckCommands": ["bun run typecheck", "bun run test"],`
`16`	`13`	`"env": {`
`17`	`14`	`"ANTHROPIC_API_KEY": "test-key",`
`18`	`15`	`"ANTHROPIC_API_KEY2": "test-key-2",`
`@@ -3193,4 +3190,4 @@`
`3193`	`3190`	`]`
`3194`	`3191`	`}`
`3195`	`3192`	`]`
`3196`		`-}`
	`3193`	`+}`
Original file line number	Diff line number	Diff line change
`@@ -24,7 +24,7 @@ export const SWE_BENCH_PYTHON_PATH = path.join(`
`24`	`24`	`// Mock required environment variables for tests`
`25`	`25`	`export function setupTestEnvironmentVariables() {`
`26`	`26`	`// Set up mock environment variables needed for tests`
`27`		`- // Add other required environment variables as needed`
	`27`	`+ // Add other required environment variables as needed`
`28`	`28`	`}`
`29`	`29`
`30`	`30`	`// Patch the run_docker.py script to add git config command`