Skip to content

Commit daefd57

Browse files
committed
eval-planner: load prev commit
1 parent 8af0443 commit daefd57

File tree

1 file changed

+34
-8
lines changed

1 file changed

+34
-8
lines changed

evals/subagents/eval-planner.ts

Lines changed: 34 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,11 @@ export const withTestRepo = async <T>(
1919
repoUrl: string
2020
commitSha: string
2121
initCommand?: string
22+
checkoutPrevious?: boolean
2223
},
2324
fn: (cwd: string) => Promise<T>,
2425
): Promise<T> => {
25-
const { repoUrl, commitSha, initCommand } = repoConfig
26+
const { repoUrl, commitSha, initCommand, checkoutPrevious } = repoConfig
2627

2728
// Create a temporary directory for the test repo
2829
const tempDir = fs.mkdtempSync(path.join(os.tmpdir(), 'codebuff-eval-'))
@@ -33,9 +34,18 @@ export const withTestRepo = async <T>(
3334
console.log(`Cloning repository ${repoUrl} to ${repoDir}...`)
3435
execSync(`git clone ${repoUrl} ${repoDir}`, { stdio: 'ignore' })
3536

36-
// Checkout the specific commit
37-
console.log(`Checking out commit ${commitSha}...`)
38-
execSync(`git checkout ${commitSha}`, { cwd: repoDir, stdio: 'ignore' })
37+
// Checkout the specific commit or the previous commit
38+
if (checkoutPrevious) {
39+
const previousCommitSha = getPreviousCommitSha(commitSha, repoDir)
40+
console.log(`Checking out previous commit ${previousCommitSha}...`)
41+
execSync(`git checkout ${previousCommitSha}`, {
42+
cwd: repoDir,
43+
stdio: 'ignore',
44+
})
45+
} else {
46+
console.log(`Checking out commit ${commitSha}...`)
47+
execSync(`git checkout ${commitSha}`, { cwd: repoDir, stdio: 'ignore' })
48+
}
3949

4050
// Run initialization command if provided
4151
if (initCommand) {
@@ -56,6 +66,17 @@ export const withTestRepo = async <T>(
5666
}
5767
}
5868

69+
/**
 * Resolves the SHA of the first parent of a commit by running
 * `git rev-parse` inside an existing clone.
 *
 * @param commitSha - Revision whose parent should be resolved. It is
 *   interpolated into a shell command line, so it must come from a trusted
 *   source (e.g. the eval configuration), never from untrusted input.
 * @param repoDir - Working directory of the cloned repository; git is run
 *   with this as its cwd.
 * @returns The parent commit's SHA as printed by git, with surrounding
 *   whitespace removed.
 * @throws The error raised by `execSync` when git exits with a non-zero
 *   status, e.g. the revision is unknown or the commit has no parent.
 */
const getPreviousCommitSha = (commitSha: string, repoDir: string): string => {
  // `~1` selects the first parent, exactly like a trailing `^`, but survives
  // every shell: cmd.exe treats `^` as its escape character and would strip
  // it, silently resolving the commit itself instead of its parent.
  // `--verify` makes git fail unless the argument resolves to exactly one
  // object name.
  const parentSha = execSync(`git rev-parse --verify ${commitSha}~1`, {
    cwd: repoDir,
    encoding: 'utf-8',
  }).trim()
  return parentSha
}
79+
5980
export const evalPlannerAgent = async (params: {
6081
spec: string
6182
repoUrl: string
@@ -74,9 +95,8 @@ export const evalPlannerAgent = async (params: {
7495
const client = new CodebuffClient({
7596
apiKey: process.env[API_KEY_ENV_VAR] || getLocalAuthToken(),
7697
})
77-
7898
const result = await withTestRepo(
79-
{ repoUrl, commitSha, initCommand },
99+
{ repoUrl, commitSha, initCommand, checkoutPrevious: true },
80100
async (cwd) => {
81101
// Run the agent with the test repository as cwd
82102
console.log(
@@ -88,7 +108,10 @@ export const evalPlannerAgent = async (params: {
88108
cwd,
89109
agentDefinitions: [implementationPlannerAgent],
90110
handleEvent: (event) => {
91-
console.log('Codebuff Event', JSON.stringify(event, null, 2))
111+
console.log(
112+
implementationPlannerAgent.id,
113+
JSON.stringify(event, null, 2),
114+
)
92115
},
93116
})
94117
},
@@ -161,6 +184,9 @@ Evaluate how well the implementation plan matches the real commit changes. Consi
161184
agent: 'eval-judge',
162185
prompt: judgePrompt,
163186
agentDefinitions: [judgeAgent],
187+
handleEvent: (event) => {
188+
console.log('eval-judge', JSON.stringify(event, null, 2))
189+
},
164190
})
165191
if (judgeResult.output.type !== 'structuredOutput') {
166192
throw new Error('Error running judge agent')
@@ -174,7 +200,7 @@ Evaluate how well the implementation plan matches the real commit changes. Consi
174200
const judgeAgent: AgentDefinition = {
175201
id: 'eval-judge',
176202
displayName: 'Eval Judge',
177-
model: 'x-ai/grok-4-fast:free',
203+
model: 'x-ai/grok-4-fast',
178204
toolNames: ['set_output'],
179205
inputSchema: {
180206
prompt: { type: 'string', description: 'The prompt to judge' },

0 commit comments

Comments
 (0)