@@ -19,10 +19,11 @@ export const withTestRepo = async <T>(
1919 repoUrl : string
2020 commitSha : string
2121 initCommand ?: string
22+ checkoutPrevious ?: boolean
2223 } ,
2324 fn : ( cwd : string ) => Promise < T > ,
2425) : Promise < T > => {
25- const { repoUrl, commitSha, initCommand } = repoConfig
26+ const { repoUrl, commitSha, initCommand, checkoutPrevious } = repoConfig
2627
2728 // Create a temporary directory for the test repo
2829 const tempDir = fs . mkdtempSync ( path . join ( os . tmpdir ( ) , 'codebuff-eval-' ) )
@@ -33,9 +34,18 @@ export const withTestRepo = async <T>(
3334 console . log ( `Cloning repository ${ repoUrl } to ${ repoDir } ...` )
3435 execSync ( `git clone ${ repoUrl } ${ repoDir } ` , { stdio : 'ignore' } )
3536
36- // Checkout the specific commit
37- console . log ( `Checking out commit ${ commitSha } ...` )
38- execSync ( `git checkout ${ commitSha } ` , { cwd : repoDir , stdio : 'ignore' } )
37+ // Checkout the specific commit or the previous commit
38+ if ( checkoutPrevious ) {
39+ const previousCommitSha = getPreviousCommitSha ( commitSha , repoDir )
40+ console . log ( `Checking out previous commit ${ previousCommitSha } ...` )
41+ execSync ( `git checkout ${ previousCommitSha } ` , {
42+ cwd : repoDir ,
43+ stdio : 'ignore' ,
44+ } )
45+ } else {
46+ console . log ( `Checking out commit ${ commitSha } ...` )
47+ execSync ( `git checkout ${ commitSha } ` , { cwd : repoDir , stdio : 'ignore' } )
48+ }
3949
4050 // Run initialization command if provided
4151 if ( initCommand ) {
@@ -56,6 +66,17 @@ export const withTestRepo = async <T>(
5666 }
5767}
5868
/**
 * Resolves the SHA of the first parent of a commit by running
 * `git rev-parse` inside an existing checkout.
 *
 * @param commitSha - Commit SHA (or simple ref name) whose parent is wanted.
 *   Only letters, digits, `_`, `.`, `/`, `~` and `-` are accepted, and the
 *   value must start with a letter or digit.
 * @param repoDir - Directory of the git checkout in which git is executed.
 * @returns The parent commit SHA printed by git, with surrounding whitespace
 *   removed.
 * @throws Error when `commitSha` contains characters outside the accepted set,
 *   or when git cannot resolve the parent (for example, the commit is unknown
 *   or has no parent).
 */
const getPreviousCommitSha = (commitSha: string, repoDir: string): string => {
  // The value is spliced into a shell command line, so refuse anything that
  // could be read as shell syntax or as a git option (leading `-`).
  if (!/^[A-Za-z0-9][A-Za-z0-9._\/~-]*$/.test(commitSha)) {
    throw new Error(`Invalid commit reference: ${JSON.stringify(commitSha)}`)
  }

  try {
    // `~1` names the first parent exactly like `^`, but `^` is the escape
    // character of cmd.exe and would be swallowed when run on Windows.
    // `--verify` makes git fail unless the expression resolves to one object.
    return execSync(`git rev-parse --verify ${commitSha}~1`, {
      cwd: repoDir,
      encoding: 'utf-8',
    }).trim()
  } catch (error: unknown) {
    const reason = error instanceof Error ? error.message : String(error)
    throw new Error(
      `Could not resolve the parent of commit ${commitSha}: ${reason}`,
    )
  }
}
79+
5980export const evalPlannerAgent = async ( params : {
6081 spec : string
6182 repoUrl : string
@@ -74,9 +95,8 @@ export const evalPlannerAgent = async (params: {
7495 const client = new CodebuffClient ( {
7596 apiKey : process . env [ API_KEY_ENV_VAR ] || getLocalAuthToken ( ) ,
7697 } )
77-
7898 const result = await withTestRepo (
79- { repoUrl, commitSha, initCommand } ,
99+ { repoUrl, commitSha, initCommand, checkoutPrevious : true } ,
80100 async ( cwd ) => {
81101 // Run the agent with the test repository as cwd
82102 console . log (
@@ -88,7 +108,10 @@ export const evalPlannerAgent = async (params: {
88108 cwd,
89109 agentDefinitions : [ implementationPlannerAgent ] ,
90110 handleEvent : ( event ) => {
91- console . log ( 'Codebuff Event' , JSON . stringify ( event , null , 2 ) )
111+ console . log (
112+ implementationPlannerAgent . id ,
113+ JSON . stringify ( event , null , 2 ) ,
114+ )
92115 } ,
93116 } )
94117 } ,
@@ -161,6 +184,9 @@ Evaluate how well the implementation plan matches the real commit changes. Consi
161184 agent : 'eval-judge' ,
162185 prompt : judgePrompt ,
163186 agentDefinitions : [ judgeAgent ] ,
187+ handleEvent : ( event ) => {
188+ console . log ( 'eval-judge' , JSON . stringify ( event , null , 2 ) )
189+ } ,
164190 } )
165191 if ( judgeResult . output . type !== 'structuredOutput' ) {
166192 throw new Error ( 'Error running judge agent' )
@@ -174,7 +200,7 @@ Evaluate how well the implementation plan matches the real commit changes. Consi
174200const judgeAgent : AgentDefinition = {
175201 id : 'eval-judge' ,
176202 displayName : 'Eval Judge' ,
177- model : 'x-ai/grok-4-fast:free ' ,
203+ model : 'x-ai/grok-4-fast' ,
178204 toolNames : [ 'set_output' ] ,
179205 inputSchema : {
180206 prompt : { type : 'string' , description : 'The prompt to judge' } ,
0 commit comments