Skip to content

Commit 815129f

Browse files
committed
Misc refactoring
1 parent 3974b74 commit 815129f

File tree

3 files changed

+35
-52
lines changed

3 files changed

+35
-52
lines changed

evals/git-evals2/example.ts

Lines changed: 0 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -2,7 +2,6 @@ import path from 'path'
22
import { runGitEvals2 } from './run-git-evals2'
33

44
async function main() {
5-
console.log('Running git-evals2 example...')
65
console.log('Comparing base and base-lite agents on first 3 commits\n')
76

87
const results = await runGitEvals2({

evals/git-evals2/run-git-evals2.ts

Lines changed: 33 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -8,17 +8,20 @@ import { CodebuffClient } from '../../sdk/src/client'
88
import { runAgentOnCommit } from './agent-runner'
99
import { judgeCommitResult } from './judge'
1010
import { analyzeAgentTraces, type AgentTraceData } from './trace-analyzer'
11-
12-
import type {
13-
EvalData,
14-
GitEvals2Options,
15-
GitEvals2Result,
16-
AgentEvalResults,
17-
} from './types'
18-
19-
export async function runGitEvals2(
20-
options: GitEvals2Options,
21-
): Promise<GitEvals2Result> {
11+
import { AgentEvalResults, EvalData, ProgressEvent } from './types'
12+
13+
export async function runGitEvals2(options: {
14+
evalDataPath: string
15+
agents: string[]
16+
outputPath?: string
17+
limit?: number
18+
onProgress?: (event: ProgressEvent) => void
19+
client?: CodebuffClient
20+
}): Promise<{
21+
agents: Record<string, AgentEvalResults>
22+
timestamp: string
23+
totalDuration: number
24+
}> {
2225
const { evalDataPath, agents, outputPath, limit, onProgress } = options
2326

2427
const evalData: EvalData = JSON.parse(fs.readFileSync(evalDataPath, 'utf-8'))
@@ -33,7 +36,7 @@ export async function runGitEvals2(
3336
})
3437

3538
const startTime = Date.now()
36-
const results = new Map<string, AgentEvalResults>()
39+
const results: Record<string, AgentEvalResults> = {}
3740

3841
// Create logs directory with current date and time
3942
const date = new Date().toISOString().replace(/:/g, '-').slice(0, 16) // YYYY-MM-DDTHH-MM
@@ -46,13 +49,13 @@ export async function runGitEvals2(
4649
}
4750

4851
for (const agentId of agents) {
49-
results.set(agentId, {
52+
results[agentId] = {
5053
agentId,
5154
runs: [],
5255
averageScore: 0,
5356
averageCost: 0,
5457
averageDuration: 0,
55-
})
58+
}
5659
}
5760

5861
for (const commit of commitsToRun) {
@@ -90,8 +93,7 @@ export async function runGitEvals2(
9093
commitSha: commit.sha,
9194
spec: commit.spec,
9295
diff: agentResult.diff,
93-
judgeScore: judgeResult.overallScore,
94-
judgeFeedback: judgeResult.analysis,
96+
judging: judgeResult,
9597
cost: agentResult.cost,
9698
durationMs: agentResult.durationMs,
9799
error: agentResult.error,
@@ -101,7 +103,7 @@ export async function runGitEvals2(
101103
const safeSpec = commit.spec
102104
.split('\n')[0]
103105
.replace(/[^a-zA-Z0-9]/g, '_')
104-
.slice(0, 30)
106+
.slice(0, 20)
105107
const safeAgentId = agentId.replace(/[^a-zA-Z0-9-]/g, '_')
106108
const safeCommitShort = commit.sha.slice(0, 7)
107109
const traceFilename = `${safeSpec}-${safeAgentId}-${safeCommitShort}.json`
@@ -151,8 +153,14 @@ export async function runGitEvals2(
151153
commitSha: commit.sha,
152154
spec: commit.spec,
153155
diff: '',
154-
judgeScore: 0,
155-
judgeFeedback: '',
156+
judging: {
157+
analysis: '',
158+
strengths: [],
159+
weaknesses: [],
160+
completionScore: 0,
161+
codeQualityScore: 0,
162+
overallScore: 0,
163+
},
156164
cost: 0,
157165
durationMs: 0,
158166
error: errorMessage,
@@ -164,8 +172,7 @@ export async function runGitEvals2(
164172
const agentResults = await Promise.all(agentPromises)
165173

166174
for (const { agentId, evalRun } of agentResults) {
167-
const agentData = results.get(agentId)!
168-
agentData.runs.push(evalRun)
175+
results[agentId].runs.push(evalRun)
169176
}
170177

171178
// After all agents complete for this commit, run trace analysis
@@ -208,13 +215,13 @@ export async function runGitEvals2(
208215
}
209216
}
210217

211-
for (const [agentId, agentData] of results) {
218+
for (const [agentId, agentData] of Object.entries(results)) {
212219
const successfulRuns = agentData.runs.filter((r) => !r.error)
213220
const totalRuns = agentData.runs.length
214221

215222
agentData.averageScore =
216223
successfulRuns.length > 0
217-
? successfulRuns.reduce((sum, r) => sum + r.judgeScore, 0) /
224+
? successfulRuns.reduce((sum, r) => sum + r.judging.overallScore, 0) /
218225
successfulRuns.length
219226
: 0
220227

@@ -229,7 +236,7 @@ export async function runGitEvals2(
229236
: 0
230237
}
231238

232-
const result: GitEvals2Result = {
239+
const result = {
233240
agents: results,
234241
timestamp: new Date().toISOString(),
235242
totalDuration: Date.now() - startTime,
@@ -241,20 +248,13 @@ export async function runGitEvals2(
241248
fs.mkdirSync(outputDir, { recursive: true })
242249
}
243250

244-
const serializedResult = {
245-
...result,
246-
agents: Array.from(result.agents.entries()).map(([id, data]) => ({
247-
id,
248-
...data,
249-
})),
250-
}
251-
fs.writeFileSync(outputPath, JSON.stringify(serializedResult, null, 2))
251+
fs.writeFileSync(outputPath, JSON.stringify(result, null, 2))
252252
console.log(`\nResults written to ${outputPath}`)
253253
}
254254

255255
console.log(`\nTraces saved to ${logsDir}`)
256256
console.log('\n=== Summary ===')
257-
for (const [agentId, data] of results) {
257+
for (const [agentId, data] of Object.entries(results)) {
258258
console.log(`\n${agentId}:`)
259259
console.log(` Score: ${data.averageScore.toFixed(2)}/10`)
260260
console.log(` Cost: $${data.averageCost.toFixed(4)}`)

evals/git-evals2/types.ts

Lines changed: 2 additions & 18 deletions
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
1-
import type { CodebuffClient } from '../../sdk/src/client'
1+
import type { JudgingResult } from './judge'
22

33
export interface FileState {
44
path: string
@@ -25,8 +25,7 @@ export interface EvalRun {
2525
commitSha: string
2626
spec: string
2727
diff: string
28-
judgeScore: number
29-
judgeFeedback: string
28+
judging: JudgingResult
3029
cost: number
3130
durationMs: number
3231
error?: string
@@ -58,18 +57,3 @@ export type ProgressEvent =
5857
commit: string
5958
error: string
6059
}
61-
62-
export interface GitEvals2Options {
63-
evalDataPath: string
64-
agents: string[]
65-
outputPath?: string
66-
limit?: number
67-
onProgress?: (event: ProgressEvent) => void
68-
client?: CodebuffClient
69-
}
70-
71-
export interface GitEvals2Result {
72-
agents: Map<string, AgentEvalResults>
73-
timestamp: string
74-
totalDuration: number
75-
}

0 commit comments

Comments
 (0)