Skip to content

Commit d5e4c5d

Browse files
committed
Show meta-analysis in buffbench email
1 parent f382d0f commit d5e4c5d

File tree

2 files changed

+41
-3
lines changed

2 files changed

+41
-3
lines changed

evals/buffbench/main-nightly.ts

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import { sendBasicEmail } from '@codebuff/internal/loops'
44

55
import { runBuffBench } from './run-buffbench'
66
import type { AgentEvalResults } from './types'
7+
import type { MetaAnalysisResult } from './meta-analyzer'
78

89
async function main() {
910
console.log('Starting nightly buffbench evaluation...')
@@ -23,8 +24,8 @@ async function main() {
2324
const recipientEmail = process.env.EVAL_RESULTS_EMAIL || 'team@codebuff.com'
2425
console.log(`\n📧 Sending buffbench results email to ${recipientEmail}...`)
2526

26-
const { metadata, ...agentResults } = results
27-
const emailContent = formatBuffBenchEmailContent(agentResults, metadata)
27+
const { metadata, metaAnalysis, ...agentResults } = results
28+
const emailContent = formatBuffBenchEmailContent(agentResults, metadata, metaAnalysis)
2829

2930
try {
3031
const emailResult = await sendBasicEmail({
@@ -48,6 +49,7 @@ async function main() {
4849
function formatBuffBenchEmailContent(
4950
results: Record<string, AgentEvalResults>,
5051
metadata: any,
52+
metaAnalysis?: MetaAnalysisResult,
5153
) {
5254
const agents = Object.keys(results)
5355
const date = new Date().toLocaleDateString()
@@ -69,7 +71,7 @@ function formatBuffBenchEmailContent(
6971
)
7072
.join('\n\n')
7173

72-
const message = `📊 NIGHTLY BUFFBENCH RESULTS
74+
let message = `📊 NIGHTLY BUFFBENCH RESULTS
7375
7476
📈 AGENT RESULTS:
7577
${agentComparison}
@@ -82,6 +84,41 @@ ${agentComparison}
8284
Generated on: ${metadata.timestamp}
8385
Repository: ${metadata.repoUrl}`
8486

87+
if (metaAnalysis) {
88+
message += `
89+
90+
🔍 META-ANALYSIS
91+
92+
Overall Comparison:
93+
${metaAnalysis.overallComparison}`
94+
95+
if (metaAnalysis.agentInsights.length > 0) {
96+
message += `\n\nAgent-Specific Insights:`
97+
for (const insight of metaAnalysis.agentInsights) {
98+
message += `\n\n[${insight.agentId}]`
99+
if (insight.consistentStrengths.length > 0) {
100+
message += `\n Strengths: ${insight.consistentStrengths.join(', ')}`
101+
}
102+
if (insight.consistentWeaknesses.length > 0) {
103+
message += `\n Weaknesses: ${insight.consistentWeaknesses.join(', ')}`
104+
}
105+
if (insight.recommendations.length > 0) {
106+
message += `\n Recommendations:`
107+
insight.recommendations.forEach((rec) => {
108+
message += `\n • ${rec}`
109+
})
110+
}
111+
}
112+
}
113+
114+
if (metaAnalysis.keyFindings.length > 0) {
115+
message += `\n\nKey Findings:`
116+
metaAnalysis.keyFindings.forEach((finding, i) => {
117+
message += `\n ${i + 1}. ${finding}`
118+
})
119+
}
120+
}
121+
85122
return { subject, message }
86123
}
87124

evals/buffbench/run-buffbench.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -493,6 +493,7 @@ export async function runBuffBench(options: {
493493
logsDirectory: logsDir,
494494
files: logFiles,
495495
},
496+
metaAnalysis,
496497
...results,
497498
}
498499

0 commit comments

Comments
 (0)