Skip to content

Commit 0fec560

Browse files
committed
add cost breakdown to email
1 parent 76d0c58 commit 0fec560

File tree

1 file changed

+50
-3
lines changed

1 file changed

+50
-3
lines changed

evals/git-evals/email-eval-results.ts

Lines changed: 50 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -43,8 +43,25 @@ function formatEvalSummaryForEmail(
4343
(sum, result) => sum + result.overall_metrics.average_code_quality,
4444
0,
4545
) / evalResults.length
46+
const avgCostUsd =
47+
evalResults.reduce(
48+
(sum, result) => sum + result.overall_metrics.average_cost_usd,
49+
0,
50+
) / evalResults.length
51+
const totalCostUsd = evalResults.reduce(
52+
(sum, result) =>
53+
sum +
54+
result.overall_metrics.average_cost_usd *
55+
result.overall_metrics.total_runs,
56+
0,
57+
)
58+
const avgRuntimeSec =
59+
evalResults.reduce(
60+
(sum, result) => sum + result.overall_metrics.average_runtime_sec,
61+
0,
62+
) / evalResults.length
4663

47-
const subject = `Codebuf Eval Results - ${title ? title : new Date().toLocaleDateString()} - Overall Score: ${avgOverallScore.toFixed(1)}/10`
64+
const subject = `Codebuff Eval Results - ${title ? title : new Date().toLocaleDateString()} - Score: ${avgOverallScore.toFixed(1)}/10 | Cost: ${avgCostUsd.toFixed(3)} | ${avgRuntimeSec.toFixed(1)}s`
4865

4966
// Build the complete message as a single string
5067
const summary = analyses.map((analysis) => analysis.summary).join('\n\n')
@@ -56,13 +73,21 @@ function formatEvalSummaryForEmail(
5673
• Efficiency: ${avgEfficiency.toFixed(2)}/10
5774
• Code Quality: ${avgCodeQuality.toFixed(2)}/10
5875
76+
💰 COST & PERFORMANCE METRICS
77+
• Average Cost per Run: ${avgCostUsd.toFixed(4)}
78+
• Total Cost: ${totalCostUsd.toFixed(2)}
79+
• Average Runtime: ${avgRuntimeSec.toFixed(1)} seconds
80+
• Cost per Point (Overall Score): ${(avgCostUsd / avgOverallScore).toFixed(4)}
81+
5982
📈 BY EVAL SET:
6083
${evalResults
6184
.map(
6285
(result) => `${result.test_repo_name}:
6386
- Success: ${result.overall_metrics.successful_runs}/${result.overall_metrics.total_runs}
6487
- Overall: ${result.overall_metrics.average_overall.toFixed(1)}/10
65-
- Completion: ${result.overall_metrics.average_completion.toFixed(1)}/10`,
88+
- Completion: ${result.overall_metrics.average_completion.toFixed(1)}/10
89+
- Avg Cost: ${result.overall_metrics.average_cost_usd.toFixed(4)}
90+
- Avg Runtime: ${result.overall_metrics.average_runtime_sec.toFixed(1)}s`,
6691
)
6792
.join('\n')}`
6893

@@ -103,18 +128,40 @@ ${allProblems
103128
const recommendations = `💡 DEVELOPMENT RECOMMENDATIONS:
104129
${uniqueRecommendations.map((rec, i) => `${i + 1}. ${rec}`).join('\n')}`
105130

131+
// Add detailed cost breakdown section
132+
const costBreakdown = `💸 DETAILED COST BREAKDOWN
133+
${evalResults
134+
.map((result) => {
135+
const setCost =
136+
result.overall_metrics.average_cost_usd *
137+
result.overall_metrics.total_runs
138+
const costPerSuccessfulRun =
139+
result.overall_metrics.successful_runs > 0
140+
? setCost / result.overall_metrics.successful_runs
141+
: 0
142+
return `${result.test_repo_name}:
143+
- Total Set Cost: ${setCost.toFixed(3)}
144+
- Cost per Run: ${result.overall_metrics.average_cost_usd.toFixed(4)}
145+
- Cost per Successful Run: ${costPerSuccessfulRun.toFixed(4)}
146+
- Runtime Efficiency: ${result.overall_metrics.average_runtime_sec > 0 ? (result.overall_metrics.average_overall / result.overall_metrics.average_runtime_sec).toFixed(3) : 'N/A'} points/sec`
147+
})
148+
.join('\n')}`
149+
106150
// Combine everything into a single message
107151
const message = `${summary}
108152
109153
${metrics}
110154
155+
${costBreakdown}
156+
111157
${topProblems}
112158
113159
${recommendations}
114160
115161
Generated on: ${new Date().toISOString()}
116162
Total Eval Sets: ${evalResults.length}
117-
Total Runs: ${totalRuns}`
163+
Total Runs: ${totalRuns}
164+
Total Budget Used: $${totalCostUsd.toFixed(2)}`
118165

119166
return {
120167
subject,

0 commit comments

Comments
 (0)