@@ -43,8 +43,25 @@ function formatEvalSummaryForEmail(
4343 ( sum , result ) => sum + result . overall_metrics . average_code_quality ,
4444 0 ,
4545 ) / evalResults . length
46+ const avgCostUsd =
47+ evalResults . reduce (
48+ ( sum , result ) => sum + result . overall_metrics . average_cost_usd ,
49+ 0 ,
50+ ) / evalResults . length
51+ const totalCostUsd = evalResults . reduce (
52+ ( sum , result ) =>
53+ sum +
54+ result . overall_metrics . average_cost_usd *
55+ result . overall_metrics . total_runs ,
56+ 0 ,
57+ )
58+ const avgRuntimeSec =
59+ evalResults . reduce (
60+ ( sum , result ) => sum + result . overall_metrics . average_runtime_sec ,
61+ 0 ,
62+ ) / evalResults . length
4663
47- const subject = `Codebuf Eval Results - ${ title ? title : new Date ( ) . toLocaleDateString ( ) } - Overall Score: ${ avgOverallScore . toFixed ( 1 ) } /10`
64+ const subject = `Codebuff Eval Results - ${ title ? title : new Date ( ) . toLocaleDateString ( ) } - Score: ${ avgOverallScore . toFixed ( 1 ) } /10 | Cost: ${ avgCostUsd . toFixed ( 3 ) } | ${ avgRuntimeSec . toFixed ( 1 ) } s `
4865
4966 // Build the complete message as a single string
5067 const summary = analyses . map ( ( analysis ) => analysis . summary ) . join ( '\n\n' )
@@ -56,13 +73,21 @@ function formatEvalSummaryForEmail(
5673• Efficiency: ${ avgEfficiency . toFixed ( 2 ) } /10
5774• Code Quality: ${ avgCodeQuality . toFixed ( 2 ) } /10
5875
76+ 💰 COST & PERFORMANCE METRICS
77+ • Average Cost per Run: ${ avgCostUsd . toFixed ( 4 ) }
78+ • Total Cost: ${ totalCostUsd . toFixed ( 2 ) }
79+ • Average Runtime: ${ avgRuntimeSec . toFixed ( 1 ) } seconds
80+ • Cost per Point (Overall Score): ${ ( avgCostUsd / avgOverallScore ) . toFixed ( 4 ) }
81+
5982📈 BY EVAL SET:
6083${ evalResults
6184 . map (
6285 ( result ) => `${ result . test_repo_name } :
6386 - Success: ${ result . overall_metrics . successful_runs } /${ result . overall_metrics . total_runs }
6487 - Overall: ${ result . overall_metrics . average_overall . toFixed ( 1 ) } /10
65- - Completion: ${ result . overall_metrics . average_completion . toFixed ( 1 ) } /10` ,
88+ - Completion: ${ result . overall_metrics . average_completion . toFixed ( 1 ) } /10
89+ - Avg Cost: ${ result . overall_metrics . average_cost_usd . toFixed ( 4 ) }
90+ - Avg Runtime: ${ result . overall_metrics . average_runtime_sec . toFixed ( 1 ) } s` ,
6691 )
6792 . join ( '\n' ) } `
6893
@@ -103,18 +128,40 @@ ${allProblems
103128 const recommendations = `💡 DEVELOPMENT RECOMMENDATIONS:
104129${ uniqueRecommendations . map ( ( rec , i ) => `${ i + 1 } . ${ rec } ` ) . join ( '\n' ) } `
105130
131+ // Add detailed cost breakdown section
132+ const costBreakdown = `💸 DETAILED COST BREAKDOWN
133+ ${ evalResults
134+ . map ( ( result ) => {
135+ const setCost =
136+ result . overall_metrics . average_cost_usd *
137+ result . overall_metrics . total_runs
138+ const costPerSuccessfulRun =
139+ result . overall_metrics . successful_runs > 0
140+ ? setCost / result . overall_metrics . successful_runs
141+ : 0
142+ return `${ result . test_repo_name } :
143+ - Total Set Cost: ${ setCost . toFixed ( 3 ) }
144+ - Cost per Run: ${ result . overall_metrics . average_cost_usd . toFixed ( 4 ) }
145+ - Cost per Successful Run: ${ costPerSuccessfulRun . toFixed ( 4 ) }
146+ - Runtime Efficiency: ${ result . overall_metrics . average_runtime_sec > 0 ? ( result . overall_metrics . average_overall / result . overall_metrics . average_runtime_sec ) . toFixed ( 3 ) : 'N/A' } points/sec`
147+ } )
148+ . join ( '\n' ) } `
149+
106150 // Combine everything into a single message
107151 const message = `${ summary }
108152
109153${ metrics }
110154
155+ ${ costBreakdown }
156+
111157${ topProblems }
112158
113159${ recommendations }
114160
115161Generated on: ${ new Date ( ) . toISOString ( ) }
116162Total Eval Sets: ${ evalResults . length }
117- Total Runs: ${ totalRuns } `
163+ Total Runs: ${ totalRuns }
164+ Total Budget Used: $${ totalCostUsd . toFixed ( 2 ) } `
118165
119166 return {
120167 subject,
0 commit comments