@@ -16,24 +16,15 @@ export interface AgentTraceData {
1616 timestamp : string
1717}
1818
19- interface AgentComparison {
20- overallAnalysis : string
21- agentFeedback : Array < {
22- agentId : string
23- strengths : string [ ]
24- weaknesses : string [ ]
25- relativePerformance : string
26- } >
27- recommendations : string [ ]
28- }
29-
3019function truncateTrace ( trace : AgentStep [ ] ) : AgentStep [ ] {
3120 return trace . map ( ( step ) => ( {
3221 ...step ,
3322 toolResults : step . toolResults . map ( ( result ) => {
3423 // Truncate read_files, run_terminal_command, and code_search results to save tokens
3524 if ( result . toolName === 'read_files' && result . output ) {
36- const output = Array . isArray ( result . output ) ? result . output : [ result . output ]
25+ const output = Array . isArray ( result . output )
26+ ? result . output
27+ : [ result . output ]
3728 const truncatedOutput = output . map ( ( item : any ) => {
3829 if ( item . type === 'json' && Array . isArray ( item . value ) ) {
3930 // Truncate file contents in read_files results
@@ -58,19 +49,22 @@ function truncateTrace(trace: AgentStep[]): AgentStep[] {
5849 output : truncatedOutput ,
5950 }
6051 }
61-
52+
6253 // Truncate run_terminal_command results (keep first 500 chars)
6354 if ( result . toolName === 'run_terminal_command' && result . output ) {
64- const output = Array . isArray ( result . output ) ? result . output : [ result . output ]
55+ const output = Array . isArray ( result . output )
56+ ? result . output
57+ : [ result . output ]
6558 const truncatedOutput = output . map ( ( item : any ) => {
6659 if ( item . type === 'json' && item . value ?. stdout ) {
6760 return {
6861 ...item ,
6962 value : {
7063 ...item . value ,
71- stdout : item . value . stdout . length > 500
72- ? item . value . stdout . slice ( 0 , 500 ) + '... [TRUNCATED]'
73- : item . value . stdout ,
64+ stdout :
65+ item . value . stdout . length > 500
66+ ? item . value . stdout . slice ( 0 , 500 ) + '... [TRUNCATED]'
67+ : item . value . stdout ,
7468 } ,
7569 }
7670 }
@@ -81,19 +75,22 @@ function truncateTrace(trace: AgentStep[]): AgentStep[] {
8175 output : truncatedOutput ,
8276 }
8377 }
84-
78+
8579 // Truncate code_search results (keep first 500 chars)
8680 if ( result . toolName === 'code_search' && result . output ) {
87- const output = Array . isArray ( result . output ) ? result . output : [ result . output ]
81+ const output = Array . isArray ( result . output )
82+ ? result . output
83+ : [ result . output ]
8884 const truncatedOutput = output . map ( ( item : any ) => {
8985 if ( item . type === 'json' && item . value ?. stdout ) {
9086 return {
9187 ...item ,
9288 value : {
9389 ...item . value ,
94- stdout : item . value . stdout . length > 500
95- ? item . value . stdout . slice ( 0 , 500 ) + '... [TRUNCATED]'
96- : item . value . stdout ,
90+ stdout :
91+ item . value . stdout . length > 500
92+ ? item . value . stdout . slice ( 0 , 500 ) + '... [TRUNCATED]'
93+ : item . value . stdout ,
9794 } ,
9895 }
9996 }
@@ -104,7 +101,7 @@ function truncateTrace(trace: AgentStep[]): AgentStep[] {
104101 output : truncatedOutput ,
105102 }
106103 }
107-
104+
108105 return result
109106 } ) ,
110107 } ) )
@@ -113,7 +110,7 @@ function truncateTrace(trace: AgentStep[]): AgentStep[] {
113110const traceAnalyzerAgent : AgentDefinition = {
114111 id : 'git-evals2-trace-analyzer' ,
115112 displayName : 'Git Evals2 Trace Analyzer' ,
116- model : 'anthropic/claude-3.5-sonnet ' ,
113+ model : 'openai/gpt-5 ' ,
117114 toolNames : [ 'set_output' ] ,
118115 inputSchema : {
119116 prompt : { type : 'string' , description : 'The analysis prompt' } ,
@@ -205,7 +202,16 @@ export async function analyzeAgentTraces({
205202 client : CodebuffClient
206203 traces : AgentTraceData [ ]
207204 spec : string
208- } ) : Promise < AgentComparison > {
205+ } ) : Promise < {
206+ overallAnalysis : string
207+ agentFeedback : Array < {
208+ agentId : string
209+ strengths : string [ ]
210+ weaknesses : string [ ]
211+ relativePerformance : string
212+ } >
213+ recommendations : string [ ]
214+ } > {
209215 const truncatedTraces = traces . map ( ( t ) => ( {
210216 agentId : t . agentId ,
211217 trace : truncateTrace ( t . trace ) ,
@@ -247,10 +253,12 @@ Focus on the HOW, not the WHAT: We want to understand and improve how agents wor
247253 agentDefinitions : [ traceAnalyzerAgent ] ,
248254 } )
249255
250- if ( analyzerResult . output . type !== 'structuredOutput' ) {
256+ const { output } = analyzerResult
257+
258+ if ( output . type !== 'structuredOutput' || output . value === null ) {
251259 console . error (
252260 'Error running trace analyzer - not structured output' ,
253- JSON . stringify ( analyzerResult . output , null , 2 ) ,
261+ JSON . stringify ( output , null , 2 ) ,
254262 )
255263 return {
256264 overallAnalysis : 'Error running trace analyzer - not structured output' ,
@@ -259,5 +267,5 @@ Focus on the HOW, not the WHAT: We want to understand and improve how agents wor
259267 }
260268 }
261269
262- return analyzerResult . output . value as AgentComparison
270+ return output . value as any
263271}
0 commit comments