44 * Compare sequential cache debug snapshots to find what's causing prompt cache misses.
55 *
66 * Usage:
7- * bun scripts/compare-cache-debug.ts [directory] [--agent <type>]
7+ * bun scripts/compare-cache-debug.ts [directory] [--agent <type>] [--run <runId>] [--cross-run]
88 *
99 * Options:
10- * --agent <type> Only compare snapshots from this agent type (e.g. base2)
10+ * --agent <type> Only compare snapshots from this agent type (e.g. base2)
11+ * --run <runId> Only compare snapshots from this specific run
12+ * --cross-run Compare all snapshots sequentially (old behavior, across runs)
13+ *
14+ * Default: groups snapshots by runId and compares consecutive steps within each run.
1115 *
1216 * Default directory: debug/cache-debug/
1317 *
@@ -134,6 +138,20 @@ function printSectionHeader(title: string) {
134138 console . log ( `${ '─' . repeat ( 80 ) } ` )
135139}
136140
/**
 * Produce a copy of a message with its `cache_control` markers removed, so two
 * messages can be compared on content alone.
 *
 * The copy is made with a JSON round trip, so the caller's value is never
 * mutated. `cache_control` is dropped from the message itself and from each
 * object directly inside an array-valued `content`; anything nested deeper is
 * left as-is.
 *
 * @param msg - Any value; only non-null objects are processed.
 * @returns `msg` itself when it is not a non-null object, otherwise the
 *   stripped deep copy.
 */
function stripCacheControlFromMessage(msg: unknown): unknown {
  // Primitives, null and undefined cannot carry cache_control.
  if (typeof msg !== 'object' || msg === null) return msg

  // JSON round trip: a deep copy that is safe to edit in place.
  const copy: Record<string, unknown> = JSON.parse(JSON.stringify(msg))
  delete copy['cache_control']

  const parts = copy['content']
  if (Array.isArray(parts)) {
    parts.forEach((part: unknown) => {
      // String parts and nulls are skipped; only object parts hold the marker.
      if (part !== null && typeof part === 'object') {
        delete (part as Record<string, unknown>)['cache_control']
      }
    })
  }

  return copy
}
154+
137155function compareProviderRequests (
138156 prev : Snapshot [ 'providerRequest' ] ,
139157 curr : Snapshot [ 'providerRequest' ] ,
@@ -199,13 +217,27 @@ function compareProviderRequests(
199217 console . log ( ` ✅ messages: identical (${ prevMsgs . length } messages)` )
200218 } else {
201219 console . log ( ` ❌ messages: differ (${ prevMsgs . length } → ${ currMsgs . length } )` )
220+
221+ // Compare with cache_control stripped to check structural stability
202222 const minLen = Math . min ( prevMsgs . length , currMsgs . length )
223+ let firstRawDiff = - 1
224+ let firstStructDiff = - 1
203225 for ( let i = 0 ; i < minLen ; i ++ ) {
204- if ( JSON . stringify ( prevMsgs [ i ] ) !== JSON . stringify ( currMsgs [ i ] ) ) {
205- console . log ( ` First diff at message index ${ i } ` )
206- break
226+ if ( firstRawDiff < 0 && JSON . stringify ( prevMsgs [ i ] ) !== JSON . stringify ( currMsgs [ i ] ) ) {
227+ firstRawDiff = i
228+ }
229+ if ( firstStructDiff < 0 && JSON . stringify ( stripCacheControlFromMessage ( prevMsgs [ i ] ) ) !== JSON . stringify ( stripCacheControlFromMessage ( currMsgs [ i ] ) ) ) {
230+ firstStructDiff = i
207231 }
208232 }
233+ if ( firstRawDiff >= 0 ) {
234+ console . log ( ` First raw diff at message index ${ firstRawDiff } ` )
235+ }
236+ if ( firstStructDiff >= 0 ) {
237+ console . log ( ` First structural diff (ignoring cache_control) at message index ${ firstStructDiff } ` )
238+ } else if ( prevMsgs . length === currMsgs . length ) {
239+ console . log ( ` ✅ Structurally identical (only cache_control placement differs)` )
240+ }
209241 if ( prevMsgs . length !== currMsgs . length ) {
210242 console . log ( ` Message count: ${ prevMsgs . length } → ${ currMsgs . length } ` )
211243 }
@@ -218,7 +250,7 @@ function compareProviderRequests(
218250
219251function comparePair ( prev : Snapshot , curr : Snapshot , prevFile : string , currFile : string ) {
220252 printSectionHeader (
221- `Comparing snapshot ${ prev . index } → ${ curr . index } (${ prev . agentType } )` ,
253+ `Comparing step ${ prev . index } → ${ curr . index } (${ prev . agentType } )` ,
222254 )
223255 console . log ( ` File A: ${ prevFile } ` )
224256 console . log ( ` File B: ${ currFile } ` )
@@ -229,8 +261,8 @@ function comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, currFile:
229261 if ( prev . systemHash || curr . systemHash ) {
230262 console . log ( ` Hashes: system=${ prev . systemHash ?? '?' } →${ curr . systemHash ?? '?' } tools=${ prev . toolsHash ?? '?' } →${ curr . toolsHash ?? '?' } ` )
231263 }
232- if ( prev . runId || curr . runId ) {
233- console . log ( ` RunId: ${ prev . runId ?? '?' } → ${ curr . runId ?? '?' } ` )
264+ if ( prev . runId !== curr . runId ) {
265+ console . log ( ` ⚠️ Different runs: ${ prev . runId ?? '?' } → ${ curr . runId ?? '?' } ` )
234266 }
235267
236268 const prevSystem = prev . preConversion . systemPrompt
@@ -323,11 +355,6 @@ function comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, currFile:
323355 console . log ( '\n 🎯 Cache Verdict:' )
324356 const systemIdentical = prevSystem === currSystem
325357 const toolsIdentical = prevToolJson === currToolJson
326- const providerNormIdentical =
327- prev . providerRequest && curr . providerRequest
328- ? JSON . stringify ( prev . providerRequest . normalized ) ===
329- JSON . stringify ( curr . providerRequest . normalized )
330- : undefined
331358
332359 if ( systemIdentical && toolsIdentical ) {
333360 console . log (
@@ -340,40 +367,54 @@ function comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, currFile:
340367 console . log ( ` ❌ PRE-CONVERSION CACHE MISS expected — ${ causes . join ( ' and ' ) } ` )
341368 }
342369
343- if ( providerNormIdentical === true ) {
344- console . log (
345- ' ✅ Post-conversion (provider) request bodies are IDENTICAL' ,
346- )
347- } else if ( providerNormIdentical === false ) {
348- console . log (
349- ' ❌ Post-conversion (provider) request bodies DIFFER — conversion layer may be introducing instability' ,
350- )
351- if ( systemIdentical && toolsIdentical ) {
352- console . log (
353- ' ⚠️ Pre-conversion was identical but post-conversion differs — bug is in the conversion layer!' ,
354- )
370+ // Check post-conversion structural stability (ignoring cache_control positions)
371+ if ( prev . providerRequest ?. normalized && curr . providerRequest ?. normalized ) {
372+ const prevObj = prev . providerRequest . normalized as Record < string , unknown >
373+ const currObj = curr . providerRequest . normalized as Record < string , unknown >
374+ if ( Array . isArray ( prevObj . messages ) && Array . isArray ( currObj . messages ) ) {
375+ const prevMsgs = prevObj . messages as unknown [ ]
376+ const currMsgs = currObj . messages as unknown [ ]
377+ const minLen = Math . min ( prevMsgs . length , currMsgs . length )
378+ let sharedStructural = 0
379+ for ( let i = 0 ; i < minLen ; i ++ ) {
380+ if ( JSON . stringify ( stripCacheControlFromMessage ( prevMsgs [ i ] ) ) === JSON . stringify ( stripCacheControlFromMessage ( currMsgs [ i ] ) ) ) {
381+ sharedStructural ++
382+ } else {
383+ break
384+ }
385+ }
386+ console . log ( ` 📊 Post-conversion shared prefix: ${ sharedStructural } /${ minLen } messages (ignoring cache_control)` )
387+ if ( sharedStructural < minLen && systemIdentical && toolsIdentical ) {
388+ console . log ( ` ⚠️ Structural content differs in shared prefix — possible conversion issue` )
389+ }
355390 }
356391 }
357392}
358393
/**
 * Read the command-line options from `process.argv`.
 *
 * Recognised arguments:
 *   --agent <type>  sets `agentFilter`
 *   --run <runId>   sets `runFilter`
 *   --cross-run     sets `crossRun` to true
 *   <anything not starting with "--">  becomes `dir` (the last one wins)
 *
 * A value-taking flag given as the final argument, and any other `--` flag,
 * is ignored. `dir` defaults to `<cwd>/debug/cache-debug`.
 *
 * @returns The parsed options; `agentFilter` and `runFilter` are `undefined`
 *   when not supplied.
 */
function parseArgs(): { dir: string; agentFilter?: string; runFilter?: string; crossRun: boolean } {
  const argv = process.argv.slice(2)
  const options: { dir: string; agentFilter?: string; runFilter?: string; crossRun: boolean } = {
    dir: join(process.cwd(), 'debug', 'cache-debug'),
    agentFilter: undefined,
    runFilter: undefined,
    crossRun: false,
  }

  let cursor = 0
  while (cursor < argv.length) {
    const token = argv[cursor]
    const hasFollowingValue = cursor + 1 < argv.length

    // Value-taking flags consume the next argument verbatim.
    if (token === '--agent' && hasFollowingValue) {
      options.agentFilter = argv[cursor + 1]
      cursor += 2
      continue
    }
    if (token === '--run' && hasFollowingValue) {
      options.runFilter = argv[cursor + 1]
      cursor += 2
      continue
    }

    if (token === '--cross-run') {
      options.crossRun = true
    } else if (!token.startsWith('--')) {
      // Positional argument: the snapshot directory.
      options.dir = token
    }
    cursor += 1
  }

  return options
}
374415
375416function main ( ) {
376- const { dir, agentFilter } = parseArgs ( )
417+ const { dir, agentFilter, runFilter , crossRun } = parseArgs ( )
377418
378419 let files : string [ ]
379420 try {
@@ -408,46 +449,118 @@ function main() {
408449 allSnapshots = allSnapshots . filter (
409450 ( s ) => s . snapshot . agentType === agentFilter ,
410451 )
411- console . log (
412- `Filtered to ${ allSnapshots . length } snapshot(s) for agent type: ${ agentFilter } ` ,
452+ }
453+
454+ if ( runFilter ) {
455+ allSnapshots = allSnapshots . filter (
456+ ( s ) => s . snapshot . runId === runFilter || s . snapshot . runId ?. startsWith ( runFilter ) ,
413457 )
414- } else {
415- console . log ( `Found ${ allSnapshots . length } snapshot(s) in ${ dir } ` )
416- const agentTypes = [ ...new Set ( allSnapshots . map ( ( s ) => s . snapshot . agentType ) ) ]
417- if ( agentTypes . length > 1 ) {
418- console . log (
419- `\n⚠️ Multiple agent types found: ${ agentTypes . join ( ', ' ) } ` ,
420- )
421- console . log (
422- ' Use --agent <type> to filter (e.g. --agent base2)' ,
423- )
424- }
458+ }
459+
460+ console . log ( `Found ${ allSnapshots . length } snapshot(s) in ${ dir } ` )
461+ if ( agentFilter ) {
462+ console . log ( ` Filtered to agent type: ${ agentFilter } ` )
463+ }
464+ if ( runFilter ) {
465+ console . log ( ` Filtered to run: ${ runFilter } ` )
425466 }
426467
427468 const withProviderRequest = allSnapshots . filter ( ( s ) => s . snapshot . providerRequest !== undefined ) . length
428469 console . log ( ` Provider request data: ${ withProviderRequest } /${ allSnapshots . length } snapshots` )
429470
430- console . log (
431- '\nFiles:' ,
432- allSnapshots . map ( ( s ) => ` ${ s . filename } ` ) . join ( '\n' ) ,
433- )
434-
435471 if ( allSnapshots . length < 2 ) {
436472 console . error ( '\nNeed at least 2 snapshots to compare. Send another prompt.' )
437473 process . exit ( 1 )
438474 }
439475
440- for ( let i = 1 ; i < allSnapshots . length ; i ++ ) {
441- comparePair (
442- allSnapshots [ i - 1 ] . snapshot ,
443- allSnapshots [ i ] . snapshot ,
444- allSnapshots [ i - 1 ] . filename ,
445- allSnapshots [ i ] . filename ,
476+ if ( crossRun ) {
477+ // Old behavior: compare all snapshots sequentially
478+ console . log ( '\nMode: cross-run (comparing all snapshots sequentially)' )
479+ console . log (
480+ '\nFiles:' ,
481+ allSnapshots . map ( ( s ) => ` ${ s . filename } ` ) . join ( '\n' ) ,
446482 )
483+
484+ let totalPairs = 0
485+ for ( let i = 1 ; i < allSnapshots . length ; i ++ ) {
486+ comparePair (
487+ allSnapshots [ i - 1 ] . snapshot ,
488+ allSnapshots [ i ] . snapshot ,
489+ allSnapshots [ i - 1 ] . filename ,
490+ allSnapshots [ i ] . filename ,
491+ )
492+ totalPairs ++
493+ }
494+
495+ console . log ( `\n${ '═' . repeat ( 80 ) } ` )
496+ console . log ( ` Summary: compared ${ totalPairs } consecutive pair(s) across all runs` )
497+ console . log ( `${ '═' . repeat ( 80 ) } \n` )
498+ return
499+ }
500+
501+ // Default: group by runId and compare within each run
502+ const byRun = new Map < string , Array < { snapshot : Snapshot ; filename : string } > > ( )
503+ const noRunId : Array < { snapshot : Snapshot ; filename : string } > = [ ]
504+
505+ for ( const s of allSnapshots ) {
506+ const runId = s . snapshot . runId
507+ if ( ! runId ) {
508+ noRunId . push ( s )
509+ continue
510+ }
511+ if ( ! byRun . has ( runId ) ) {
512+ byRun . set ( runId , [ ] )
513+ }
514+ byRun . get ( runId ) ! . push ( s )
515+ }
516+
517+ // Filter to runs with at least 2 steps
518+ const multiStepRuns = [ ...byRun . entries ( ) ] . filter ( ( [ , snaps ] ) => snaps . length >= 2 )
519+ const singleStepRuns = [ ...byRun . entries ( ) ] . filter ( ( [ , snaps ] ) => snaps . length < 2 )
520+
521+ console . log ( `\n Runs: ${ byRun . size } total, ${ multiStepRuns . length } with multiple steps` )
522+ if ( singleStepRuns . length > 0 ) {
523+ console . log ( ` Skipping ${ singleStepRuns . length } single-step run(s)` )
524+ }
525+ if ( noRunId . length > 0 ) {
526+ console . log ( ` Skipping ${ noRunId . length } snapshot(s) without runId` )
527+ }
528+
529+ let totalPairs = 0
530+
531+ for ( const [ runId , snaps ] of multiStepRuns ) {
532+ // Sort by index (step number), then by timestamp as tiebreaker
533+ snaps . sort ( ( a , b ) => {
534+ if ( a . snapshot . index !== b . snapshot . index ) {
535+ return a . snapshot . index - b . snapshot . index
536+ }
537+ return a . snapshot . timestamp . localeCompare ( b . snapshot . timestamp )
538+ } )
539+
540+ console . log ( `\n${ '═' . repeat ( 80 ) } ` )
541+ console . log ( ` Run: ${ runId } (${ snaps . length } steps)` )
542+ console . log ( ` Agent: ${ snaps [ 0 ] . snapshot . agentType } Model: ${ snaps [ 0 ] . snapshot . model ?? 'unknown' } ` )
543+ console . log ( `${ '═' . repeat ( 80 ) } ` )
544+
545+ // Print step overview
546+ for ( const s of snaps ) {
547+ console . log ( ` Step ${ s . snapshot . index } : ${ s . snapshot . preConversion . messages . length } msgs (${ s . filename } )` )
548+ }
549+
550+ // Compare consecutive steps
551+ for ( let i = 1 ; i < snaps . length ; i ++ ) {
552+ comparePair (
553+ snaps [ i - 1 ] . snapshot ,
554+ snaps [ i ] . snapshot ,
555+ snaps [ i - 1 ] . filename ,
556+ snaps [ i ] . filename ,
557+ )
558+ totalPairs ++
559+ }
447560 }
448561
449562 console . log ( `\n${ '═' . repeat ( 80 ) } ` )
450- console . log ( ` Summary: compared ${ allSnapshots . length - 1 } consecutive pair(s)` )
563+ console . log ( ` Summary: compared ${ totalPairs } consecutive step pair(s) across ${ multiStepRuns . length } run (s)` )
451564 console . log ( `${ '═' . repeat ( 80 ) } \n` )
452565}
453566
0 commit comments