Skip to content

Commit 35d7186

Browse files
committed
Update cache debug script
1 parent c949d77 commit 35d7186

File tree

1 file changed

+166
-53
lines changed

1 file changed

+166
-53
lines changed

scripts/compare-cache-debug.ts

Lines changed: 166 additions & 53 deletions
Original file line numberDiff line numberDiff line change
@@ -4,10 +4,14 @@
44
* Compare sequential cache debug snapshots to find what's causing prompt cache misses.
55
*
66
* Usage:
7-
* bun scripts/compare-cache-debug.ts [directory] [--agent <type>]
7+
* bun scripts/compare-cache-debug.ts [directory] [--agent <type>] [--run <runId>] [--cross-run]
88
*
99
* Options:
10-
* --agent <type> Only compare snapshots from this agent type (e.g. base2)
10+
* --agent <type> Only compare snapshots from this agent type (e.g. base2)
11+
* --run <runId> Only compare snapshots from this specific run
12+
* --cross-run Compare all snapshots sequentially (old behavior, across runs)
13+
*
14+
* Default: groups snapshots by runId and compares consecutive steps within each run.
1115
*
1216
* Default directory: debug/cache-debug/
1317
*
@@ -134,6 +138,20 @@ function printSectionHeader(title: string) {
134138
console.log(`${'─'.repeat(80)}`)
135139
}
136140

141+
/**
 * Produce a copy of a message with its `cache_control` markers removed, so two
 * messages can be compared without cache breakpoint placement counting as a
 * difference.
 *
 * The marker is dropped from the message itself and from each object entry of
 * its `content` array (when `content` is an array). Deeper nesting is left
 * untouched. The input is never mutated: the work is done on a JSON round-trip
 * copy.
 *
 * @param msg - A message value; anything that is not a non-null object is
 *   returned unchanged.
 * @returns The stripped copy, or `msg` itself when it is not an object.
 */
function stripCacheControlFromMessage(msg: unknown): unknown {
  // Primitives, null and undefined have nothing to strip.
  if (msg === null || typeof msg !== 'object') {
    return msg
  }

  // JSON round-trip gives an independent deep copy that is safe to edit.
  const clone = JSON.parse(JSON.stringify(msg))
  delete clone.cache_control

  const parts: unknown = clone.content
  if (!Array.isArray(parts)) {
    return clone
  }

  // Content entries may be strings or null; only object entries carry a marker.
  parts.forEach((entry) => {
    if (entry !== null && typeof entry === 'object') {
      delete entry.cache_control
    }
  })
  return clone
}
154+
137155
function compareProviderRequests(
138156
prev: Snapshot['providerRequest'],
139157
curr: Snapshot['providerRequest'],
@@ -199,13 +217,27 @@ function compareProviderRequests(
199217
console.log(` ✅ messages: identical (${prevMsgs.length} messages)`)
200218
} else {
201219
console.log(` ❌ messages: differ (${prevMsgs.length}${currMsgs.length})`)
220+
221+
// Compare with cache_control stripped to check structural stability
202222
const minLen = Math.min(prevMsgs.length, currMsgs.length)
223+
let firstRawDiff = -1
224+
let firstStructDiff = -1
203225
for (let i = 0; i < minLen; i++) {
204-
if (JSON.stringify(prevMsgs[i]) !== JSON.stringify(currMsgs[i])) {
205-
console.log(` First diff at message index ${i}`)
206-
break
226+
if (firstRawDiff < 0 && JSON.stringify(prevMsgs[i]) !== JSON.stringify(currMsgs[i])) {
227+
firstRawDiff = i
228+
}
229+
if (firstStructDiff < 0 && JSON.stringify(stripCacheControlFromMessage(prevMsgs[i])) !== JSON.stringify(stripCacheControlFromMessage(currMsgs[i]))) {
230+
firstStructDiff = i
207231
}
208232
}
233+
if (firstRawDiff >= 0) {
234+
console.log(` First raw diff at message index ${firstRawDiff}`)
235+
}
236+
if (firstStructDiff >= 0) {
237+
console.log(` First structural diff (ignoring cache_control) at message index ${firstStructDiff}`)
238+
} else if (prevMsgs.length === currMsgs.length) {
239+
console.log(` ✅ Structurally identical (only cache_control placement differs)`)
240+
}
209241
if (prevMsgs.length !== currMsgs.length) {
210242
console.log(` Message count: ${prevMsgs.length}${currMsgs.length}`)
211243
}
@@ -218,7 +250,7 @@ function compareProviderRequests(
218250

219251
function comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, currFile: string) {
220252
printSectionHeader(
221-
`Comparing snapshot ${prev.index}${curr.index} (${prev.agentType})`,
253+
`Comparing step ${prev.index}${curr.index} (${prev.agentType})`,
222254
)
223255
console.log(` File A: ${prevFile}`)
224256
console.log(` File B: ${currFile}`)
@@ -229,8 +261,8 @@ function comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, currFile:
229261
if (prev.systemHash || curr.systemHash) {
230262
console.log(` Hashes: system=${prev.systemHash ?? '?'}${curr.systemHash ?? '?'} tools=${prev.toolsHash ?? '?'}${curr.toolsHash ?? '?'}`)
231263
}
232-
if (prev.runId || curr.runId) {
233-
console.log(` RunId: ${prev.runId ?? '?'}${curr.runId ?? '?'}`)
264+
if (prev.runId !== curr.runId) {
265+
console.log(` ⚠️ Different runs: ${prev.runId ?? '?'}${curr.runId ?? '?'}`)
234266
}
235267

236268
const prevSystem = prev.preConversion.systemPrompt
@@ -323,11 +355,6 @@ function comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, currFile:
323355
console.log('\n 🎯 Cache Verdict:')
324356
const systemIdentical = prevSystem === currSystem
325357
const toolsIdentical = prevToolJson === currToolJson
326-
const providerNormIdentical =
327-
prev.providerRequest && curr.providerRequest
328-
? JSON.stringify(prev.providerRequest.normalized) ===
329-
JSON.stringify(curr.providerRequest.normalized)
330-
: undefined
331358

332359
if (systemIdentical && toolsIdentical) {
333360
console.log(
@@ -340,40 +367,54 @@ function comparePair(prev: Snapshot, curr: Snapshot, prevFile: string, currFile:
340367
console.log(` ❌ PRE-CONVERSION CACHE MISS expected — ${causes.join(' and ')}`)
341368
}
342369

343-
if (providerNormIdentical === true) {
344-
console.log(
345-
' ✅ Post-conversion (provider) request bodies are IDENTICAL',
346-
)
347-
} else if (providerNormIdentical === false) {
348-
console.log(
349-
' ❌ Post-conversion (provider) request bodies DIFFER — conversion layer may be introducing instability',
350-
)
351-
if (systemIdentical && toolsIdentical) {
352-
console.log(
353-
' ⚠️ Pre-conversion was identical but post-conversion differs — bug is in the conversion layer!',
354-
)
370+
// Check post-conversion structural stability (ignoring cache_control positions)
371+
if (prev.providerRequest?.normalized && curr.providerRequest?.normalized) {
372+
const prevObj = prev.providerRequest.normalized as Record<string, unknown>
373+
const currObj = curr.providerRequest.normalized as Record<string, unknown>
374+
if (Array.isArray(prevObj.messages) && Array.isArray(currObj.messages)) {
375+
const prevMsgs = prevObj.messages as unknown[]
376+
const currMsgs = currObj.messages as unknown[]
377+
const minLen = Math.min(prevMsgs.length, currMsgs.length)
378+
let sharedStructural = 0
379+
for (let i = 0; i < minLen; i++) {
380+
if (JSON.stringify(stripCacheControlFromMessage(prevMsgs[i])) === JSON.stringify(stripCacheControlFromMessage(currMsgs[i]))) {
381+
sharedStructural++
382+
} else {
383+
break
384+
}
385+
}
386+
console.log(` 📊 Post-conversion shared prefix: ${sharedStructural}/${minLen} messages (ignoring cache_control)`)
387+
if (sharedStructural < minLen && systemIdentical && toolsIdentical) {
388+
console.log(` ⚠️ Structural content differs in shared prefix — possible conversion issue`)
389+
}
355390
}
356391
}
357392
}
358393

359-
/**
 * Parse the script's command-line arguments (everything after the first two
 * entries of `process.argv`).
 *
 * Recognized arguments:
 * - `--agent <type>`: sets `agentFilter`
 * - `--run <runId>`: sets `runFilter`
 * - `--cross-run`: sets `crossRun` to true
 * - any argument not starting with `--`: the snapshot directory (last one wins)
 *
 * A value-taking flag with no value after it, or an unrecognized `--flag`, is
 * reported on stderr and otherwise ignored, so a typo does not silently run
 * the comparison unfiltered.
 *
 * @returns The parsed options. `dir` defaults to `debug/cache-debug` under the
 *   current working directory; `crossRun` defaults to false.
 */
function parseArgs(): { dir: string; agentFilter?: string; runFilter?: string; crossRun: boolean } {
  const args = process.argv.slice(2)
  let dir = join(process.cwd(), 'debug', 'cache-debug')
  let agentFilter: string | undefined
  let runFilter: string | undefined
  let crossRun = false

  for (let i = 0; i < args.length; i++) {
    const arg = args[i]

    if (arg === '--agent' || arg === '--run') {
      if (i + 1 >= args.length) {
        // Flag is the final argument, so there is no value to consume.
        console.error(`Warning: ${arg} requires a value; ignoring it`)
        continue
      }
      const value = args[++i]
      if (arg === '--agent') {
        agentFilter = value
      } else {
        runFilter = value
      }
    } else if (arg === '--cross-run') {
      crossRun = true
    } else if (arg.startsWith('--')) {
      console.error(`Warning: ignoring unknown option ${arg}`)
    } else {
      dir = arg
    }
  }

  return { dir, agentFilter, runFilter, crossRun }
}
374415

375416
function main() {
376-
const { dir, agentFilter } = parseArgs()
417+
const { dir, agentFilter, runFilter, crossRun } = parseArgs()
377418

378419
let files: string[]
379420
try {
@@ -408,46 +449,118 @@ function main() {
408449
allSnapshots = allSnapshots.filter(
409450
(s) => s.snapshot.agentType === agentFilter,
410451
)
411-
console.log(
412-
`Filtered to ${allSnapshots.length} snapshot(s) for agent type: ${agentFilter}`,
452+
}
453+
454+
if (runFilter) {
455+
allSnapshots = allSnapshots.filter(
456+
(s) => s.snapshot.runId === runFilter || s.snapshot.runId?.startsWith(runFilter),
413457
)
414-
} else {
415-
console.log(`Found ${allSnapshots.length} snapshot(s) in ${dir}`)
416-
const agentTypes = [...new Set(allSnapshots.map((s) => s.snapshot.agentType))]
417-
if (agentTypes.length > 1) {
418-
console.log(
419-
`\n⚠️ Multiple agent types found: ${agentTypes.join(', ')}`,
420-
)
421-
console.log(
422-
' Use --agent <type> to filter (e.g. --agent base2)',
423-
)
424-
}
458+
}
459+
460+
console.log(`Found ${allSnapshots.length} snapshot(s) in ${dir}`)
461+
if (agentFilter) {
462+
console.log(` Filtered to agent type: ${agentFilter}`)
463+
}
464+
if (runFilter) {
465+
console.log(` Filtered to run: ${runFilter}`)
425466
}
426467

427468
const withProviderRequest = allSnapshots.filter((s) => s.snapshot.providerRequest !== undefined).length
428469
console.log(` Provider request data: ${withProviderRequest}/${allSnapshots.length} snapshots`)
429470

430-
console.log(
431-
'\nFiles:',
432-
allSnapshots.map((s) => ` ${s.filename}`).join('\n'),
433-
)
434-
435471
if (allSnapshots.length < 2) {
436472
console.error('\nNeed at least 2 snapshots to compare. Send another prompt.')
437473
process.exit(1)
438474
}
439475

440-
for (let i = 1; i < allSnapshots.length; i++) {
441-
comparePair(
442-
allSnapshots[i - 1].snapshot,
443-
allSnapshots[i].snapshot,
444-
allSnapshots[i - 1].filename,
445-
allSnapshots[i].filename,
476+
if (crossRun) {
477+
// Old behavior: compare all snapshots sequentially
478+
console.log('\nMode: cross-run (comparing all snapshots sequentially)')
479+
console.log(
480+
'\nFiles:',
481+
allSnapshots.map((s) => ` ${s.filename}`).join('\n'),
446482
)
483+
484+
let totalPairs = 0
485+
for (let i = 1; i < allSnapshots.length; i++) {
486+
comparePair(
487+
allSnapshots[i - 1].snapshot,
488+
allSnapshots[i].snapshot,
489+
allSnapshots[i - 1].filename,
490+
allSnapshots[i].filename,
491+
)
492+
totalPairs++
493+
}
494+
495+
console.log(`\n${'═'.repeat(80)}`)
496+
console.log(` Summary: compared ${totalPairs} consecutive pair(s) across all runs`)
497+
console.log(`${'═'.repeat(80)}\n`)
498+
return
499+
}
500+
501+
// Default: group by runId and compare within each run
502+
const byRun = new Map<string, Array<{ snapshot: Snapshot; filename: string }>>()
503+
const noRunId: Array<{ snapshot: Snapshot; filename: string }> = []
504+
505+
for (const s of allSnapshots) {
506+
const runId = s.snapshot.runId
507+
if (!runId) {
508+
noRunId.push(s)
509+
continue
510+
}
511+
if (!byRun.has(runId)) {
512+
byRun.set(runId, [])
513+
}
514+
byRun.get(runId)!.push(s)
515+
}
516+
517+
// Filter to runs with at least 2 steps
518+
const multiStepRuns = [...byRun.entries()].filter(([, snaps]) => snaps.length >= 2)
519+
const singleStepRuns = [...byRun.entries()].filter(([, snaps]) => snaps.length < 2)
520+
521+
console.log(`\n Runs: ${byRun.size} total, ${multiStepRuns.length} with multiple steps`)
522+
if (singleStepRuns.length > 0) {
523+
console.log(` Skipping ${singleStepRuns.length} single-step run(s)`)
524+
}
525+
if (noRunId.length > 0) {
526+
console.log(` Skipping ${noRunId.length} snapshot(s) without runId`)
527+
}
528+
529+
let totalPairs = 0
530+
531+
for (const [runId, snaps] of multiStepRuns) {
532+
// Sort by index (step number), then by timestamp as tiebreaker
533+
snaps.sort((a, b) => {
534+
if (a.snapshot.index !== b.snapshot.index) {
535+
return a.snapshot.index - b.snapshot.index
536+
}
537+
return a.snapshot.timestamp.localeCompare(b.snapshot.timestamp)
538+
})
539+
540+
console.log(`\n${'═'.repeat(80)}`)
541+
console.log(` Run: ${runId} (${snaps.length} steps)`)
542+
console.log(` Agent: ${snaps[0].snapshot.agentType} Model: ${snaps[0].snapshot.model ?? 'unknown'}`)
543+
console.log(`${'═'.repeat(80)}`)
544+
545+
// Print step overview
546+
for (const s of snaps) {
547+
console.log(` Step ${s.snapshot.index}: ${s.snapshot.preConversion.messages.length} msgs (${s.filename})`)
548+
}
549+
550+
// Compare consecutive steps
551+
for (let i = 1; i < snaps.length; i++) {
552+
comparePair(
553+
snaps[i - 1].snapshot,
554+
snaps[i].snapshot,
555+
snaps[i - 1].filename,
556+
snaps[i].filename,
557+
)
558+
totalPairs++
559+
}
447560
}
448561

449562
console.log(`\n${'═'.repeat(80)}`)
450-
console.log(` Summary: compared ${allSnapshots.length - 1} consecutive pair(s)`)
563+
console.log(` Summary: compared ${totalPairs} consecutive step pair(s) across ${multiStepRuns.length} run(s)`)
451564
console.log(`${'═'.repeat(80)}\n`)
452565
}
453566

0 commit comments

Comments
 (0)