Skip to content

Commit cda474d

Browse files
committed
Prune context whenever it has been more than 5 minutes since the last message (cuts the cost of the resulting prompt cache misses, saving lots of money!)
1 parent e8a3a9f commit cda474d

File tree

5 files changed

+54
-4
lines changed

5 files changed

+54
-4
lines changed

.agents/context-pruner.ts

Lines changed: 35 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,9 @@ const definition: AgentDefinition = {
3636
const USER_MESSAGE_LIMIT = 20000
3737
const ASSISTANT_MESSAGE_LIMIT = 5000
3838

39+
// Prompt cache expiry time in milliseconds (Anthropic caches prompts for 5 minutes)
40+
const CACHE_EXPIRY_MS = 5 * 60 * 1000
41+
3942
// Helper to truncate long text with 80% beginning + 20% end
4043
const truncateLongText = (text: string, limit: number): string => {
4144
if (text.length <= limit) {
@@ -72,9 +75,34 @@ const definition: AgentDefinition = {
7275
currentMessages.splice(lastSubagentSpawnIndex, 1)
7376
}
7477

75-
// Check if we need to prune at all (prune when context exceeds max)
78+
// Check for prompt cache miss (>5 min gap before the USER_PROMPT message)
79+
// The USER_PROMPT is the actual user message; INSTRUCTIONS_PROMPT comes after it
80+
// We need to find the USER_PROMPT and check the gap between it and the last assistant message
81+
let cacheWillMiss = false
82+
const userPromptIndex = currentMessages.findLastIndex(
83+
(message) => message.tags?.includes('USER_PROMPT'),
84+
)
85+
if (userPromptIndex > 0) {
86+
const userPromptMsg = currentMessages[userPromptIndex]
87+
// Walk backwards to the most recent assistant message before USER_PROMPT (tool messages don't have sentAt, so they can't be used for the gap check)
88+
let lastAssistantMsg: Message | undefined
89+
for (let i = userPromptIndex - 1; i >= 0; i--) {
90+
if (currentMessages[i].role === 'assistant') {
91+
lastAssistantMsg = currentMessages[i]
92+
break
93+
}
94+
}
95+
if (userPromptMsg.sentAt && lastAssistantMsg?.sentAt) {
96+
const gap = userPromptMsg.sentAt - lastAssistantMsg.sentAt
97+
cacheWillMiss = gap > CACHE_EXPIRY_MS
98+
}
99+
}
100+
101+
// Check if we need to prune at all:
102+
// - Prune when context exceeds max, OR
103+
// - Prune when the prompt cache will miss (>5 min gap): no cached prefix is lost by rewriting the context, so take the chance to start from a smaller one
76104
// If not, return messages with just the subagent-specific tags removed
77-
if (agentState.contextTokenCount <= maxContextLength) {
105+
if (agentState.contextTokenCount <= maxContextLength && !cacheWillMiss) {
78106
yield {
79107
toolName: 'set_messages',
80108
input: { messages: currentMessages },
@@ -525,7 +553,8 @@ const definition: AgentDefinition = {
525553
}
526554
}
527555

528-
// Create the summarized message
556+
// Create the summarized message with fresh sentAt timestamp
557+
const now = Date.now()
529558
const summarizedMessage: Message = {
530559
role: 'user',
531560
content: [
@@ -540,12 +569,14 @@ ${summaryText}
540569
Please continue the conversation from here. In particular, try to address the user's latest request detailed in the summary above. You may need to re-gather context (e.g. read some files) to get up to speed and then tackle the user's request.`,
541570
},
542571
],
572+
sentAt: now,
543573
}
544574

545575
// Build final messages array: summary first, then INSTRUCTIONS_PROMPT if it exists
546576
const finalMessages: Message[] = [summarizedMessage]
547577
if (instructionsPromptMessage) {
548-
finalMessages.push(instructionsPromptMessage)
578+
// Update sentAt to current time so future cache miss checks use fresh timestamps
579+
finalMessages.push({ ...instructionsPromptMessage, sentAt: now })
549580
}
550581

551582
yield {

.agents/types/util-types.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -90,6 +90,13 @@ export type AuxiliaryMessageData = {
9090
providerOptions?: ProviderMetadata
9191
tags?: string[]
9292

93+
/**
94+
* Unix timestamp (ms) when the message was added to history.
95+
* Used to detect prompt cache expiry (>5 min gap = cache miss).
96+
* This field is stripped before sending to the LLM.
97+
*/
98+
sentAt?: number
99+
93100
/** @deprecated Use tags instead. */
94101
timeToLive?: 'agentStep' | 'userPrompt'
95102
/** @deprecated Use tags instead. */

common/src/types/messages/codebuff-message.ts

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,13 @@ export type AuxiliaryMessageData = {
1212
providerOptions?: ProviderMetadata
1313
tags?: string[]
1414

15+
/**
16+
* Unix timestamp (ms) when the message was added to history.
17+
* Used to detect prompt cache expiry (>5 min gap = cache miss).
18+
* This field is stripped before sending to the LLM.
19+
*/
20+
sentAt?: number
21+
1522
// James: All the below is overly prescriptive for the framework.
1623
// Instead, let's tag what the message is, and let the user decide time to live, keep during truncation, etc.
1724
/** @deprecated Use tags instead. */

common/src/util/messages.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -391,11 +391,13 @@ export function userMessage(
391391
...params,
392392
role: 'user',
393393
content: userContent(params.content),
394+
sentAt: Date.now(),
394395
}
395396
}
396397
return {
397398
role: 'user',
398399
content: userContent(params),
400+
sentAt: Date.now(),
399401
}
400402
}
401403

@@ -427,11 +429,13 @@ export function assistantMessage(
427429
...params,
428430
role: 'assistant',
429431
content: assistantContent(params.content),
432+
sentAt: Date.now(),
430433
}
431434
}
432435
return {
433436
role: 'assistant',
434437
content: assistantContent(params),
438+
sentAt: Date.now(),
435439
}
436440
}
437441

packages/agent-runtime/src/run-agent-step.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -669,6 +669,7 @@ export async function loopAgentSteps(
669669
role: 'user' as const,
670670
content: buildUserMessageContent(prompt, spawnParams, content),
671671
tags: ['USER_PROMPT'],
672+
sentAt: Date.now(),
672673

673674
// James: Deprecate the below, only use tags, which are not prescriptive.
674675
keepDuringTruncation: true,

0 commit comments

Comments
 (0)