Prune context whenever more than 5 minutes have passed since the last message (the prompt cache has expired by then, so a smaller context saves lots of money on the unavoidable cache miss!)

jahooma · jahooma · commit cda474d0cd24 · 2026-01-04T13:26:24.000-08:00
diff --git a/.agents/context-pruner.ts b/.agents/context-pruner.ts
@@ -36,6 +36,9 @@ const definition: AgentDefinition = {
     const USER_MESSAGE_LIMIT = 20000
     const ASSISTANT_MESSAGE_LIMIT = 5000
 
+    // Prompt cache expiry time (Anthropic's default cache TTL is 5 minutes, refreshed on each cache hit)
+    const CACHE_EXPIRY_MS = 5 * 60 * 1000
+
     // Helper to truncate long text with 80% beginning + 20% end
     const truncateLongText = (text: string, limit: number): string => {
       if (text.length <= limit) {
@@ -72,9 +75,34 @@ const definition: AgentDefinition = {
       currentMessages.splice(lastSubagentSpawnIndex, 1)
     }
 
-    // Check if we need to prune at all (prune when context exceeds max)
+    // Check for prompt cache miss (>5 min gap before the USER_PROMPT message)
+    // The USER_PROMPT is the actual user message; INSTRUCTIONS_PROMPT comes after it
+    // We need to find the USER_PROMPT and check the gap between it and the last assistant message
+    let cacheWillMiss = false
+    const userPromptIndex = currentMessages.findLastIndex(
+      (message) => message.tags?.includes('USER_PROMPT'),
+    )
+    if (userPromptIndex > 0) {
+      const userPromptMsg = currentMessages[userPromptIndex]
+      // Find the last assistant message before USER_PROMPT (tool messages don't have sentAt)
+      let lastAssistantMsg: Message | undefined
+      for (let i = userPromptIndex - 1; i >= 0; i--) {
+        if (currentMessages[i].role === 'assistant') {
+          lastAssistantMsg = currentMessages[i]
+          break
+        }
+      }
+      if (userPromptMsg.sentAt && lastAssistantMsg?.sentAt) {
+        const gap = userPromptMsg.sentAt - lastAssistantMsg.sentAt
+        cacheWillMiss = gap > CACHE_EXPIRY_MS
+      }
+    }
+
+    // Check if we need to prune at all:
+    // - Prune when context exceeds max, OR
+    // - Prune when the prompt cache will miss (>5 min gap): no cached prefix is lost, so shrink the context before it is re-sent uncached
     // If not, return messages with just the subagent-specific tags removed
-    if (agentState.contextTokenCount <= maxContextLength) {
+    if (agentState.contextTokenCount <= maxContextLength && !cacheWillMiss) {
       yield {
         toolName: 'set_messages',
         input: { messages: currentMessages },
@@ -525,7 +553,8 @@ const definition: AgentDefinition = {
       }
     }
 
-    // Create the summarized message
+    // Create the summarized message with fresh sentAt timestamp
+    const now = Date.now()
     const summarizedMessage: Message = {
       role: 'user',
       content: [
@@ -540,12 +569,14 @@ ${summaryText}
 Please continue the conversation from here. In particular, try to address the user's latest request detailed in the summary above. You may need to re-gather context (e.g. read some files) to get up to speed and then tackle the user's request.`,
         },
       ],
+      sentAt: now,
     }
 
     // Build final messages array: summary first, then INSTRUCTIONS_PROMPT if it exists
     const finalMessages: Message[] = [summarizedMessage]
     if (instructionsPromptMessage) {
-      finalMessages.push(instructionsPromptMessage)
+      // Update sentAt to current time so future cache miss checks use fresh timestamps
+      finalMessages.push({ ...instructionsPromptMessage, sentAt: now })
     }
 
     yield {
diff --git a/.agents/types/util-types.ts b/.agents/types/util-types.ts
@@ -90,6 +90,13 @@ export type AuxiliaryMessageData = {
   providerOptions?: ProviderMetadata
   tags?: string[]
 
+  /**
+   * Unix timestamp (ms) when the message was added to history.
+   * Used to detect prompt cache expiry (>5 min gap = cache miss).
+   * This field is stripped before sending to the LLM.
+   */
+  sentAt?: number
+
   /** @deprecated Use tags instead. */
   timeToLive?: 'agentStep' | 'userPrompt'
   /** @deprecated Use tags instead. */
diff --git a/common/src/types/messages/codebuff-message.ts b/common/src/types/messages/codebuff-message.ts
@@ -12,6 +12,13 @@ export type AuxiliaryMessageData = {
   providerOptions?: ProviderMetadata
   tags?: string[]
 
+  /**
+   * Unix timestamp (ms) when the message was added to history.
+   * Used to detect prompt cache expiry (>5 min gap = cache miss).
+   * This field is stripped before sending to the LLM.
+   */
+  sentAt?: number
+
   // James: All the below is overly prescriptive for the framework.
   // Instead, let's tag what the message is, and let the user decide time to live, keep during truncation, etc.
   /** @deprecated Use tags instead. */
diff --git a/common/src/util/messages.ts b/common/src/util/messages.ts
@@ -391,11 +391,13 @@ export function userMessage(
       ...params,
       role: 'user',
       content: userContent(params.content),
+      sentAt: Date.now(),
     }
   }
   return {
     role: 'user',
     content: userContent(params),
+    sentAt: Date.now(),
   }
 }
 
@@ -427,11 +429,13 @@ export function assistantMessage(
       ...params,
       role: 'assistant',
       content: assistantContent(params.content),
+      sentAt: Date.now(),
     }
   }
   return {
     role: 'assistant',
     content: assistantContent(params),
+    sentAt: Date.now(),
   }
 }
 
diff --git a/packages/agent-runtime/src/run-agent-step.ts b/packages/agent-runtime/src/run-agent-step.ts
@@ -669,6 +669,7 @@ export async function loopAgentSteps(
         role: 'user' as const,
         content: buildUserMessageContent(prompt, spawnParams, content),
         tags: ['USER_PROMPT'],
+        sentAt: Date.now(),
 
         // James: Deprecate the below, only use tags, which are not prescriptive.
         keepDuringTruncation: true,

Original file line number	Diff line number	Diff line change
`@@ -391,11 +391,13 @@ export function userMessage(`
`391`	`391`	`...params,`
`392`	`392`	`role: 'user',`
`393`	`393`	`content: userContent(params.content),`
	`394`	`+ sentAt: Date.now(),`
`394`	`395`	`}`
`395`	`396`	`}`
`396`	`397`	`return {`
`397`	`398`	`role: 'user',`
`398`	`399`	`content: userContent(params),`
	`400`	`+ sentAt: Date.now(),`
`399`	`401`	`}`
`400`	`402`	`}`
`401`	`403`
`@@ -427,11 +429,13 @@ export function assistantMessage(`
`427`	`429`	`...params,`
`428`	`430`	`role: 'assistant',`
`429`	`431`	`content: assistantContent(params.content),`
	`432`	`+ sentAt: Date.now(),`
`430`	`433`	`}`
`431`	`434`	`}`
`432`	`435`	`return {`
`433`	`436`	`role: 'assistant',`
`434`	`437`	`content: assistantContent(params),`
	`438`	`+ sentAt: Date.now(),`
`435`	`439`	`}`
`436`	`440`	`}`
`437`	`441`