add summary buffer config

Tarquinen · Tarquinen · commit 187b9bad2ee9 · 2026-03-24T21:33:42.000-04:00
diff --git a/README.md b/README.md
@@ -32,8 +32,8 @@ Compress is a tool exposed to your model that replaces closed, stale conversatio
 
 DCP supports two compression modes:
 
-- `range` mode compresses a contiguous span of conversation into one or more reusable block summaries.
-- `message` mode is experimental and compresses individual raw messages independently, letting the model manage context much more surgically around closed work.
+- `range` mode compresses contiguous spans of conversation into one or more summaries.
+- `message` mode (experimental) compresses individual raw messages independently, letting the model manage context much more surgically.
 
 In `range` mode, when a new compression overlaps an earlier one, the earlier summary is nested inside the new one so information is preserved through layers of compression rather than diluted away. In both modes, protected tool outputs (such as subagents and skills) and protected file patterns are kept in compression summaries, ensuring that the most important information is never lost. You can also enable `protectUserMessages` to preserve your messages verbatim during compression, though note that large prompts (e.g. copy-pasting log files in the prompt) will then never be compressed away.
 
@@ -55,6 +55,9 @@ DCP uses its own config file, searched in order:
 
 Each level overrides the previous, so project settings take priority over global. Restart OpenCode after making config changes.
 
+> [!NOTE]
+> If you use models with smaller context windows, such as GitHub Copilot models or local models, lower `compress.minContextLimit` and `compress.maxContextLimit` in your configuration to match the available context.
+
 > [!IMPORTANT]
 > Defaults are applied automatically. Expand this if you want to review or override settings.
 
@@ -111,10 +114,12 @@ Each level overrides the previous, so project settings take priority over global
         "permission": "allow",
         // Show compression content in a chat notification
         "showCompression": false,
+        // Let active summary tokens extend the effective maxContextLimit
+        "summaryBuffer": true,
         // Soft upper threshold: above this, DCP keeps injecting strong
         // compression nudges (based on nudgeFrequency), so compression is
         // much more likely. Accepts: number or "X%" of model context window.
-        "maxContextLimit": 150000,
+        "maxContextLimit": 100000,
         // Soft lower threshold for reminder nudges: below this, turn/iteration
         // reminders are off (compression less likely). At/above this, reminders
         // are on. Accepts: number or "X%" of model context window.
@@ -201,9 +206,6 @@ To customize behavior, add a file with the same name under an overrides director
 
 To reset an override, delete the matching file from your overrides directory.
 
-> [!NOTE]
-> `compress-range` and `compress-message` prompt changes apply after plugin restart because tool descriptions are registered at startup.
-
 ### Protected Tools
 
 By default, these tools are always protected from pruning:
diff --git a/dcp.schema.json b/dcp.schema.json
@@ -145,9 +145,14 @@
                     "default": false,
                     "description": "Show compression summaries in notifications"
                 },
+                "summaryBuffer": {
+                    "type": "boolean",
+                    "default": true,
+                    "description": "When enabled, active summary tokens extend the effective maxContextLimit used for context-limit nudges."
+                },
                 "maxContextLimit": {
                     "description": "Soft upper threshold. Above this, DCP keeps sending strong compression nudges (based on nudgeFrequency), so the model is pushed to compress. Accepts number or \"X%\" of the model context window.",
-                    "default": 150000,
+                    "default": 100000,
                     "oneOf": [
                         {
                             "type": "number"
@@ -237,7 +242,8 @@
                 "mode": "range",
                 "permission": "allow",
                 "showCompression": false,
-                "maxContextLimit": 150000,
+                "summaryBuffer": true,
+                "maxContextLimit": 100000,
                 "minContextLimit": 50000,
                 "nudgeFrequency": 5,
                 "iterationNudgeThreshold": 15,
diff --git a/lib/compress/message.ts b/lib/compress/message.ts
@@ -107,6 +107,7 @@ export function createCompressMessageTool(ctx: ToolContext): ReturnType<typeof t
                         mode: "message",
                         runId,
                         compressMessageId: toolCtx.messageID,
+                        summaryTokens,
                     },
                     plan.selection,
                     plan.anchorMessageId,
diff --git a/lib/compress/range.ts b/lib/compress/range.ts
@@ -148,6 +148,7 @@ export function createCompressRangeTool(ctx: ToolContext): ReturnType<typeof too
                         mode: "range",
                         runId,
                         compressMessageId: toolCtx.messageID,
+                        summaryTokens,
                     },
                     preparedPlan.selection,
                     preparedPlan.anchorMessageId,
diff --git a/lib/compress/state.ts b/lib/compress/state.ts
@@ -92,6 +92,7 @@ export function applyCompressionState(
         active: true,
         deactivatedByUser: false,
         compressedTokens: 0,
+        summaryTokens: input.summaryTokens,
         mode: input.mode,
         topic: input.topic,
         batchTopic: input.batchTopic,
diff --git a/lib/compress/types.ts b/lib/compress/types.ts
@@ -102,4 +102,5 @@ export interface CompressionStateInput {
     mode: CompressionMode
     runId: number
     compressMessageId: string
+    summaryTokens: number
 }
diff --git a/lib/config.ts b/lib/config.ts
@@ -16,6 +16,7 @@ export interface CompressConfig {
     mode: CompressMode
     permission: Permission
     showCompression: boolean
+    summaryBuffer: boolean
     maxContextLimit: number | `${number}%`
     minContextLimit: number | `${number}%`
     modelMaxLimits?: Record<string, number | `${number}%`>
@@ -111,6 +112,7 @@ export const VALID_CONFIG_KEYS = new Set([
     "compress.mode",
     "compress.permission",
     "compress.showCompression",
+    "compress.summaryBuffer",
     "compress.maxContextLimit",
     "compress.minContextLimit",
     "compress.modelMaxLimits",
@@ -353,6 +355,17 @@ export function validateConfigTypes(config: Record<string, any>): ValidationErro
                 })
             }
 
+            if (
+                compress.summaryBuffer !== undefined &&
+                typeof compress.summaryBuffer !== "boolean"
+            ) {
+                errors.push({
+                    key: "compress.summaryBuffer",
+                    expected: "boolean",
+                    actual: typeof compress.summaryBuffer,
+                })
+            }
+
             if (
                 compress.nudgeFrequency !== undefined &&
                 typeof compress.nudgeFrequency !== "number"
@@ -650,7 +663,8 @@ const defaultConfig: PluginConfig = {
         mode: "range",
         permission: "allow",
         showCompression: false,
-        maxContextLimit: 150000,
+        summaryBuffer: true,
+        maxContextLimit: 100000,
         minContextLimit: 50000,
         nudgeFrequency: 5,
         iterationNudgeThreshold: 15,
@@ -812,6 +826,7 @@ function mergeCompress(
         mode: override.mode ?? base.mode,
         permission: override.permission ?? base.permission,
         showCompression: override.showCompression ?? base.showCompression,
+        summaryBuffer: override.summaryBuffer ?? base.summaryBuffer,
         maxContextLimit: override.maxContextLimit ?? base.maxContextLimit,
         minContextLimit: override.minContextLimit ?? base.minContextLimit,
         modelMaxLimits: override.modelMaxLimits ?? base.modelMaxLimits,
diff --git a/lib/messages/inject/utils.ts b/lib/messages/inject/utils.ts
@@ -119,14 +119,42 @@ function resolveContextTokenLimit(
     return parseLimitValue(globalLimit)
 }
 
+function getActiveSummaryTokenUsage(state: SessionState): number { // Total summaryTokens across the blocks listed in activeBlockIds.
+    let total = 0
+
+    for (const blockId of state.prune.messages.activeBlockIds) {
+        const block = state.prune.messages.blocksById.get(blockId)
+        if (!block || !block.active) { // Ignore ids with no stored block and blocks whose active flag is false.
+            continue
+        }
+
+        total += block.summaryTokens // NOTE(review): assumes summaryTokens is always a number; an undefined value would turn the total into NaN — confirm every block creation path sets it.
+    }
+
+    return total
+}
+
 export function isContextOverLimits(
     config: PluginConfig,
     state: SessionState,
     providerId: string | undefined,
     modelId: string | undefined,
     messages: WithParts[],
 ) {
-    const maxContextLimit = resolveContextTokenLimit(config, state, providerId, modelId, "max")
+    const summaryTokenExtension = config.compress.summaryBuffer
+        ? getActiveSummaryTokenUsage(state)
+        : 0
+    const resolvedMaxContextLimit = resolveContextTokenLimit(
+        config,
+        state,
+        providerId,
+        modelId,
+        "max",
+    )
+    const maxContextLimit =
+        resolvedMaxContextLimit === undefined
+            ? undefined
+            : resolvedMaxContextLimit + summaryTokenExtension
     const minContextLimit = resolveContextTokenLimit(config, state, providerId, modelId, "min")
     const currentTokens = getCurrentTokenUsage(state, messages)
 
diff --git a/lib/state/types.ts b/lib/state/types.ts
@@ -35,6 +35,7 @@ export interface CompressionBlock {
     active: boolean
     deactivatedByUser: boolean
     compressedTokens: number
+    summaryTokens: number
     mode?: CompressionMode
     topic: string
     batchTopic?: string
diff --git a/lib/state/utils.ts b/lib/state/utils.ts
@@ -7,6 +7,7 @@ import type {
 } from "./types"
 import { isMessageCompacted, messageHasCompress } from "../shared-utils"
 import { isIgnoredUserMessage } from "../messages/utils"
+import { countTokens } from "../strategies/utils"
 
 interface PersistedPruneMessagesState {
     byMessageId?: Record<string, PrunedMessageEntry>
@@ -161,6 +162,12 @@ export function loadPruneMessagesState(
                     Number.isFinite(block.compressedTokens)
                         ? Math.max(0, block.compressedTokens)
                         : 0,
+                summaryTokens:
+                    typeof block.summaryTokens === "number" && Number.isFinite(block.summaryTokens)
+                        ? Math.max(0, block.summaryTokens)
+                        : typeof block.summary === "string"
+                          ? countTokens(block.summary)
+                          : 0,
                 mode: block.mode === "range" || block.mode === "message" ? block.mode : undefined,
                 topic: typeof block.topic === "string" ? block.topic : "",
                 batchTopic:
diff --git a/tests/compress-range-placeholders.test.ts b/tests/compress-range-placeholders.test.ts
@@ -17,6 +17,7 @@ function createBlock(blockId: number, body: string): CompressionBlock {
         active: true,
         deactivatedByUser: false,
         compressedTokens: 0,
+        summaryTokens: 0,
         topic: `Block ${blockId}`,
         startId: "m0001",
         endId: "m0002",
diff --git a/tests/message-priority.test.ts b/tests/message-priority.test.ts
@@ -532,6 +532,7 @@ test("message-mode rendered compressed summaries mark block IDs as BLOCKED", ()
         active: true,
         deactivatedByUser: false,
         compressedTokens: 0,
+        summaryTokens: 0,
         mode: "range",
         topic: "Earlier notes",
         batchTopic: "Earlier notes",
@@ -581,6 +582,7 @@ test("range-mode rendered compressed summaries keep block IDs", () => {
         active: true,
         deactivatedByUser: false,
         compressedTokens: 0,
+        summaryTokens: 0,
         mode: "range",
         topic: "Earlier notes",
         batchTopic: "Earlier notes",
diff --git a/tests/token-usage.test.ts b/tests/token-usage.test.ts
@@ -2,7 +2,9 @@ import assert from "node:assert/strict"
 import test from "node:test"
 import type { PluginConfig } from "../lib/config"
 import { isContextOverLimits } from "../lib/messages/inject/utils"
+import { wrapCompressedSummary } from "../lib/compress/state"
 import { createSessionState, type WithParts } from "../lib/state"
+import type { CompressionBlock } from "../lib/state"
 import { getCurrentTokenUsage } from "../lib/strategies/utils"
 
 function buildConfig(maxContextLimit: number, minContextLimit = 1): PluginConfig {
@@ -32,6 +34,7 @@ function buildConfig(maxContextLimit: number, minContextLimit = 1): PluginConfig
             mode: "message",
             permission: "allow",
             showCompression: false,
+            summaryBuffer: true,
             maxContextLimit,
             minContextLimit,
             nudgeFrequency: 5,
@@ -167,6 +170,37 @@ function buildPostCompactionAssistantMessage(): WithParts {
     }
 }
 
+function createActiveBlock( // Test fixture: builds an active "message"-mode CompressionBlock from an id, a summary string and a summaryTokens value.
+    blockId: number,
+    summary: string,
+    summaryTokens: number,
+): CompressionBlock {
+    return {
+        blockId,
+        runId: blockId, // runId and createdAt both reuse blockId
+        active: true,
+        deactivatedByUser: false,
+        compressedTokens: 0,
+        summaryTokens, // passed in directly, independent of the summary text
+        mode: "message",
+        topic: `Summary ${blockId}`,
+        batchTopic: `Summary ${blockId}`,
+        startId: "m0001",
+        endId: "m0001",
+        anchorMessageId: `msg-${blockId}`,
+        compressMessageId: `compress-${blockId}`,
+        includedBlockIds: [], // every id list below is left empty
+        consumedBlockIds: [],
+        parentBlockIds: [],
+        directMessageIds: [],
+        directToolIds: [],
+        effectiveMessageIds: [],
+        effectiveToolIds: [],
+        createdAt: blockId,
+        summary,
+    }
+}
+
 test("getCurrentTokenUsage returns 0 until a fresh assistant follows compaction", () => {
     const messages = buildCompactedMessages()
     const state = createSessionState()
@@ -209,3 +243,57 @@ test("isContextOverLimits ignores stale summary totals and resumes with fresh re
 
     assert.equal(overLimit.overMaxLimit, true)
 })
+
+test("isContextOverLimits extends the max threshold by active summary tokens", () => {
+    const messages = buildCompactedMessages()
+    messages.push(buildPostCompactionAssistantMessage())
+
+    const state = createSessionState()
+    state.lastCompaction = 2
+
+    const storedSummary = wrapCompressedSummary(7, repeatedWord("summary", 120))
+    state.prune.messages.blocksById.set(7, createActiveBlock(7, storedSummary, 1000)) // block 7 carries summaryTokens = 1000
+    state.prune.messages.activeBlockIds.add(7) // the id is also added to activeBlockIds, the set getActiveSummaryTokenUsage iterates
+
+    const freshReportedTotal = 2400 + 600 + 150 + 300 // = 3450; NOTE(review): presumably the token counts reported by the fixture assistant message — confirm against buildPostCompactionAssistantMessage
+
+    const underExtendedLimit = isContextOverLimits(
+        buildConfig(freshReportedTotal - 1, 1), // configured max is 1 below freshReportedTotal; the 1000-token extension is expected to lift the effective max above it
+        state,
+        undefined,
+        undefined,
+        messages,
+    )
+
+    assert.equal(underExtendedLimit.overMaxLimit, false)
+
+    const overExtendedLimit = isContextOverLimits(
+        buildConfig(freshReportedTotal - 1001, 1), // configured max + 1000 is still 1 below freshReportedTotal, so the max is expected to be exceeded
+        state,
+        undefined,
+        undefined,
+        messages,
+    )
+
+    assert.equal(overExtendedLimit.overMaxLimit, true)
+})
+
+test("isContextOverLimits does not extend the max threshold when summaryBuffer is disabled", () => {
+    const messages = buildCompactedMessages()
+    messages.push(buildPostCompactionAssistantMessage())
+
+    const state = createSessionState()
+    state.lastCompaction = 2
+
+    const storedSummary = wrapCompressedSummary(7, repeatedWord("summary", 120))
+    state.prune.messages.blocksById.set(7, createActiveBlock(7, storedSummary, 1000)) // block 7 carries summaryTokens = 1000
+    state.prune.messages.activeBlockIds.add(7)
+
+    const freshReportedTotal = 2400 + 600 + 150 + 300 // = 3450
+    const config = buildConfig(freshReportedTotal - 1, 1) // configured max is 1 below freshReportedTotal
+    config.compress.summaryBuffer = false // buildConfig sets summaryBuffer to true; switch it off for this case
+
+    const overLimit = isContextOverLimits(config, state, undefined, undefined, messages)
+
+    assert.equal(overLimit.overMaxLimit, true) // with no extension applied, the configured max is expected to be exceeded
+})

Original file line number	Diff line number	Diff line change
`@@ -102,4 +102,5 @@ export interface CompressionStateInput {`
`102`	`102`	`mode: CompressionMode`
`103`	`103`	`runId: number`
`104`	`104`	`compressMessageId: string`
	`105`	`+ summaryTokens: number`
`105`	`106`	`}`