20 changes: 19 additions & 1 deletion .claude/settings.local.json
@@ -27,7 +27,25 @@
"Bash(jobs:*)",
"Bash(gh pr view:*)",
"Bash(gh run list:*)",
"Bash(gh run view:*)"
"Bash(gh run view:*)",
"Bash(npm install:*)",
"Bash(npm run lint:*)",
"Bash(docker ps:*)",
"Bash(docker logs:*)",
"Bash(docker exec:*)",
"Bash(docker compose build:*)",
"Bash(docker compose restart:*)",
"Bash(docker compose:*)",
"Bash(docker run:*)",
"Bash(python3:*)",
"Bash(iconv:*)",
"Bash(ls:*)",
"Bash(pgrep:*)",
"Bash(docker-compose build:*)",
"Bash(xargs:*)",
"Bash(docker info:*)",
"Bash(docker container ls:*)",
"Bash(node --check:*)"
],
"deny": [],
"ask": []
106 changes: 106 additions & 0 deletions .env.example
@@ -340,3 +340,109 @@ HOT_RELOAD_DEBOUNCE_MS=1000
# VERTEX_API_KEY=your-google-api-key
# VERTEX_MODEL=gemini-2.0-flash
# npm start

# ==============================================================================
# Headroom Context Compression (Sidecar)
# ==============================================================================
# Headroom provides 47-92% token reduction through intelligent context compression.
# It runs as a Python sidecar container managed automatically by Lynkr via Docker.
#
# Features:
# - Smart Crusher: Statistical JSON compression for tool outputs
# - Cache Aligner: Stabilizes dynamic content for provider cache hits
# - CCR (Compress-Cache-Retrieve): Reversible compression with on-demand retrieval
# - Rolling Window: Token budget enforcement with turn-based windowing
# - LLMLingua (optional): ML-based 20x compression with GPU acceleration

# Enable/disable Headroom compression (default: false)
HEADROOM_ENABLED=false

# Sidecar endpoint (auto-configured when Docker is enabled)
HEADROOM_ENDPOINT=http://localhost:8787

# Request timeout in milliseconds
HEADROOM_TIMEOUT_MS=5000

# Minimum tokens to trigger compression (skip small requests)
HEADROOM_MIN_TOKENS=500

# Operating mode: "audit" (observe only) or "optimize" (apply transforms)
HEADROOM_MODE=optimize

# Provider for cache optimization hints: anthropic, openai, google
HEADROOM_PROVIDER=anthropic

# Log level: debug, info, warning, error
HEADROOM_LOG_LEVEL=info

# ==============================================================================
# Headroom Docker Configuration
# ==============================================================================
# When enabled, Lynkr automatically manages the Headroom container lifecycle

# Enable Docker container management (default: true when HEADROOM_ENABLED=true)
HEADROOM_DOCKER_ENABLED=true

# Docker image to use
HEADROOM_DOCKER_IMAGE=lynkr/headroom-sidecar:latest

# Container name
HEADROOM_DOCKER_CONTAINER_NAME=lynkr-headroom

# Port mapping
HEADROOM_DOCKER_PORT=8787

# Resource limits
HEADROOM_DOCKER_MEMORY_LIMIT=512m
HEADROOM_DOCKER_CPU_LIMIT=1.0

# Restart policy: no, always, unless-stopped, on-failure
HEADROOM_DOCKER_RESTART_POLICY=unless-stopped

# Docker network (optional, leave empty for default bridge)
# HEADROOM_DOCKER_NETWORK=lynkr-network

# Build from local source instead of pulling image
# HEADROOM_DOCKER_AUTO_BUILD=true
# HEADROOM_DOCKER_BUILD_CONTEXT=./headroom-sidecar

# ==============================================================================
# Headroom Transform Settings
# ==============================================================================

# Smart Crusher (statistical JSON compression)
HEADROOM_SMART_CRUSHER=true
HEADROOM_SMART_CRUSHER_MIN_TOKENS=200
HEADROOM_SMART_CRUSHER_MAX_ITEMS=15

# Tool Crusher (fixed-rules compression for tool outputs)
HEADROOM_TOOL_CRUSHER=true

# Cache Aligner (stabilize dynamic content like UUIDs, timestamps)
HEADROOM_CACHE_ALIGNER=true

# Rolling Window (context overflow management)
HEADROOM_ROLLING_WINDOW=true
HEADROOM_KEEP_TURNS=3

# ==============================================================================
# Headroom CCR (Compress-Cache-Retrieve)
# ==============================================================================

# Enable CCR for reversible compression with on-demand retrieval
HEADROOM_CCR=true

# TTL for cached content in seconds (default: 5 minutes)
HEADROOM_CCR_TTL=300

# ==============================================================================
# Headroom LLMLingua (Optional ML Compression)
# ==============================================================================
# LLMLingua-2 provides ML-based 20x compression using BERT token classification.
# A GPU is strongly recommended for reasonable performance; CPU works but needs
# longer timeouts.

# Enable LLMLingua (default: false; GPU strongly recommended)
HEADROOM_LLMLINGUA=false

# Device: cuda, cpu, auto
HEADROOM_LLMLINGUA_DEVICE=auto
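
The settings above suggest a minimal opt-in path. A hedged sketch, assuming `.env.example` sits at the repository root and Lynkr reads `.env` on startup (as the earlier `npm start` comment implies):

```shell
# Minimal sketch: opt in to Headroom via .env, starting in audit mode.
cp .env.example .env
# Flip the master switch and observe before applying any transforms:
#   HEADROOM_ENABLED=true
#   HEADROOM_MODE=audit
# Once the audit output looks sane, switch HEADROOM_MODE to "optimize".
npm start
```

Starting in `audit` mode matches the mode description above: Headroom observes requests without applying transforms, which makes it easier to confirm the sidecar behaves as expected before letting it rewrite context.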
85 changes: 85 additions & 0 deletions docker-compose.yml
@@ -142,6 +142,27 @@ services:
CIRCUIT_BREAKER_TIMEOUT: ${CIRCUIT_BREAKER_TIMEOUT:-60000}
LOAD_SHEDDING_MEMORY_THRESHOLD: ${LOAD_SHEDDING_MEMORY_THRESHOLD:-0.85}

# ============================================================
# HEADROOM CONTEXT COMPRESSION (OPTIONAL)
# ============================================================
# Provides 47-92% token reduction through intelligent compression
HEADROOM_ENABLED: ${HEADROOM_ENABLED:-false}
HEADROOM_ENDPOINT: ${HEADROOM_ENDPOINT:-http://headroom:8787}
HEADROOM_TIMEOUT_MS: ${HEADROOM_TIMEOUT_MS:-5000}
HEADROOM_MIN_TOKENS: ${HEADROOM_MIN_TOKENS:-500}
HEADROOM_MODE: ${HEADROOM_MODE:-optimize}
      # Disable Lynkr's own Docker management; docker-compose owns the container here
HEADROOM_DOCKER_ENABLED: "false"
# Transform settings
HEADROOM_SMART_CRUSHER: ${HEADROOM_SMART_CRUSHER:-true}
HEADROOM_TOOL_CRUSHER: ${HEADROOM_TOOL_CRUSHER:-true}
HEADROOM_CACHE_ALIGNER: ${HEADROOM_CACHE_ALIGNER:-true}
HEADROOM_ROLLING_WINDOW: ${HEADROOM_ROLLING_WINDOW:-true}
HEADROOM_KEEP_TURNS: ${HEADROOM_KEEP_TURNS:-3}
HEADROOM_CCR: ${HEADROOM_CCR:-true}
HEADROOM_CCR_TTL: ${HEADROOM_CCR_TTL:-300}
HEADROOM_LLMLINGUA: ${HEADROOM_LLMLINGUA:-false}

volumes:
- ./data:/app/data # Persist SQLite databases
- .:/workspace # Mount workspace
@@ -244,13 +265,77 @@ services:
retries: 3
start_period: 20s

# Headroom context compression sidecar (47-92% token reduction)
headroom:
image: lynkr/headroom-sidecar:latest
container_name: lynkr-headroom
profiles:
- headroom
build:
context: ./headroom-sidecar
dockerfile: Dockerfile
ports:
- "8787:8787"
environment:
HEADROOM_HOST: "0.0.0.0"
HEADROOM_PORT: "8787"
HEADROOM_LOG_LEVEL: ${HEADROOM_LOG_LEVEL:-info}
HEADROOM_MODE: ${HEADROOM_MODE:-optimize}
HEADROOM_PROVIDER: ${HEADROOM_PROVIDER:-anthropic}
# Transforms
HEADROOM_SMART_CRUSHER: ${HEADROOM_SMART_CRUSHER:-true}
HEADROOM_SMART_CRUSHER_MIN_TOKENS: ${HEADROOM_SMART_CRUSHER_MIN_TOKENS:-200}
HEADROOM_SMART_CRUSHER_MAX_ITEMS: ${HEADROOM_SMART_CRUSHER_MAX_ITEMS:-15}
HEADROOM_TOOL_CRUSHER: ${HEADROOM_TOOL_CRUSHER:-true}
HEADROOM_CACHE_ALIGNER: ${HEADROOM_CACHE_ALIGNER:-true}
HEADROOM_ROLLING_WINDOW: ${HEADROOM_ROLLING_WINDOW:-true}
HEADROOM_KEEP_TURNS: ${HEADROOM_KEEP_TURNS:-3}
# CCR
HEADROOM_CCR: ${HEADROOM_CCR:-true}
HEADROOM_CCR_TTL: ${HEADROOM_CCR_TTL:-300}
# LLMLingua (optional, requires GPU)
HEADROOM_LLMLINGUA: ${HEADROOM_LLMLINGUA:-false}
HEADROOM_LLMLINGUA_DEVICE: ${HEADROOM_LLMLINGUA_DEVICE:-auto}
volumes:
- headroom-data:/app/data
restart: unless-stopped
networks:
- lynkr-network
healthcheck:
test: ["CMD", "curl", "-f", "http://localhost:8787/health"]
interval: 30s
timeout: 10s
retries: 3
start_period: 30s
labels:
- "com.lynkr.service=headroom"
- "com.lynkr.description=Context compression sidecar"
deploy:
resources:
limits:
cpus: '1'
memory: 512M
reservations:
cpus: '0.25'
memory: 256M
    # For GPU support (LLMLingua), add a devices entry to the
    # deploy.resources.reservations block above (a second deploy: key
    # would be an invalid duplicate):
    #       devices:
    #         - driver: nvidia
    #           count: 1
    #           capabilities: [gpu]

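Because the service is gated behind the `headroom` profile above, it does not start with a plain `docker compose up`. A sketch of bringing it up and probing the same endpoint the healthcheck curls:

```shell
# Start the stack with the optional headroom profile enabled
docker compose --profile headroom up -d

# Probe the sidecar's health endpoint from the host (port 8787 is published)
curl -f http://localhost:8787/health
```

Gating the sidecar behind a profile keeps the default `docker compose up` lightweight for users who leave `HEADROOM_ENABLED=false`.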
volumes:
ollama-data:
driver: local
# ollama-webui-data:
# driver: local
searxng-data:
driver: local
headroom-data:
driver: local

networks:
lynkr-network:
3 changes: 2 additions & 1 deletion documentation/README.md
@@ -31,6 +31,7 @@ Understand Lynkr's capabilities:
- **[Architecture & Features](features.md)** - System architecture, request flow, format conversion, and core capabilities
- **[Memory System](memory-system.md)** - Titans-inspired long-term memory with surprise-based filtering and decay
- **[Token Optimization](token-optimization.md)** - Achieve 60-80% cost reduction through smart tool selection, prompt caching, and memory deduplication
- **[Headroom Compression](headroom.md)** - 47-92% token reduction through intelligent context compression (Smart Crusher, CCR, LLMLingua)
- **[Tools & Execution Modes](tools.md)** - Tool calling, server vs client execution, custom tool integration, MCP support

---
@@ -71,7 +72,7 @@ Get help and contribute:
- [Installation](installation.md) | [Providers](providers.md) | [Claude Code](claude-code-cli.md) | [Cursor](cursor-integration.md) | [Embeddings](embeddings.md)

### Features & Optimization
- [Features](features.md) | [Memory System](memory-system.md) | [Token Optimization](token-optimization.md) | [Tools](tools.md)
- [Features](features.md) | [Memory System](memory-system.md) | [Token Optimization](token-optimization.md) | [Headroom](headroom.md) | [Tools](tools.md)

### Deployment & Production
- [Docker](docker.md) | [Production](production.md) | [API Reference](api.md)