CodebuffAI
diff --git a/‎backend/src/__tests__/run-programmatic-step.test.ts‎
Lines changed: 29 additions & 12 deletions b/‎backend/src/__tests__/run-programmatic-step.test.ts‎
Lines changed: 29 additions & 12 deletions
diff --git a/‎backend/src/__tests__/sandbox-generator.test.ts‎
Lines changed: 39 additions & 1 deletion b/‎backend/src/__tests__/sandbox-generator.test.ts‎
Lines changed: 39 additions & 1 deletion
diff --git a/‎backend/src/run-programmatic-step.ts‎
Lines changed: 1 addition & 1 deletion b/‎backend/src/run-programmatic-step.ts‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎knowledge.md‎
Lines changed: 21 additions & 0 deletions b/‎knowledge.md‎
Lines changed: 21 additions & 0 deletions
diff --git a/‎npm-app/release/package.json‎
Lines changed: 1 addition & 1 deletion b/‎npm-app/release/package.json‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎npm-app/src/cli-definitions.ts‎
Lines changed: 5 additions & 0 deletions b/‎npm-app/src/cli-definitions.ts‎
Lines changed: 5 additions & 0 deletions
@@ -24,6 +24,8 @@ import {
 import { mockFileContext, MockWebSocket } from './test-utils'
 import * as toolExecutor from '../tools/tool-executor'
 import * as requestContext from '../websockets/request-context'
+import * as agentRun from '../agent-run'
+import * as websocketAction from '../websockets/websocket-action'
 
 import type { AgentTemplate, StepGenerator } from '../templates/types'
 import type { PublicAgentState } from '@codebuff/common/types/agent-template'
@@ -40,6 +42,8 @@ describe('runProgrammaticStep', () => {
   let mockParams: any
   let executeToolCallSpy: any
   let getRequestContextSpy: any
+  let addAgentStepSpy: any
+  let sendActionSpy: any
 
   beforeAll(() => {
     // Mock logger
@@ -74,6 +78,17 @@ describe('runProgrammaticStep', () => {
       processedRepoId: 'test-repo-id',
     }))
 
+    // Mock addAgentStep
+    addAgentStepSpy = spyOn(agentRun, 'addAgentStep').mockImplementation(
+      async () => 'test-step-id',
+    )
+
+    // Mock sendAction
+    sendActionSpy = spyOn(
+      websocketAction,
+      'sendAction',
+    ).mockImplementation(() => {})
+
     // Mock crypto.randomUUID
     spyOn(crypto, 'randomUUID').mockImplementation(
       () =>
@@ -103,11 +118,14 @@ describe('runProgrammaticStep', () => {
     mockAgentState = {
       ...sessionState.mainAgentState,
       agentId: 'test-agent-id',
+      runId: 'test-run-id' as `${string}-${string}-${string}-${string}-${string}`,
       messageHistory: [
         { role: 'user', content: 'Initial message' },
         { role: 'assistant', content: 'Initial response' },
       ],
       output: undefined,
+      directCreditsUsed: 0,
+      childRunIds: [],
     }
 
     // Create mock params
@@ -124,6 +142,9 @@ describe('runProgrammaticStep', () => {
       assistantMessage: undefined,
       assistantPrefix: undefined,
       ws: new MockWebSocket() as unknown as WebSocket,
+      localAgentTemplates: {},
+      stepsComplete: false,
+      stepNumber: 1,
     }
   })
 
@@ -184,9 +205,10 @@ describe('runProgrammaticStep', () => {
       expect(result1.endTurn).toBe(false)
 
       // Second call should return early due to STEP_ALL state
-      const result2 = await runProgrammaticStep(mockAgentState, mockParams)
+      // Use the same agent state with the same runId
+      const result2 = await runProgrammaticStep(result1.agentState, mockParams)
       expect(result2.endTurn).toBe(false)
-      expect(result2.agentState).toEqual(mockAgentState)
+      expect(result2.agentState.agentId).toEqual(result1.agentState.agentId)
     })
 
     it('should throw error when template has no handleStep', async () => {
@@ -215,12 +237,7 @@ describe('runProgrammaticStep', () => {
 
       // Track chunks sent via sendSubagentChunk
       const sentChunks: string[] = []
-      const originalSendAction =
-        require('../websockets/websocket-action').sendAction
-      const sendActionSpy = spyOn(
-        require('../websockets/websocket-action'),
-        'sendAction',
-      ).mockImplementation((ws: any, action: any) => {
+      sendActionSpy.mockImplementation((ws: any, action: any) => {
         if (action.type === 'subagent-response-chunk') {
           sentChunks.push(action.chunk)
         }
@@ -619,7 +636,7 @@ describe('runProgrammaticStep', () => {
       // Verify STEP_ALL behavior
       expect(executeToolCallSpy).not.toHaveBeenCalled() // No tools should execute
       expect(result2.endTurn).toBe(false) // Should still not end turn
-      expect(result2.agentState).toEqual(result1.agentState) // State should be unchanged
+      expect(result2.agentState.agentId).toEqual(result1.agentState.agentId) // State should be similar
       expect(stepCount).toBe(1) // Generator should not have run again
 
       // Third call - verify STEP_ALL state persists
@@ -629,7 +646,7 @@ describe('runProgrammaticStep', () => {
 
       expect(executeToolCallSpy).not.toHaveBeenCalled()
       expect(result3.endTurn).toBe(false)
-      expect(result3.agentState).toEqual(result1.agentState)
+      expect(result3.agentState.agentId).toEqual(result1.agentState.agentId)
       expect(stepCount).toBe(1) // Generator should still not have run again
     })
 
@@ -1120,7 +1137,7 @@ describe('runProgrammaticStep', () => {
       expect(generatorCallCount).toBe(1)
 
       // Second call with stepsComplete=false should return early due to STEP_ALL
-      const result2 = await runProgrammaticStep(mockAgentState, {
+      const result2 = await runProgrammaticStep(result1.agentState, {
         ...mockParams,
         stepsComplete: false,
       })
@@ -1134,7 +1151,7 @@ describe('runProgrammaticStep', () => {
         }
       })
 
-      const result3 = await runProgrammaticStep(mockAgentState, {
+      const result3 = await runProgrammaticStep(result2.agentState, {
         ...mockParams,
         stepsComplete: true,
       })
 
@@ -2,13 +2,20 @@ import {
   getInitialAgentState,
   type AgentState,
 } from '@codebuff/common/types/session-state'
-import { afterEach, beforeEach, describe, expect, test } from 'bun:test'
+import { afterEach, beforeEach, describe, expect, test, spyOn } from 'bun:test'
+import {
+  clearMockedModules,
+  mockModule,
+} from '@codebuff/common/testing/mock-modules'
 
 import {
   clearAgentGeneratorCache,
   runProgrammaticStep,
 } from '../run-programmatic-step'
 import { mockFileContext, MockWebSocket } from './test-utils'
+import * as agentRun from '../agent-run'
+import * as requestContext from '../websockets/request-context'
+import * as websocketAction from '../websockets/websocket-action'
 
 import type { AgentTemplate } from '../templates/types'
 import type { WebSocket } from 'ws'
@@ -21,11 +28,39 @@ describe('QuickJS Sandbox Generator', () => {
   beforeEach(() => {
     clearAgentGeneratorCache()
 
+    // Mock dependencies
+    spyOn(agentRun, 'addAgentStep').mockImplementation(
+      async () => 'test-step-id',
+    )
+    spyOn(requestContext, 'getRequestContext').mockImplementation(() => ({
+      processedRepoId: 'test-repo-id',
+    }))
+    spyOn(websocketAction, 'sendAction').mockImplementation(() => {})
+    spyOn(crypto, 'randomUUID').mockImplementation(
+      () =>
+        'mock-uuid-0000-0000-0000-000000000000' as `${string}-${string}-${string}-${string}-${string}`,
+    )
+
+    // Mock logger
+    mockModule('@codebuff/backend/util/logger', () => ({
+      logger: {
+        debug: () => {},
+        error: () => {},
+        info: () => {},
+        warn: () => {},
+      },
+      withLoggerContext: async (context: any, fn: () => Promise<any>) => fn(),
+    }))
+
     // Reuse common test data structure
     mockAgentState = {
       ...getInitialAgentState(),
       agentId: 'test-agent-123',
       agentType: 'test-vm-agent',
+      runId:
+        'test-run-id' as `${string}-${string}-${string}-${string}-${string}`,
+      directCreditsUsed: 0,
+      childRunIds: [],
     }
 
     // Base template structure - will be customized per test
@@ -62,11 +97,14 @@ describe('QuickJS Sandbox Generator', () => {
       assistantPrefix: undefined,
       ws: new MockWebSocket() as unknown as WebSocket,
       localAgentTemplates: {},
+      stepsComplete: false,
+      stepNumber: 1,
     }
   })
 
   afterEach(() => {
     clearAgentGeneratorCache()
+    clearMockedModules()
   })
 
   test('should execute string-based generator in QuickJS sandbox', async () => {
 
@@ -188,7 +188,7 @@ export async function runProgrammaticStep(
         break
       }
       if (result.value === 'STEP_ALL') {
-        runIdToStepAll.add(state.agentState.agentId)
+        runIdToStepAll.add(state.agentState.runId)
         break
       }
 
 
@@ -55,6 +55,25 @@ Codebuff is a tool for editing codebases via natural language instruction to Buf
 - ESC key to toggle menu or stop AI response
 - CTRL+C to exit the application
 
+### Shell Shims (Direct Commands)
+
+Codebuff supports shell shims for direct command invocation without the `codebuff` prefix.
+
+- **Cross-platform**: Works on Windows (CMD/PowerShell), macOS, and Linux (bash/zsh/fish)
+- **Store integration**: Uses fully qualified agent IDs from the agent store
+- **Easy management**: Install, update, list, and uninstall shims via CLI commands
+
+### Quick Start (Recommended)
+
+```bash
+# One-step setup: install and add to PATH automatically
+codebuff shims install codebuff/base-lite@1.0.0
+
+# Use immediately in current session (follow the printed instruction)
+eval "$(codebuff shims env)"
+
+# Now use direct commands!
+base-lite "fix this bug"             # Works right away!
+```
+
 ## Package Management
 
 - Use Bun for all package management operations
@@ -326,11 +345,13 @@ Templates are maintained in the codebuff community repo. Each directory correspo
 **Important**: When adding database indexes or schema changes, modify the schema file directly (`common/src/db/schema.ts`) using Drizzle's index syntax, then run the migration generation script to create the actual migration files.
 
 **Do NOT** write migration SQL files directly. The proper workflow is:
+
 1. Update `common/src/db/schema.ts` with new indexes using Drizzle syntax
 2. Run the migration generation script to create the SQL migration files
 3. Apply the migrations using the deployment process
 
 Example of adding performance indexes:
+
 ```typescript
 index('idx_table_optimized')
   .on(table.column1, table.column2)
 
@@ -1,6 +1,6 @@
 {
   "name": "codebuff",
-  "version": "1.0.491",
+  "version": "1.0.492",
   "description": "AI coding agent",
   "license": "MIT",
   "bin": {
 
@@ -93,4 +93,9 @@ export const cliOptions: CliParam[] = [
     menuDescription: 'Log subagent messages to trace files',
     hidden: false,
   },
+  {
+    flags: '--force',
+    description: 'Force overwrite existing shims',
+    hidden: true,
+  },
 ]
Original file line number	Diff line number	Diff line change
`@@ -188,7 +188,7 @@ export async function runProgrammaticStep(`
`188`	`188`	`break`
`189`	`189`	`}`
`190`	`190`	`if (result.value === 'STEP_ALL') {`
`191`		`- runIdToStepAll.add(state.agentState.agentId)`
	`191`	`+ runIdToStepAll.add(state.agentState.runId)`
`192`	`192`	`break`
`193`	`193`	`}`
`194`	`194`
Original file line number	Diff line number	Diff line change
`@@ -1,6 +1,6 @@`
`1`	`1`	`{`
`2`	`2`	`"name": "codebuff",`
`3`		`- "version": "1.0.491",`
	`3`	`+ "version": "1.0.492",`
`4`	`4`	`"description": "AI coding agent",`
`5`	`5`	`"license": "MIT",`
`6`	`6`	`"bin": {`