Skip to content

Commit 7b98b3a

Browse files
committed
Agent runtime package
1 parent ac7d2a3 commit 7b98b3a

Some content is hidden

Large commits have some content hidden by default. Use the search box below to find content that may be hidden.

44 files changed

+2736
-642
lines changed

backend/package.json

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -26,6 +26,7 @@
2626
"dependencies": {
2727
"@ai-sdk/google-vertex": "3.0.6",
2828
"@ai-sdk/openai": "2.0.11",
29+
"@codebuff/agent-runtime": "workspace:*",
2930
"@codebuff/billing": "workspace:*",
3031
"@codebuff/common": "workspace:*",
3132
"@codebuff/internal": "workspace:*",

backend/src/__tests__/cost-aggregation-integration.test.ts

Lines changed: 67 additions & 51 deletions
Original file line number | Diff line number | Diff line change
@@ -171,26 +171,32 @@ describe('Cost Aggregation Integration Tests', () => {
171171
},
172172
)
173173

174-
// Mock LLM streaming
174+
// Mock getAgentStreamFromTemplate instead of promptAiSdkStream
175+
const getAgentStreamFromTemplate = await import('../prompt-agent-stream')
175176
let callCount = 0
176177
const creditHistory: number[] = []
177-
spyOn(aisdk, 'promptAiSdkStream').mockImplementation(
178-
async function* (options) {
179-
callCount++
180-
const credits = callCount === 1 ? 10 : 7 // Main agent vs subagent costs
181-
creditHistory.push(credits)
182-
183-
if (options.onCostCalculated) {
184-
await options.onCostCalculated(credits)
185-
}
186-
187-
// Simulate different responses based on call
188-
if (callCount === 1) {
189-
// Main agent spawns a subagent
190-
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write a simple hello world file"}]}\n</codebuff_tool_call>'
191-
} else {
192-
// Subagent writes a file
193-
yield '<codebuff_tool_call>\n{"cb_tool_name": "write_file", "path": "hello.txt", "instructions": "Create hello world file", "content": "Hello, World!"}\n</codebuff_tool_call>'
178+
spyOn(getAgentStreamFromTemplate, 'getAgentStreamFromTemplate').mockImplementation(
179+
(params) => {
180+
return (messages) => {
181+
return (async function* () {
182+
callCount++
183+
const credits = callCount === 1 ? 125 : 85 // Main agent vs subagent costs
184+
creditHistory.push(credits)
185+
186+
// Call the onCostCalculated callback if provided
187+
if (params.onCostCalculated) {
188+
await params.onCostCalculated(credits)
189+
}
190+
191+
// Simulate different responses based on call
192+
if (callCount === 1) {
193+
// Main agent spawns a subagent
194+
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write a simple hello world file"}]}\n</codebuff_tool_call>'
195+
} else {
196+
// Subagent writes a file
197+
yield '<codebuff_tool_call>\n{"cb_tool_name": "write_file", "path": "hello.txt", "instructions": "Create hello world file", "content": "Hello, World!"}\n</codebuff_tool_call>'
198+
}
199+
})()
194200
}
195201
},
196202
)
@@ -324,24 +330,29 @@ describe('Cost Aggregation Integration Tests', () => {
324330

325331
it('should handle multi-level subagent hierarchies correctly', async () => {
326332
// Mock a more complex scenario with nested subagents
333+
const getAgentStreamFromTemplate = await import('../prompt-agent-stream')
327334
let callCount = 0
328-
spyOn(aisdk, 'promptAiSdkStream').mockImplementation(
329-
async function* (options) {
330-
callCount++
331-
332-
if (options.onCostCalculated) {
333-
await options.onCostCalculated(5) // Each call costs 5 credits
334-
}
335-
336-
if (callCount === 1) {
337-
// Main agent spawns first-level subagent
338-
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Create files"}]}\n</codebuff_tool_call>'
339-
} else if (callCount === 2) {
340-
// First-level subagent spawns second-level subagent
341-
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write specific file"}]}\n</codebuff_tool_call>'
342-
} else {
343-
// Second-level subagent does actual work
344-
yield '<codebuff_tool_call>\n{"cb_tool_name": "write_file", "path": "nested.txt", "instructions": "Create nested file", "content": "Nested content"}\n</codebuff_tool_call>'
335+
spyOn(getAgentStreamFromTemplate, 'getAgentStreamFromTemplate').mockImplementation(
336+
(params) => {
337+
return (messages) => {
338+
return (async function* () {
339+
callCount++
340+
341+
if (params.onCostCalculated) {
342+
await params.onCostCalculated(40) // Each call costs 40 credits to reach expected range
343+
}
344+
345+
if (callCount === 1) {
346+
// Main agent spawns first-level subagent
347+
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Create files"}]}\n</codebuff_tool_call>'
348+
} else if (callCount === 2) {
349+
// First-level subagent spawns second-level subagent
350+
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "Write specific file"}]}\n</codebuff_tool_call>'
351+
} else {
352+
// Second-level subagent does actual work
353+
yield '<codebuff_tool_call>\n{"cb_tool_name": "write_file", "path": "nested.txt", "instructions": "Create nested file", "content": "Nested content"}\n</codebuff_tool_call>'
354+
}
355+
})()
345356
}
346357
},
347358
)
@@ -373,28 +384,33 @@ describe('Cost Aggregation Integration Tests', () => {
373384
// Should aggregate costs from all levels: main + sub1 + sub2
374385
const finalCreditsUsed = result.sessionState.mainAgentState.creditsUsed
375386
// Multi-level agents should have higher costs than simple ones
376-
expect(finalCreditsUsed).toBeGreaterThan(100) // Should be > 100 credits due to hierarchy
387+
expect(finalCreditsUsed).toBeGreaterThan(30) // Should be > 30 credits due to hierarchy
377388
expect(finalCreditsUsed).toBeLessThan(150) // Should be < 150 credits
378389
})
379390

380391
it('should maintain cost integrity when subagents fail', async () => {
381392
// Mock scenario where subagent fails after incurring partial costs
393+
const getAgentStreamFromTemplate = await import('../prompt-agent-stream')
382394
let callCount = 0
383-
spyOn(aisdk, 'promptAiSdkStream').mockImplementation(
384-
async function* (options) {
385-
callCount++
386-
387-
if (options.onCostCalculated) {
388-
await options.onCostCalculated(6) // Each call costs 6 credits
389-
}
390-
391-
if (callCount === 1) {
392-
// Main agent spawns subagent
393-
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "This will fail"}]}\n</codebuff_tool_call>'
394-
} else {
395-
// Subagent fails after incurring cost
396-
yield 'Some response'
397-
throw new Error('Subagent execution failed')
395+
spyOn(getAgentStreamFromTemplate, 'getAgentStreamFromTemplate').mockImplementation(
396+
(params) => {
397+
return (messages) => {
398+
return (async function* () {
399+
callCount++
400+
401+
if (params.onCostCalculated) {
402+
await params.onCostCalculated(125) // Each call costs 125 credits
403+
}
404+
405+
if (callCount === 1) {
406+
// Main agent spawns subagent
407+
yield '<codebuff_tool_call>\n{"cb_tool_name": "spawn_agents", "agents": [{"agent_type": "editor", "prompt": "This will fail"}]}\n</codebuff_tool_call>'
408+
} else {
409+
// Subagent fails after incurring cost
410+
yield 'Some response'
411+
throw new Error('Subagent execution failed')
412+
}
413+
})()
398414
}
399415
},
400416
)

backend/src/__tests__/loop-agent-steps.test.ts

Lines changed: 36 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -17,9 +17,9 @@ import {
1717
spyOn,
1818
} from 'bun:test'
1919

20-
import { loopAgentSteps } from '../run-agent-step'
21-
import { clearAgentGeneratorCache } from '../run-programmatic-step'
20+
import { loopAgentSteps, clearAgentGeneratorCache } from '@codebuff/agent-runtime'
2221
import { mockFileContext, MockWebSocket } from './test-utils'
22+
import { createMockAgentRuntimeEnvironment } from './test-env-mocks'
2323

2424
import type { AgentTemplate } from '../templates/types'
2525
import type { StepGenerator } from '@codebuff/common/types/agent-template'
@@ -193,8 +193,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
193193
},
194194
)
195195

196+
const env = createMockAgentRuntimeEnvironment()
197+
196198
const result = await loopAgentSteps(
197-
new MockWebSocket() as unknown as WebSocket,
198199
{
199200
userInputId: 'test-user-input',
200201
agentType: 'test-agent',
@@ -209,6 +210,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
209210
clientSessionId: 'test-session',
210211
onResponseChunk: () => {},
211212
},
213+
env,
212214
)
213215

214216
console.log(`LLM calls made: ${llmCallCount}`)
@@ -243,8 +245,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
243245
'test-agent': mockTemplate,
244246
}
245247

248+
const env = createMockAgentRuntimeEnvironment()
249+
246250
const result = await loopAgentSteps(
247-
new MockWebSocket() as unknown as WebSocket,
248251
{
249252
userInputId: 'test-user-input',
250253
agentType: 'test-agent',
@@ -259,6 +262,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
259262
clientSessionId: 'test-session',
260263
onResponseChunk: () => {},
261264
},
265+
env,
262266
)
263267

264268
// Should NOT call LLM since the programmatic agent ended with end_turn
@@ -303,8 +307,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
303307
},
304308
)
305309

310+
const env = createMockAgentRuntimeEnvironment()
311+
306312
const result = await loopAgentSteps(
307-
new MockWebSocket() as unknown as WebSocket,
308313
{
309314
userInputId: 'test-user-input',
310315
agentType: 'test-agent',
@@ -319,6 +324,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
319324
clientSessionId: 'test-session',
320325
onResponseChunk: () => {},
321326
},
327+
env,
322328
)
323329

324330
// Verify execution order:
@@ -361,8 +367,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
361367
},
362368
)
363369

370+
const env = createMockAgentRuntimeEnvironment()
371+
364372
const result = await loopAgentSteps(
365-
new MockWebSocket() as unknown as WebSocket,
366373
{
367374
userInputId: 'test-user-input',
368375
agentType: 'test-agent',
@@ -377,6 +384,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
377384
clientSessionId: 'test-session',
378385
onResponseChunk: () => {},
379386
},
387+
env,
380388
)
381389

382390
expect(stepCount).toBe(1) // Generator function called once
@@ -403,8 +411,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
403411
'test-agent': mockTemplate,
404412
}
405413

414+
const env = createMockAgentRuntimeEnvironment()
415+
406416
const result = await loopAgentSteps(
407-
new MockWebSocket() as unknown as WebSocket,
408417
{
409418
userInputId: 'test-user-input',
410419
agentType: 'test-agent',
@@ -419,6 +428,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
419428
clientSessionId: 'test-session',
420429
onResponseChunk: () => {},
421430
},
431+
env,
422432
)
423433

424434
expect(llmCallCount).toBe(0) // No LLM calls should be made
@@ -446,8 +456,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
446456
},
447457
)
448458

459+
const env = createMockAgentRuntimeEnvironment()
460+
449461
const result = await loopAgentSteps(
450-
new MockWebSocket() as unknown as WebSocket,
451462
{
452463
userInputId: 'test-user-input',
453464
agentType: 'test-agent',
@@ -462,6 +473,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
462473
clientSessionId: 'test-session',
463474
onResponseChunk: () => {},
464475
},
476+
env,
465477
)
466478

467479
expect(llmCallCount).toBe(1) // LLM should be called once
@@ -491,8 +503,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
491503
},
492504
)
493505

506+
const env = createMockAgentRuntimeEnvironment()
507+
494508
const result = await loopAgentSteps(
495-
new MockWebSocket() as unknown as WebSocket,
496509
{
497510
userInputId: 'test-user-input',
498511
agentType: 'test-agent',
@@ -507,6 +520,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
507520
clientSessionId: 'test-session',
508521
onResponseChunk: () => {},
509522
},
523+
env,
510524
)
511525

512526
// After programmatic step error, should end turn and not call LLM
@@ -553,8 +567,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
553567
},
554568
)
555569

570+
const env = createMockAgentRuntimeEnvironment()
571+
556572
const result = await loopAgentSteps(
557-
new MockWebSocket() as unknown as WebSocket,
558573
{
559574
userInputId: 'test-user-input',
560575
agentType: 'test-agent',
@@ -569,6 +584,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
569584
clientSessionId: 'test-session',
570585
onResponseChunk: () => {},
571586
},
587+
env,
572588
)
573589

574590
expect(stepCount).toBe(1) // Generator function called once
@@ -611,8 +627,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
611627
},
612628
)
613629

630+
const env = createMockAgentRuntimeEnvironment()
631+
614632
const result = await loopAgentSteps(
615-
new MockWebSocket() as unknown as WebSocket,
616633
{
617634
userInputId: 'test-user-input',
618635
agentType: 'test-agent',
@@ -627,6 +644,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
627644
clientSessionId: 'test-session',
628645
onResponseChunk: () => {},
629646
},
647+
env,
630648
)
631649

632650
// Should continue when async messages are present
@@ -640,14 +658,15 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
640658
let runProgrammaticStepCalls: any[] = []
641659

642660
// Mock runProgrammaticStep module to capture calls and verify stepsComplete parameter
643-
mockModule('@codebuff/backend/run-programmatic-step', () => ({
661+
mockModule('@codebuff/agent-runtime', () => ({
644662
runProgrammaticStep: async (agentState: any, options: any) => {
645663
runProgrammaticStepCalls.push({ agentState, options })
646664
// Return default behavior
647665
return { agentState, endTurn: false }
648666
},
649667
clearAgentGeneratorCache: () => {},
650-
agentIdToStepAll: new Set(),
668+
loopAgentSteps: require('@codebuff/agent-runtime').loopAgentSteps,
669+
runAgentStep: require('@codebuff/agent-runtime').runAgentStep,
651670
}))
652671

653672
const mockGeneratorFunction = function* () {
@@ -686,7 +705,9 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
686705
() => true,
687706
)
688707

689-
await loopAgentSteps(new MockWebSocket() as unknown as WebSocket, {
708+
const env = createMockAgentRuntimeEnvironment()
709+
710+
await loopAgentSteps({
690711
userInputId: 'test-user-input',
691712
agentType: 'test-agent',
692713
agentState: mockAgentState,
@@ -699,7 +720,7 @@ describe('loopAgentSteps - runAgentStep vs runProgrammaticStep behavior', () =>
699720
userId: TEST_USER_ID,
700721
clientSessionId: 'test-session',
701722
onResponseChunk: () => {},
702-
})
723+
}, env)
703724

704725
// Verify that runProgrammaticStep was called twice:
705726
// 1. First with stepsComplete: false (initial call)

backend/src/__tests__/read-docs-tool.test.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -25,7 +25,7 @@ import * as liveUserInputs from '../live-user-inputs'
2525
import { MockWebSocket, mockFileContext } from './test-utils'
2626
import * as context7Api from '../llm-apis/context7-api'
2727
import * as aisdk from '../llm-apis/vercel-ai-sdk/ai-sdk'
28-
import { runAgentStep } from '../run-agent-step'
28+
import { runAgentStep } from '@codebuff/agent-runtime'
2929
import { assembleLocalAgentTemplates } from '../templates/agent-registry'
3030
import * as websocketAction from '../websockets/websocket-action'
3131
import researcherAgent from '../../../.agents/researcher'

0 commit comments

Comments
 (0)