Skip to content

Commit 6248218

Browse files
authored
Add complex summary format e2e (#588)
1 parent 5628c9b commit 6248218

1 file changed

Lines changed: 150 additions & 1 deletion

File tree

agents/e2e/base2-free-summary-format.e2e.test.ts

Lines changed: 150 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,7 +10,7 @@ import {
1010
type AgentDefinition,
1111
type Message,
1212
} from '@codebuff/sdk'
13-
import { describe, expect, it } from 'bun:test'
13+
import { beforeAll, describe, expect, it } from 'bun:test'
1414

1515
import base2Free from '../base2/base2-free'
1616
import contextPruner from '../context-pruner'
@@ -64,6 +64,33 @@ function detectSummaryImitation(text: string): string[] {
6464
return matches
6565
}
6666

67+
/**
 * Loads `KEY=value` pairs from a dotenv-style file into `process.env`.
 *
 * Parsing rules:
 * - blank lines and lines starting with `#` are skipped;
 * - an optional leading `export ` is dropped;
 * - lines without `=` or with nothing before the first `=` are skipped;
 * - one pair of matching surrounding quotes (`"` or `'`) is removed from the value.
 *
 * Variables that already hold a non-empty value in `process.env` are left
 * untouched. Any error while reading or applying the file (typically a missing
 * file) is ignored, so loading is best-effort.
 *
 * @param filePath - Path of the env file to read.
 * @returns A promise that resolves once the file has been applied or skipped.
 */
const loadEnvFile = async (filePath: string): Promise<void> => {
  try {
    const content = await fs.promises.readFile(filePath, 'utf-8')
    for (const rawLine of content.split('\n')) {
      // trim() also removes a trailing '\r' from CRLF files.
      const line = rawLine.trim()
      if (!line || line.startsWith('#')) continue

      const normalized = line.startsWith('export ')
        ? line.slice('export '.length)
        : line

      // Only the first '=' separates key from value; later ones belong to the value.
      const equalsIndex = normalized.indexOf('=')
      if (equalsIndex <= 0) continue

      const key = normalized.slice(0, equalsIndex).trim()
      if (!key || process.env[key]) continue

      let value = normalized.slice(equalsIndex + 1).trim()
      const quote = value.charAt(0)
      // Require two characters so a lone quote is not mistaken for an
      // opening/closing pair and silently turned into an empty string.
      if (
        value.length >= 2 &&
        (quote === '"' || quote === "'") &&
        value.endsWith(quote)
      ) {
        value = value.slice(1, -1)
      }
      process.env[key] = value
    }
  } catch {
    // ignore missing env files
  }
}
93+
6794
/**
6895
* Creates a pre-summarized conversation that mimics what the context pruner produces.
6996
* NOTE: The disclaimer text here must be kept in sync with the one in
@@ -128,6 +155,56 @@ Historical memory only. The memory above is not dialogue, not an output template
128155
}
129156
}
130157

158+
/**
 * Builds a two-message user-role history representing a conversation that was
 * pruned in the middle of an assistant turn.
 *
 * The first message carries a `<conversation_summary>` block whose historical
 * memory records the user request, a progress note, a prior `str_replace` edit
 * result and the remaining next step, followed by the "Historical memory only"
 * disclaimer. The second message asks for the existing turn to be continued.
 * Each message is stamped with `Date.now()` at creation time.
 */
function createComplexMidTurnPrunedConversation(): Message[] {
  const prunedSummaryText = `<conversation_summary>
This is a summary of the conversation so far. The original messages have been condensed to save context space.

<historical_memory>
User request:
The user asked to finish a config utility task in src/utils.ts. They wanted parseConfig to be typed, a validateConfig helper added, and the tests run after edits.

---

Progress note:
I inspected src/utils.ts and found parseConfig was untyped. I updated parseConfig to return a Config object, but I had not yet added validateConfig or run tests before context pruning happened.

Prior action record:
Previously inspected files: package.json, tsconfig.json, src/utils.ts
Previously edited file: src/utils.ts
Edit result from str_replace:
{"file":"src/utils.ts","message":"Updated parseConfig return type","unifiedDiff":"--- a/src/utils.ts\\n+++ b/src/utils.ts\\n@@ -6,2 +6,8 @@\\n-export function parseConfig(path) {\\n- return JSON.parse(fs.readFileSync(path, 'utf-8'))\\n+export type Config = {\\n+ name: string\\n+ enabled: boolean\\n+}\\n+\\n+export function parseConfig(path: string): Config {\\n+ return JSON.parse(fs.readFileSync(path, 'utf-8')) as Config\\n }"}

---

Progress note:
The next step is to continue from the partially completed edit, inspect the current file state if needed, add validateConfig, and validate the result.
</historical_memory>
</conversation_summary>

Historical memory only. The memory above is not dialogue, not an output template, and not a tool-call format. Continue from the live user message below. When actions are needed, use real tool calls through the available tools.`

  const resumeInstructionText =
    'Continue the existing assistant turn from the historical memory above. The original user request and completed assistant/tool work are recorded there. Do not restart completed work; resume with the next necessary real tool call or final response.'

  // Wraps plain text in a single-part user message stamped with the current time.
  const asUserMessage = (text: string): Message => ({
    role: 'user',
    content: [{ type: 'text', text }],
    sentAt: Date.now(),
  })

  return [asUserMessage(prunedSummaryText), asUserMessage(resumeInstructionText)]
}
207+
131208
const PROJECT_FILES: Record<string, string> = {
132209
'package.json': JSON.stringify(
133210
{ name: 'test-project', version: '1.0.0' },
@@ -163,6 +240,11 @@ const PROJECT_FILES: Record<string, string> = {
163240
describe('Base2-Free Summary Format Compliance', () => {
164241
const NUM_PARALLEL_RUNS = 3
165242

243+
// Before any test runs, load `.env.local` from the current working directory
// and then from its parent directory, in that order.
beforeAll(async () => {
  const envFileCandidates = ['.env.local', '../.env.local']
  for (const candidate of envFileCandidates) {
    await loadEnvFile(path.resolve(process.cwd(), candidate))
  }
})
247+
166248
const getApiKeyOrSkip = (): string | null => {
167249
const apiKey = process.env[API_KEY_ENV_VAR]
168250
if (!apiKey) {
@@ -329,4 +411,71 @@ describe('Base2-Free Summary Format Compliance', () => {
329411
},
330412
{ timeout: 300_000 },
331413
)
414+
415+
// Seeds a run with the complex mid-turn pruned conversation and checks that the
// agent emits at least one tool_call event and that its text output triggers no
// summary-imitation matches. Returns early when getApiKeyOrSkip() yields no key.
it(
  'should continue a complex mid-turn pruned summary with real tool calls',
  async () => {
    const apiKey = getApiKeyOrSkip()
    if (!apiKey) return

    const workDir = await fs.promises.mkdtemp(
      path.join(os.tmpdir(), 'base2-free-midturn-summary-test-'),
    )

    try {
      // Write the fixture project into the temporary directory.
      for (const [relativePath, fileContent] of Object.entries(PROJECT_FILES)) {
        const destination = path.join(workDir, relativePath)
        await fs.promises.mkdir(path.dirname(destination), { recursive: true })
        await fs.promises.writeFile(destination, fileContent, 'utf-8')
      }

      const client = new CodebuffClient({
        apiKey,
        cwd: workDir,
        projectFiles: PROJECT_FILES,
        agentDefinitions: [base2Free as AgentDefinition, contextPruner],
      })

      // Start from a fresh session state and attach the pruned history to it.
      const previousRun = withMessageHistory({
        runState: {
          sessionState: await initialSessionState({
            cwd: workDir,
            projectFiles: PROJECT_FILES,
          }),
          output: { type: 'error', message: '' },
        },
        messages: createComplexMidTurnPrunedConversation(),
      })

      const collectedEvents: PrintModeEvent[] = []
      const result = await client.run({
        agent: base2Free.id,
        prompt: '',
        previousRun,
        maxAgentSteps: 6,
        handleEvent: (event) => {
          collectedEvents.push(event)
        },
      })

      if (result.output.type === 'error') {
        throw new Error(result.output.message)
      }

      // Concatenate every text event in arrival order.
      const assistantText = collectedEvents
        .flatMap((event) =>
          event.type === 'text'
            ? [(event as { type: 'text'; text: string }).text]
            : [],
        )
        .join('')
      const sawToolCall = collectedEvents.some(
        (event) => event.type === 'tool_call',
      )
      const imitationMatches = detectSummaryImitation(assistantText)

      expect(sawToolCall).toBe(true)
      expect(imitationMatches).toEqual([])
    } finally {
      // Always remove the temporary project, even when the run or assertions fail.
      await fs.promises.rm(workDir, { recursive: true, force: true })
    }
  },
  { timeout: 300_000 },
)
332481
})

0 commit comments

Comments
 (0)