Skip to content

Commit dc22d22

Browse files
committed
Add complex summary format e2e
1 parent c4a7e40 commit dc22d22

1 file changed

Lines changed: 117 additions & 0 deletions

File tree

agents/e2e/base2-free-summary-format.e2e.test.ts

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -128,6 +128,56 @@ Historical memory only. The memory above is not dialogue, not an output template
128128
}
129129
}
130130

131+
/**
 * Builds the message history for the "complex mid-turn pruned" scenario.
 *
 * The history consists of two user-role messages:
 *  1. A `<conversation_summary>` block whose `<historical_memory>` records the
 *     original request, progress notes, and a prior `str_replace` edit result,
 *     followed by a reminder that the memory is not an output template.
 *  2. A live instruction telling the agent to resume the existing turn
 *     instead of restarting completed work.
 *
 * The template literal below is intentionally flush-left: any leading
 * whitespace would become part of the message text.
 *
 * @returns Two messages, each stamped with `Date.now()` at call time.
 */
function createComplexMidTurnPrunedConversation(): Message[] {
  return [
    {
      role: 'user',
      content: [
        {
          type: 'text',
          // `\\n` sequences are deliberate: the embedded JSON must contain a
          // literal backslash-n, not a real newline.
          text: `<conversation_summary>
This is a summary of the conversation so far. The original messages have been condensed to save context space.

<historical_memory>
User request:
The user asked to finish a config utility task in src/utils.ts. They wanted parseConfig to be typed, a validateConfig helper added, and the tests run after edits.

---

Progress note:
I inspected src/utils.ts and found parseConfig was untyped. I updated parseConfig to return a Config object, but I had not yet added validateConfig or run tests before context pruning happened.

Prior action record:
Previously inspected files: package.json, tsconfig.json, src/utils.ts
Previously edited file: src/utils.ts
Edit result from str_replace:
{"file":"src/utils.ts","message":"Updated parseConfig return type","unifiedDiff":"--- a/src/utils.ts\\n+++ b/src/utils.ts\\n@@ -6,2 +6,8 @@\\n-export function parseConfig(path) {\\n- return JSON.parse(fs.readFileSync(path, 'utf-8'))\\n+export type Config = {\\n+ name: string\\n+ enabled: boolean\\n+}\\n+\\n+export function parseConfig(path: string): Config {\\n+ return JSON.parse(fs.readFileSync(path, 'utf-8')) as Config\\n }"}

---

Progress note:
The next step is to continue from the partially completed edit, inspect the current file state if needed, add validateConfig, and validate the result.
</historical_memory>
</conversation_summary>

Historical memory only. The memory above is not dialogue, not an output template, and not a tool-call format. Continue from the live user message below. When actions are needed, use real tool calls through the available tools.`,
        },
      ],
      sentAt: Date.now(),
    },
    {
      role: 'user',
      content: [
        {
          type: 'text',
          text: 'Continue the existing assistant turn from the historical memory above. The original user request and completed assistant/tool work are recorded there. Do not restart completed work; resume with the next necessary real tool call or final response.',
        },
      ],
      sentAt: Date.now(),
    },
  ]
}
180+
131181
const PROJECT_FILES: Record<string, string> = {
132182
'package.json': JSON.stringify(
133183
{ name: 'test-project', version: '1.0.0' },
@@ -329,4 +379,71 @@ describe('Base2-Free Summary Format Compliance', () => {
329379
},
330380
{ timeout: 300_000 },
331381
)
382+
383+
// Seeds a run with the complex mid-turn pruned history and checks that the
// agent resumes with at least one tool call and without emitting text that
// detectSummaryImitation flags.
it(
  'should continue a complex mid-turn pruned summary with real tool calls',
  async () => {
    // Nothing to exercise without an API key; end the test early.
    const apiKey = getApiKeyOrSkip()
    if (!apiKey) return

    const tmpDir = await fs.promises.mkdtemp(
      path.join(os.tmpdir(), 'base2-free-midturn-summary-test-'),
    )

    try {
      // Materialize the in-memory project on disk under the temp directory.
      for (const [filePath, content] of Object.entries(PROJECT_FILES)) {
        const fullPath = path.join(tmpDir, filePath)
        await fs.promises.mkdir(path.dirname(fullPath), { recursive: true })
        await fs.promises.writeFile(fullPath, content, 'utf-8')
      }

      const client = new CodebuffClient({
        apiKey,
        cwd: tmpDir,
        projectFiles: PROJECT_FILES,
        agentDefinitions: [base2Free as AgentDefinition, contextPruner],
      })

      const sessionState = await initialSessionState({
        cwd: tmpDir,
        projectFiles: PROJECT_FILES,
      })
      const runStateWithMessages = withMessageHistory({
        runState: {
          sessionState,
          // NOTE(review): looks like a placeholder output needed only to
          // satisfy the run-state shape — confirm against withMessageHistory.
          output: { type: 'error', message: '' },
        },
        messages: createComplexMidTurnPrunedConversation(),
      })

      const events: PrintModeEvent[] = []
      const run = await client.run({
        agent: base2Free.id,
        // Empty prompt: the instruction to continue is already the last
        // message in the seeded history.
        prompt: '',
        previousRun: runStateWithMessages,
        maxAgentSteps: 6,
        handleEvent: (event) => {
          events.push(event)
        },
      })

      // Surface a failed run as a test failure with the run's own message.
      if (run.output.type === 'error') {
        throw new Error(run.output.message)
      }

      // Concatenate every streamed text event into one string for scanning.
      const textOutput = events
        .filter((e) => e.type === 'text')
        .map((e) => (e as { type: 'text'; text: string }).text)
        .join('')
      const hadToolCalls = events.some((e) => e.type === 'tool_call')
      const imitationMatches = detectSummaryImitation(textOutput)

      expect(hadToolCalls).toBe(true)
      expect(imitationMatches).toEqual([])
    } finally {
      // Always remove the temp project, even when the run or assertions fail.
      await fs.promises.rm(tmpDir, { recursive: true, force: true })
    }
  },
  { timeout: 300_000 },
)
332449
})

0 commit comments

Comments
 (0)