Skip to content

Commit 5e2bc77

Browse files
committed
feat: image support!
1 parent 5912984 commit 5e2bc77

File tree

9 files changed

+963
-27
lines changed

9 files changed

+963
-27
lines changed

backend/src/main-prompt.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -40,6 +40,7 @@ export const mainPrompt = async (
4040

4141
const {
4242
prompt,
43+
content,
4344
sessionState: sessionState,
4445
fingerprintId,
4546
costMode,
@@ -182,6 +183,7 @@ export const mainPrompt = async (
182183
const { agentState, output } = await loopAgentSteps(ws, {
183184
userInputId: promptId,
184185
prompt,
186+
content,
185187
params: promptParams,
186188
agentType,
187189
agentState: mainAgentState,

backend/src/run-agent-step.ts

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,11 @@ import type {
3636
AssistantMessage,
3737
Message,
3838
} from '@codebuff/common/types/messages/codebuff-message'
39-
import type { ToolResultPart } from '@codebuff/common/types/messages/content-part'
39+
import type {
40+
ToolResultPart,
41+
TextPart,
42+
ImagePart,
43+
} from '@codebuff/common/types/messages/content-part'
4044
import type { PrintModeEvent } from '@codebuff/common/types/print-mode'
4145
import type {
4246
AgentTemplateType,
@@ -46,6 +50,49 @@ import type {
4650
import type { ProjectFileContext } from '@codebuff/common/util/file'
4751
import type { WebSocket } from 'ws'
4852

53+
/**
54+
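* Builds the user message content: joins prompt and JSON-stringified params into one text, merges it into the first text part of content (or prepends a new text part), and returns asUserMessage-wrapped text when the result is text-only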
55+
*/
56+
function buildUserMessageContent(
57+
prompt: string | undefined,
58+
params: Record<string, any> | undefined,
59+
content?: Array<TextPart | ImagePart>,
60+
): string | Array<TextPart | ImagePart> {
61+
const textParts = buildArray([
62+
prompt,
63+
params && JSON.stringify(params, null, 2),
64+
])
65+
const combinedText = textParts.join('\n\n')
66+
67+
if (!content || content.length === 0) {
68+
// No content parts (missing or empty array): only prompt/params, so return them as simple text
69+
return asUserMessage(combinedText)
70+
}
71+
72+
// If we have both content and prompt/params, combine them
73+
const allParts = [...content]
74+
75+
// Find the first text part and prepend our combined text, or add it as a new text part
76+
const firstTextPartIndex = allParts.findIndex((part) => part.type === 'text')
77+
if (firstTextPartIndex !== -1) {
78+
// Prepend to existing text part
79+
const textPart = allParts[firstTextPartIndex]
80+
if (textPart.type === 'text') {
81+
allParts[firstTextPartIndex] = {
82+
type: 'text' as const,
83+
text: buildArray([combinedText, textPart.text]).join('\n\n'),
84+
}
85+
}
86+
} else {
87+
// Add as new text part at the beginning
88+
allParts.unshift({ type: 'text' as const, text: combinedText })
89+
}
90+
91+
return allParts.length === 1 && allParts[0].type === 'text'
92+
? asUserMessage(allParts[0].text)
93+
: allParts
94+
}
95+
4996
export interface AgentOptions {
5097
userId: string | undefined
5198
userInputId: string
@@ -390,6 +437,7 @@ export const loopAgentSteps = async (
390437
agentType,
391438
agentState,
392439
prompt,
440+
content,
393441
params,
394442
fingerprintId,
395443
fileContext,
@@ -403,6 +451,7 @@ export const loopAgentSteps = async (
403451
agentType: AgentTemplateType
404452
agentState: AgentState
405453
prompt: string | undefined
454+
content?: Array<TextPart | ImagePart>
406455
params: Record<string, any> | undefined
407456
fingerprintId: string
408457
fileContext: ProjectFileContext
@@ -455,11 +504,7 @@ export const loopAgentSteps = async (
455504
{
456505
// Actual user prompt!
457506
role: 'user' as const,
458-
content: asUserMessage(
459-
buildArray([prompt, params && JSON.stringify(params, null, 2)]).join(
460-
'\n\n',
461-
),
462-
),
507+
content: buildUserMessageContent(prompt, params, content),
463508
keepDuringTruncation: true,
464509
},
465510
prompt &&

bun.lock

Lines changed: 147 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

common/src/actions.ts

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,8 @@ import { GrantTypeValues } from './types/grant'
55
import {
66
toolResultOutputSchema,
77
toolResultPartSchema,
8+
textPartSchema,
9+
imagePartSchema,
810
} from './types/messages/content-part'
911
import { printModeEventSchema } from './types/print-mode'
1012
import {
@@ -28,6 +30,7 @@ export const CLIENT_ACTION_SCHEMA = z.discriminatedUnion('type', [
2830
type: z.literal('prompt'),
2931
promptId: z.string(),
3032
prompt: z.string().or(z.undefined()),
33+
content: z.array(z.union([textPartSchema, imagePartSchema])).optional(),
3134
promptParams: z.record(z.string(), z.any()).optional(), // Additional json params.
3235
fingerprintId: z.string(),
3336
authToken: z.string().optional(),

npm-app/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@
4747
"git-url-parse": "^16.1.0",
4848
"ignore": "7.0.3",
4949
"isomorphic-git": "^1.29.0",
50+
"jimp": "^1.6.0",
5051
"lodash": "*",
5152
"markdown-it": "^14.1.0",
5253
"markdown-it-terminal": "^0.4.0",
Lines changed: 224 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,224 @@
1+
import { describe, test, expect, beforeEach, afterEach } from 'bun:test'
2+
import { writeFileSync, unlinkSync, mkdirSync, rmSync } from 'fs'
3+
import path from 'path'
4+
import {
5+
processImageFile,
6+
isImageFile,
7+
extractImagePaths,
8+
} from '../utils/image-handler'
9+
10+
const TEST_DIR = path.join(__dirname, 'temp-test-images')
11+
const TEST_IMAGE_PATH = path.join(TEST_DIR, 'test-image.png')
12+
const TEST_LARGE_IMAGE_PATH = path.join(TEST_DIR, 'large-image.jpg')
13+
14+
// Create a minimal PNG file (45 bytes): signature, IHDR chunk and IEND chunk only, with no IDAT image data
15+
const MINIMAL_PNG = Buffer.from([
16+
0x89,
17+
0x50,
18+
0x4e,
19+
0x47,
20+
0x0d,
21+
0x0a,
22+
0x1a,
23+
0x0a, // PNG signature
24+
0x00,
25+
0x00,
26+
0x00,
27+
0x0d, // IHDR chunk length
28+
0x49,
29+
0x48,
30+
0x44,
31+
0x52, // IHDR
32+
0x00,
33+
0x00,
34+
0x00,
35+
0x01, // width: 1
36+
0x00,
37+
0x00,
38+
0x00,
39+
0x01, // height: 1
40+
0x08,
41+
0x02,
42+
0x00,
43+
0x00,
44+
0x00, // bit depth, color type, compression, filter, interlace
45+
0x90,
46+
0x77,
47+
0x53,
48+
0xde, // CRC
49+
0x00,
50+
0x00,
51+
0x00,
52+
0x00, // IEND chunk length
53+
0x49,
54+
0x45,
55+
0x4e,
56+
0x44, // IEND
57+
0xae,
58+
0x42,
59+
0x60,
60+
0x82, // CRC
61+
])
62+
63+
beforeEach(() => {
64+
mkdirSync(TEST_DIR, { recursive: true })
65+
writeFileSync(TEST_IMAGE_PATH, MINIMAL_PNG)
66+
67+
// Create a large fake image (10MB)
68+
const largeBuffer = Buffer.alloc(10 * 1024 * 1024, 0xff)
69+
// Frame the buffer as a JPEG: SOI marker at the start, EOI marker at the end
70+
largeBuffer.writeUInt16BE(0xffd8, 0) // JPEG SOI marker
71+
largeBuffer.writeUInt16BE(0xffd9, largeBuffer.length - 2) // JPEG EOI marker
72+
writeFileSync(TEST_LARGE_IMAGE_PATH, largeBuffer)
73+
})
74+
75+
afterEach(() => {
76+
try {
77+
rmSync(TEST_DIR, { recursive: true, force: true })
78+
} catch {
79+
// Ignore cleanup errors
80+
}
81+
})
82+
83+
describe('Image Upload Functionality', () => {
84+
describe('isImageFile', () => {
85+
test('should detect valid image extensions', () => {
86+
expect(isImageFile('test.jpg')).toBe(true)
87+
expect(isImageFile('test.jpeg')).toBe(true)
88+
expect(isImageFile('test.png')).toBe(true)
89+
expect(isImageFile('test.webp')).toBe(true)
90+
expect(isImageFile('test.gif')).toBe(true)
91+
expect(isImageFile('test.bmp')).toBe(true)
92+
expect(isImageFile('test.tiff')).toBe(true)
93+
})
94+
95+
test('should reject non-image extensions', () => {
96+
expect(isImageFile('test.txt')).toBe(false)
97+
expect(isImageFile('test.js')).toBe(false)
98+
expect(isImageFile('test.pdf')).toBe(false)
99+
expect(isImageFile('test')).toBe(false)
100+
})
101+
})
102+
103+
describe('extractImagePaths', () => {
104+
test('should extract image paths from text with @ syntax', () => {
105+
const input = 'Look at this @test.png and @image.jpg files'
106+
const paths = extractImagePaths(input)
107+
expect(paths).toEqual(['test.png', 'image.jpg'])
108+
})
109+
110+
test('should ignore non-image paths', () => {
111+
const input = 'Check @script.js and @test.png'
112+
const paths = extractImagePaths(input)
113+
expect(paths).toEqual(['test.png'])
114+
})
115+
116+
test('should return empty array when no image paths found', () => {
117+
const input = 'No images here @script.js @readme.txt'
118+
const paths = extractImagePaths(input)
119+
expect(paths).toEqual([])
120+
})
121+
122+
test('should auto-detect absolute paths', () => {
123+
const input = 'Look at /path/to/image.png and ~/screenshots/photo.jpg'
124+
const paths = extractImagePaths(input)
125+
expect(paths).toEqual(['/path/to/image.png', '~/screenshots/photo.jpg'])
126+
})
127+
128+
test('should auto-detect relative paths with separators', () => {
129+
const input = 'Check ./assets/logo.png and ../images/banner.jpg'
130+
const paths = extractImagePaths(input)
131+
expect(paths).toEqual(['./assets/logo.png', '../images/banner.jpg'])
132+
})
133+
134+
test('should auto-detect quoted paths', () => {
135+
const input =
136+
'Files: "./my folder/image.png" and \'../photos/vacation.jpg\''
137+
const paths = extractImagePaths(input)
138+
expect(paths).toEqual(['./my folder/image.png', '../photos/vacation.jpg'])
139+
})
140+
141+
test('should ignore paths in code blocks', () => {
142+
const input =
143+
'See ```./test.png``` and `inline.jpg` but process ./real.png'
144+
const paths = extractImagePaths(input)
145+
expect(paths).toEqual(['./real.png'])
146+
})
147+
148+
test('should remove trailing punctuation from auto-detected paths', () => {
149+
const input = 'Look at /path/image.png, and ./other.jpg!'
150+
const paths = extractImagePaths(input)
151+
expect(paths).toEqual(['/path/image.png', './other.jpg'])
152+
})
153+
154+
test('should deduplicate paths', () => {
155+
const input = '@test.png and /absolute/test.png and @test.png again'
156+
const paths = extractImagePaths(input)
157+
expect(paths).toEqual(['test.png', '/absolute/test.png'])
158+
})
159+
160+
test('should NOT auto-detect bare filenames without separators', () => {
161+
const input = 'Mentioned logo.png and banner.jpg in the text'
162+
const paths = extractImagePaths(input)
163+
expect(paths).toEqual([])
164+
})
165+
166+
test('should handle weird characters and spaces in quoted paths', () => {
167+
const input = 'Files: "./ConstellationFS Demo · 1.21am · 09-11.jpeg" and \'../images/café ñoño (2024).png\''
168+
const paths = extractImagePaths(input)
169+
expect(paths).toEqual(['./ConstellationFS Demo · 1.21am · 09-11.jpeg', '../images/café ñoño (2024).png'])
170+
})
171+
172+
test('should auto-detect paths with spaces and special characters', () => {
173+
const input =
174+
'/Users/brandonchen/Downloads/ConstellationFS Demo · 1.21am · 09-11.jpeg'
175+
const paths = extractImagePaths(input)
176+
expect(paths).toEqual([
177+
'/Users/brandonchen/Downloads/ConstellationFS Demo · 1.21am · 09-11.jpeg',
178+
])
179+
})
180+
181+
test('should handle standalone paths with spaces as the entire input', () => {
182+
const input = ' /Users/test/My Documents/screenshot file.png '
183+
const paths = extractImagePaths(input)
184+
expect(paths).toEqual(['/Users/test/My Documents/screenshot file.png'])
185+
})
186+
})
187+
188+
describe('processImageFile', () => {
189+
test('should successfully process a valid image file', async () => {
190+
const result = await processImageFile(TEST_IMAGE_PATH, TEST_DIR)
191+
192+
expect(result.success).toBe(true)
193+
expect(result.imagePart).toBeDefined()
194+
expect(result.imagePart!.type).toBe('image')
195+
expect(['image/jpeg', 'image/png']).toContain(result.imagePart!.mediaType) // May be compressed to JPEG
196+
expect(result.imagePart!.filename).toBe('test-image.png')
197+
expect(result.imagePart!.image).toMatch(/^[A-Za-z0-9+/]+=*$/) // Base64 regex
198+
})
199+
200+
test('should reject file that does not exist', async () => {
201+
const result = await processImageFile('nonexistent.png', TEST_DIR)
202+
203+
expect(result.success).toBe(false)
204+
expect(result.error).toContain('File not found')
205+
})
206+
207+
test('should reject files that are too large', async () => {
208+
const result = await processImageFile(TEST_LARGE_IMAGE_PATH, TEST_DIR)
209+
210+
expect(result.success).toBe(false)
211+
expect(result.error).toContain('File too large')
212+
})
213+
214+
test('should reject non-image files', async () => {
215+
const textFilePath = path.join(TEST_DIR, 'test.txt')
216+
writeFileSync(textFilePath, 'hello world')
217+
218+
const result = await processImageFile(textFilePath, TEST_DIR)
219+
220+
expect(result.success).toBe(false)
221+
expect(result.error).toContain('Unsupported image format')
222+
})
223+
})
224+
})

0 commit comments

Comments
 (0)