Skip to content

Commit 6aaa5c4

Browse files
committed
fix deepseek image attachments
1 parent 6877b73 commit 6aaa5c4

3 files changed

Lines changed: 216 additions & 6 deletions

File tree

Lines changed: 114 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,114 @@
1+
import { describe, expect, it, mock } from 'bun:test'
2+
3+
import {
4+
createDeepSeekRequest,
5+
normalizeDeepSeekRequestBody,
6+
} from '../deepseek'
7+
8+
import type { ChatCompletionRequestBody } from '../types'
9+
10+
// Unit tests for the pure request-body normalization step: multimodal content
// arrays must be flattened to text, and plain string content must pass through.
describe('normalizeDeepSeekRequestBody', () => {
  it('converts multimodal user content into DeepSeek text content without mutating input', () => {
    const question = 'What is in this image?'
    const pngDataUrl = 'data:image/png;base64,AAECAw=='

    const request: ChatCompletionRequestBody = {
      model: 'deepseek/deepseek-v4-pro',
      messages: [
        {
          role: 'user',
          content: [
            { type: 'text', text: question },
            { type: 'image_url', image_url: { url: pngDataUrl } },
          ],
        },
      ],
    }

    const { messages: normalizedMessages } =
      normalizeDeepSeekRequestBody(request)

    // The image part is replaced by a bracketed notice appended after the text.
    expect(normalizedMessages[0].content).toBe(
      'What is in this image?\n\n[1 image was omitted because the DeepSeek API does not support image input.]',
    )

    // Compared against a fresh literal so that a mutation of `request` would
    // actually be detected.
    expect(request.messages[0].content).toEqual([
      { type: 'text', text: question },
      { type: 'image_url', image_url: { url: pngDataUrl } },
    ])
  })

  it('keeps text-only messages unchanged', () => {
    const request: ChatCompletionRequestBody = {
      model: 'deepseek/deepseek-v4-pro',
      messages: [{ role: 'user', content: 'Hello' }],
    }

    // Only the model id is expected to differ (provider prefix removed).
    const expected = { ...request, model: 'deepseek-v4-pro' }

    expect(normalizeDeepSeekRequestBody(request)).toEqual(expected)
  })
})
54+
55+
// End-to-end check of the outgoing request: the body handed to `fetch` must
// contain flattened text content and none of the fields this test expects to
// be stripped.
describe('createDeepSeekRequest', () => {
  it('sends DeepSeek-compatible text content when the request contains an image attachment', async () => {
    // Filled in by the fake fetch below with the parsed JSON request body.
    let sentBody: Record<string, unknown> | null = null

    const captureRequest = async (
      _url: string | URL | Request,
      init?: RequestInit,
    ) => {
      sentBody = JSON.parse(init?.body as string)
      return new Response(JSON.stringify({ ok: true }), { status: 200 })
    }
    const mockFetch = mock(captureRequest) as unknown as typeof globalThis.fetch

    const request: ChatCompletionRequestBody = {
      model: 'deepseek/deepseek-v4-pro',
      messages: [
        { role: 'system', content: 'You are a coding assistant.' },
        {
          role: 'user',
          content: [
            { type: 'text', text: 'Please inspect this screenshot.' },
            {
              type: 'image_url',
              image_url: { url: 'data:image/jpeg;base64,/9j/4AAQSkZJRg==' },
            },
          ],
        },
      ],
      stream: true,
      reasoning: { enabled: true, effort: 'medium' },
      provider: { order: ['DeepSeek'] },
      transforms: ['middle-out'],
      codebuff_metadata: { run_id: 'run-1', cost_mode: 'free' },
      usage: { include: true },
    }

    await createDeepSeekRequest({
      body: request,
      originalModel: request.model,
      fetch: mockFetch,
    })

    expect(sentBody).toMatchObject({
      model: 'deepseek-v4-pro',
      stream: true,
      stream_options: { include_usage: true },
      thinking: { type: 'enabled', reasoning_effort: 'high' },
    })

    // Fields that must not appear in the body sent upstream.
    const strippedFields = [
      'reasoning',
      'provider',
      'transforms',
      'codebuff_metadata',
      'usage',
    ]
    for (const field of strippedFields) {
      expect(sentBody).not.toHaveProperty(field)
    }

    // The cast is needed because TypeScript does not track the assignment made
    // inside the fetch callback.
    const payload = sentBody as unknown as Record<string, unknown>
    const sentMessages = payload.messages as Array<{ content: string }>
    expect(sentMessages[1].content).toBe(
      'Please inspect this screenshot.\n\n[1 image was omitted because the DeepSeek API does not support image input.]',
    )

    // The image part is gone from the wire payload but still present on the
    // caller's original request object.
    expect(JSON.stringify(sentBody)).not.toContain('image_url')
    expect(JSON.stringify(request)).toContain('image_url')
  })
})

web/src/llm-api/deepseek.ts

Lines changed: 80 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -91,16 +91,91 @@ function toDeepSeekReasoningEffort(effort: unknown): 'high' | 'max' {
9191
return effort === 'max' || effort === 'xhigh' ? 'max' : 'high'
9292
}
9393

94-
function createDeepSeekRequest(params: {
94+
/**
 * Builds the bracketed placeholder sentence that stands in for attachments
 * removed from a message.
 *
 * @param kind - Singular name of the attachment type (e.g. `'image'`); it is
 *   pluralized by appending `s` whenever `count` is not exactly 1.
 * @param count - Number of attachments of this kind that were dropped.
 * @returns A notice such as
 *   `[2 images were omitted because the DeepSeek API does not support image input.]`.
 */
function unsupportedAttachmentNotice(kind: string, count: number): string {
  const isSingular = count === 1
  const subjectAndVerb = isSingular ? `${kind} was` : `${kind}s were`
  return `[${count} ${subjectAndVerb} omitted because the DeepSeek API does not support ${kind} input.]`
}
99+
100+
function contentPartsToDeepSeekText(
101+
content: NonNullable<
102+
ChatCompletionRequestBody['messages'][number]['content']
103+
>,
104+
): string {
105+
if (!Array.isArray(content)) {
106+
return content
107+
}
108+
109+
const textParts: string[] = []
110+
let imageCount = 0
111+
let fileCount = 0
112+
let unsupportedCount = 0
113+
114+
for (const part of content) {
115+
switch (part.type) {
116+
case 'text': {
117+
if (typeof part.text === 'string' && part.text.length > 0) {
118+
textParts.push(part.text)
119+
}
120+
break
121+
}
122+
case 'image_url': {
123+
imageCount += 1
124+
break
125+
}
126+
case 'file': {
127+
fileCount += 1
128+
break
129+
}
130+
default: {
131+
unsupportedCount += 1
132+
break
133+
}
134+
}
135+
}
136+
137+
if (imageCount > 0) {
138+
textParts.push(unsupportedAttachmentNotice('image', imageCount))
139+
}
140+
if (fileCount > 0) {
141+
textParts.push(unsupportedAttachmentNotice('file', fileCount))
142+
}
143+
if (unsupportedCount > 0) {
144+
textParts.push(
145+
unsupportedAttachmentNotice('unsupported content part', unsupportedCount),
146+
)
147+
}
148+
149+
return textParts.join('\n\n')
150+
}
151+
152+
/**
 * Produces a copy of a chat-completion request body with the model id mapped
 * through `getDeepSeekModelId` and every message's content flattened to text.
 *
 * The input is not modified: the body and each message are shallow-copied.
 * Messages whose content is `null` or `undefined` keep that value as-is.
 *
 * @param body - The incoming request body.
 * @param originalModel - Model id to translate; defaults to `body.model`.
 * @returns A new request body with the translated model and text-only content.
 */
export function normalizeDeepSeekRequestBody(
  body: ChatCompletionRequestBody,
  originalModel: string = body.model,
): ChatCompletionRequestBody {
  const model = getDeepSeekModelId(originalModel)

  const messages = body.messages.map((message) => {
    const { content } = message
    // `== null` matches both null and undefined; either is passed through.
    if (content == null) {
      return { ...message, content }
    }
    return { ...message, content: contentPartsToDeepSeekText(content) }
  })

  return { ...body, model, messages }
}
168+
169+
export function createDeepSeekRequest(params: {
95170
body: ChatCompletionRequestBody
96171
originalModel: string
97172
fetch: typeof globalThis.fetch
98173
}) {
99174
const { body, originalModel, fetch } = params
100-
const deepseekBody: Record<string, unknown> = {
101-
...body,
102-
model: getDeepSeekModelId(originalModel),
103-
}
175+
const deepseekBody = normalizeDeepSeekRequestBody(
176+
body,
177+
originalModel,
178+
) as unknown as Record<string, unknown>
104179

105180
// DeepSeek uses `thinking` instead of OpenRouter's `reasoning`.
106181
if (deepseekBody.reasoning && typeof deepseekBody.reasoning === 'object') {

web/src/llm-api/types.ts

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@ export interface CodebuffMetadata {
1515

1616
export interface ChatMessage {
1717
role: 'system' | 'user' | 'assistant' | 'tool'
18-
content?: string | null
18+
content?: string | ChatCompletionContentPart[] | null
1919
name?: string
2020
tool_calls?: Array<{
2121
id: string
@@ -28,6 +28,27 @@ export interface ChatMessage {
2828
tool_call_id?: string
2929
}
3030

31+
/**
 * One element of a multimodal message `content` array.
 *
 * Every payload field is optional, so consumers must check for presence at
 * runtime. The last member accepts any other `type` string with arbitrary
 * extra fields, which lets part kinds not listed here still type-check.
 */
export type ChatCompletionContentPart =
  // Plain text segment.
  | { type: 'text'; text?: string }
  // Image reference, given either as a bare string or wrapped in `{ url }`.
  | { type: 'image_url'; image_url?: string | { url?: string } }
  // File attachment with an optional name and data payload.
  | { type: 'file'; file?: { filename?: string; file_data?: string } }
  // Any other part kind.
  | { type: string; [key: string]: unknown }
51+
3152
export interface ChatCompletionTool {
3253
id?: string
3354
type: string

0 commit comments

Comments
 (0)