Skip to content

Commit 105c65f

Browse files
committed
improvement(sandbox): expand document generation — style extraction, sandbox hardening, OOM errors, PPTX/DOCX/PDF task guards
1 parent 630db5c commit 105c65f

7 files changed

Lines changed: 415 additions & 106 deletions

File tree

apps/sim/app/api/workspaces/[id]/files/[fileId]/style/route.ts

Lines changed: 21 additions & 16 deletions
Original file line number | Diff line number | Diff line change
@@ -16,21 +16,23 @@ const logger = createLogger('WorkspaceFileStyleAPI')
1616

1717
/**
1818
* GET /api/workspaces/[id]/files/[fileId]/style
19-
* Extract a compact JSON style summary from an uploaded .docx or .pptx file.
20-
* Uses OOXML theme XML to return theme colors, font pair, and named styles.
21-
* Only works on binary OOXML files (ZIP format) — not on JS source files.
19+
* Extract a compact JSON style summary from an uploaded .docx, .pptx, or .pdf file.
20+
* OOXML files return theme colors, font pair, and named styles.
21+
* PDF files return page dimensions and embedded font names.
2222
*/
23+
const MAX_STYLE_FILE_BYTES = 100 * 1024 * 1024 // 100 MB
24+
2325
export const GET = withRouteHandler(
2426
async (request: NextRequest, context: { params: Promise<{ id: string; fileId: string }> }) => {
25-
const parsed = await parseRequest(workspaceFileStyleContract, request, context)
26-
if (!parsed.success) return parsed.response
27-
const { id: workspaceId, fileId } = parsed.data.params
28-
2927
const session = await getSession()
3028
if (!session?.user?.id) {
3129
return NextResponse.json({ error: 'Unauthorized' }, { status: 401 })
3230
}
3331

32+
const parsed = await parseRequest(workspaceFileStyleContract, request, context)
33+
if (!parsed.success) return parsed.response
34+
const { id: workspaceId, fileId } = parsed.data.params
35+
3436
const membership = await verifyWorkspaceMembership(session.user.id, workspaceId)
3537
if (!membership) {
3638
return NextResponse.json({ error: 'Insufficient permissions' }, { status: 403 })
@@ -42,13 +44,20 @@ export const GET = withRouteHandler(
4244
}
4345

4446
const rawExt = fileRecord.name.split('.').pop()?.toLowerCase()
45-
if (rawExt !== 'docx' && rawExt !== 'pptx') {
47+
if (rawExt !== 'docx' && rawExt !== 'pptx' && rawExt !== 'pdf') {
4648
return NextResponse.json(
47-
{ error: 'Style extraction only supports .docx and .pptx files' },
49+
{ error: 'Style extraction supports .docx, .pptx, and .pdf files' },
50+
{ status: 422 }
51+
)
52+
}
53+
const ext: 'docx' | 'pptx' | 'pdf' = rawExt
54+
55+
if (fileRecord.size > MAX_STYLE_FILE_BYTES) {
56+
return NextResponse.json(
57+
{ error: 'File is too large for style extraction (limit: 100 MB)' },
4858
{ status: 422 }
4959
)
5060
}
51-
const ext: 'docx' | 'pptx' = rawExt
5261

5362
let buffer: Buffer
5463
try {
@@ -66,17 +75,13 @@ export const GET = withRouteHandler(
6675
return NextResponse.json(
6776
{
6877
error:
69-
'File is not a compiled binary document — style extraction requires an uploaded or compiled .docx/.pptx file',
78+
'Could not extract style — file may be encrypted, corrupt, image-only, or contain no parseable style information',
7079
},
7180
{ status: 422 }
7281
)
7382
}
7483

75-
logger.info('Extracted style summary via API', {
76-
fileId,
77-
format: ext,
78-
themeName: summary.theme.name,
79-
})
84+
logger.info('Extracted style summary via API', { fileId, format: ext })
8085

8186
return NextResponse.json(summary, {
8287
headers: { 'Cache-Control': 'private, max-age=300' },

apps/sim/lib/api/contracts/workspace-files.ts

Lines changed: 19 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -1,5 +1,5 @@
11
import { z } from 'zod'
2-
import { type ContractJsonResponse, defineRouteContract } from '@/lib/api/contracts/types'
2+
import { defineRouteContract } from '@/lib/api/contracts/types'
33

44
export const workspaceFileScopeSchema = z.enum(['active', 'archived', 'all'])
55

@@ -58,7 +58,6 @@ export const listWorkspaceFilesContract = defineRouteContract({
5858
}),
5959
},
6060
})
61-
export type ListWorkspaceFilesResponse = ContractJsonResponse<typeof listWorkspaceFilesContract>
6261

6362
export const renameWorkspaceFileContract = defineRouteContract({
6463
method: 'PATCH',
@@ -108,15 +107,30 @@ export const updateWorkspaceFileContentContract = defineRouteContract({
108107

109108
const documentStyleSummarySchema = z
110109
.object({
111-
format: z.enum(['docx', 'pptx']),
110+
format: z.enum(['docx', 'pptx', 'pdf']),
111+
// OOXML theme — present for pptx, present for docx when theme1.xml exists, absent for pdf
112112
theme: z
113113
.object({
114-
name: z.string(),
115114
colors: z.record(z.string(), z.string()),
116115
fonts: z.object({ major: z.string(), minor: z.string() }),
117116
})
118-
.passthrough(),
117+
.optional(),
118+
// docx only
119119
styles: z.array(z.object({}).passthrough()).optional(),
120+
defaults: z.object({ fontSize: z.number().optional(), font: z.string().optional() }).optional(),
121+
// pdf only
122+
pageSize: z
123+
.object({
124+
preset: z.enum(['A4', 'letter', 'custom']),
125+
widthPt: z.number().optional(),
126+
heightPt: z.number().optional(),
127+
})
128+
.optional(),
129+
fonts: z.array(z.string()).optional(),
130+
// pptx only
131+
slideCount: z.number().optional(),
132+
aspectRatio: z.enum(['16:9', '4:3', 'custom']).optional(),
133+
background: z.string().optional(),
120134
})
121135
.passthrough()
122136

0 commit comments

Comments
 (0)