Skip to content

Commit fe5b0cf

Browse files
committed
address comments
1 parent a5ac4ca commit fe5b0cf

4 files changed

Lines changed: 56 additions & 4 deletions

File tree

apps/sim/app/api/files/parse/route.test.ts

Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,6 +69,7 @@ vi.mock('@/app/api/files/authorization', () => ({
6969
vi.mock('@/lib/uploads', () => ({
7070
getStorageProvider: mockGetStorageProvider,
7171
isUsingCloudStorage: mockIsUsingCloudStorage,
72+
StorageService: storageServiceMock,
7273
}))
7374

7475
vi.mock('@/lib/file-parsers', () => ({
@@ -172,6 +173,7 @@ describe('File Parse API Route', () => {
172173

173174
permissionsMockFns.mockGetUserEntityPermissions.mockResolvedValue({ canView: true })
174175
storageServiceMockFns.mockHasCloudStorage.mockReturnValue(true)
176+
storageServiceMockFns.mockDownloadFile.mockResolvedValue(Buffer.from('test file content'))
175177
mockIsSupportedFileType.mockReturnValue(true)
176178
mockParseFile.mockResolvedValue({
177179
content: 'parsed content',
@@ -245,6 +247,48 @@ describe('File Parse API Route', () => {
245247
}
246248
})
247249

250+
it('should keep known binary extensions as binary even when the bytes are valid UTF-8', async () => {
251+
setupFileApiMocks({
252+
cloudEnabled: true,
253+
storageProvider: 's3',
254+
authenticated: true,
255+
})
256+
mockIsSupportedFileType.mockReturnValue(false)
257+
storageServiceMockFns.mockDownloadFile.mockResolvedValue(Buffer.from('valid utf8 bytes'))
258+
259+
const req = createMockRequest('POST', {
260+
filePath: '/api/files/serve/execution/workspace-1/workflow-1/execution-1/image.png',
261+
})
262+
263+
const response = await POST(req)
264+
const data = await response.json()
265+
266+
expect(response.status).toBe(200)
267+
expect(data.success).toBe(true)
268+
expect(data.output.content).toBe('[Binary PNG file - 16 bytes]')
269+
})
270+
271+
it('should parse unknown extensions as text when the bytes look like UTF-8 text', async () => {
272+
setupFileApiMocks({
273+
cloudEnabled: true,
274+
storageProvider: 's3',
275+
authenticated: true,
276+
})
277+
mockIsSupportedFileType.mockReturnValue(false)
278+
storageServiceMockFns.mockDownloadFile.mockResolvedValue(Buffer.from('plain text content'))
279+
280+
const req = createMockRequest('POST', {
281+
filePath: '/api/files/serve/execution/workspace-1/workflow-1/execution-1/readme.customtext',
282+
})
283+
284+
const response = await POST(req)
285+
const data = await response.json()
286+
287+
expect(response.status).toBe(200)
288+
expect(data.success).toBe(true)
289+
expect(data.output.content).toBe('plain text content')
290+
})
291+
248292
it('should handle multiple files', async () => {
249293
setupFileApiMocks({
250294
cloudEnabled: false,

apps/sim/app/api/files/parse/route.ts

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@ import { createHash } from 'crypto'
33
import fsPromises, { readFile } from 'fs/promises'
44
import path from 'path'
55
import { createLogger } from '@sim/logger'
6+
import binaryExtensionsList from 'binary-extensions'
67
import { type NextRequest, NextResponse } from 'next/server'
78
import { fileParseContract } from '@/lib/api/contracts/storage-transfer'
89
import { getValidationErrorMessage, parseRequest } from '@/lib/api/server'
@@ -38,6 +39,7 @@ const logger = createLogger('FilesParseAPI')
3839

3940
const MAX_DOWNLOAD_SIZE_BYTES = 100 * 1024 * 1024 // 100 MB
4041
const DOWNLOAD_TIMEOUT_MS = 30000 // 30 seconds
42+
const BINARY_EXTENSIONS = new Set<string>(binaryExtensionsList)
4143

4244
function isLikelyTextBuffer(fileBuffer: Buffer): boolean {
4345
return isUtf8(fileBuffer) && !fileBuffer.includes(0)
@@ -866,9 +868,11 @@ function handleGenericBuffer(
866868
extension: string,
867869
fileType?: string
868870
): ParseResult {
869-
const content = isLikelyTextBuffer(fileBuffer)
870-
? fileBuffer.toString('utf-8')
871-
: `[Binary ${extension.toUpperCase()} file - ${fileBuffer.length} bytes]`
871+
const normalizedExtension = extension.toLowerCase()
872+
const content =
873+
!BINARY_EXTENSIONS.has(normalizedExtension) && isLikelyTextBuffer(fileBuffer)
874+
? fileBuffer.toString('utf-8')
875+
: `[Binary ${normalizedExtension.toUpperCase()} file - ${fileBuffer.length} bytes]`
872876

873877
return {
874878
success: true,

apps/sim/package.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -108,6 +108,7 @@
108108
"ajv": "8.18.0",
109109
"better-auth": "1.3.12",
110110
"better-auth-harmony": "1.3.1",
111+
"binary-extensions": "3.1.0",
111112
"browser-image-compression": "^2.0.2",
112113
"cheerio": "1.1.2",
113114
"class-variance-authority": "^0.7.1",

bun.lock

Lines changed: 4 additions & 1 deletion
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

0 commit comments

Comments
 (0)