Skip to content

Commit c9239b5

Browse files
committed
cleanup
1 parent 233a3ee commit c9239b5

9 files changed

Lines changed: 29 additions & 114 deletions

File tree

apps/sim/app/api/tools/textract/parse/route.ts

Lines changed: 17 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -496,7 +496,10 @@ export async function POST(request: NextRequest) {
496496
}
497497
}
498498

499-
const { bytes } = await fetchDocumentBytes(fileUrl)
499+
const { bytes, contentType } = await fetchDocumentBytes(fileUrl)
500+
501+
// Track if this is a PDF for better error messaging
502+
const isPdf = contentType.includes('pdf') || fileUrl.toLowerCase().endsWith('.pdf')
500503

501504
const uri = '/'
502505

@@ -559,15 +562,27 @@ export async function POST(request: NextRequest) {
559562
logger.error(`[${requestId}] Textract API error:`, errorText)
560563

561564
let errorMessage = `Textract API error: ${textractResponse.statusText}`
565+
let isUnsupportedFormat = false
562566
try {
563567
const errorJson = JSON.parse(errorText)
564568
if (errorJson.Message) {
565569
errorMessage = errorJson.Message
566570
} else if (errorJson.__type) {
567571
errorMessage = `${errorJson.__type}: ${errorJson.message || errorText}`
568572
}
573+
// Check for unsupported document format error
574+
isUnsupportedFormat =
575+
errorJson.__type === 'UnsupportedDocumentException' ||
576+
errorJson.Message?.toLowerCase().includes('unsupported document') ||
577+
errorText.toLowerCase().includes('unsupported document')
569578
} catch {
570-
// Use default error message
579+
isUnsupportedFormat = errorText.toLowerCase().includes('unsupported document')
580+
}
581+
582+
// Provide helpful message for unsupported format (likely multi-page PDF)
583+
if (isUnsupportedFormat && isPdf) {
584+
errorMessage =
585+
'This document format is not supported in Single Page mode. If this is a multi-page PDF, please use "Multi-Page (PDF, TIFF via S3)" mode instead, which requires uploading your document to S3 first. Single Page mode only supports JPEG, PNG, and single-page PDF files.'
571586
}
572587

573588
return NextResponse.json(

apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/chat/components/output-select/output-select.tsx

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -129,10 +129,6 @@ export function OutputSelect({
129129
? baselineWorkflow.blocks?.[block.id]?.subBlocks?.responseFormat?.value
130130
: subBlockValues?.[block.id]?.responseFormat
131131
const responseFormat = parseResponseFormatSafely(responseFormatValue, block.id)
132-
const operationValue =
133-
shouldUseBaseline && baselineWorkflow
134-
? baselineWorkflow.blocks?.[block.id]?.subBlocks?.operation?.value
135-
: subBlockValues?.[block.id]?.operation
136132

137133
let outputsToProcess: Record<string, unknown> = {}
138134

apps/sim/app/workspace/[workspaceId]/w/[workflowId]/components/panel/components/editor/components/sub-block/components/tag-dropdown/tag-dropdown.tsx

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -242,7 +242,6 @@ const getOutputTypeForPath = (
242242
const subBlocks = mergedSubBlocksOverride ?? (blockState?.subBlocks || {})
243243
return getBlockOutputType(block.type, outputPath, subBlocks)
244244
} else if (blockConfig) {
245-
// Pass full subBlocks to support tool selectors that use any field (operation, provider, etc.)
246245
const blockState = useWorkflowStore.getState().blocks[blockId]
247246
const subBlocks = mergedSubBlocksOverride ?? (blockState?.subBlocks || {})
248247
return getToolOutputType(blockConfig, subBlocks, outputPath)
@@ -1211,7 +1210,6 @@ export const TagDropdown: React.FC<TagDropdownProps> = ({
12111210
: allTags
12121211
}
12131212
} else {
1214-
// Pass full subBlocks to support tool selectors that use any field (operation, provider, etc.)
12151213
const toolOutputPaths = getToolOutputPaths(blockConfig, mergedSubBlocks)
12161214

12171215
if (toolOutputPaths.length > 0) {

apps/sim/blocks/blocks/file.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -150,7 +150,7 @@ export const FileV2Block: BlockConfig<FileParserOutput> = {
150150
title: 'Files',
151151
type: 'short-input' as SubBlockType,
152152
canonicalParamId: 'fileInput',
153-
placeholder: 'File URL or reference from previous block',
153+
placeholder: 'File URL',
154154
mode: 'advanced',
155155
},
156156
],

apps/sim/blocks/blocks/mistral_parse.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -165,7 +165,7 @@ export const MistralParseV2Block: BlockConfig<MistralParserOutput> = {
165165
title: 'PDF Document',
166166
type: 'short-input' as SubBlockType,
167167
canonicalParamId: 'document',
168-
placeholder: 'Document URL or reference from previous block',
168+
placeholder: 'Document URL',
169169
mode: 'advanced',
170170
},
171171
{

apps/sim/blocks/blocks/pulse.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -29,7 +29,7 @@ export const PulseBlock: BlockConfig<PulseParserOutput> = {
2929
title: 'Document',
3030
type: 'short-input' as SubBlockType,
3131
canonicalParamId: 'document',
32-
placeholder: 'Document URL or reference from previous block',
32+
placeholder: 'Document URL',
3333
mode: 'advanced',
3434
},
3535
{

apps/sim/blocks/blocks/reducto.ts

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ export const ReductoBlock: BlockConfig<ReductoParserOutput> = {
2828
title: 'PDF Document',
2929
type: 'short-input' as SubBlockType,
3030
canonicalParamId: 'document',
31-
placeholder: 'Document URL or reference from previous block',
31+
placeholder: 'Document URL',
3232
mode: 'advanced',
3333
},
3434
{

apps/sim/blocks/blocks/textract.ts

Lines changed: 8 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -15,20 +15,22 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
1515
subBlocks: [
1616
{
1717
id: 'processingMode',
18-
title: 'Document Type',
18+
title: 'Processing Mode',
1919
type: 'dropdown' as SubBlockType,
2020
options: [
21-
{ id: 'sync', label: 'Single Page' },
22-
{ id: 'async', label: 'Multi-Page' },
21+
{ id: 'sync', label: 'Single Page (JPEG, PNG, 1-page PDF)' },
22+
{ id: 'async', label: 'Multi-Page (PDF, TIFF via S3)' },
2323
],
24+
tooltip:
25+
'Single Page uses synchronous API for JPEG, PNG, or single-page PDF. Multi-Page uses async API for multi-page PDF/TIFF stored in S3.',
2426
},
2527
{
2628
id: 'fileUpload',
2729
title: 'Document',
2830
type: 'file-upload' as SubBlockType,
2931
canonicalParamId: 'document',
30-
acceptedTypes: 'application/pdf,image/jpeg,image/png,image/tiff',
31-
placeholder: 'Upload a document',
32+
acceptedTypes: 'image/jpeg,image/png,application/pdf',
33+
placeholder: 'Upload JPEG, PNG, or single-page PDF (max 10MB)',
3234
condition: {
3335
field: 'processingMode',
3436
value: 'async',
@@ -42,7 +44,7 @@ export const TextractBlock: BlockConfig<TextractParserOutput> = {
4244
title: 'Document',
4345
type: 'short-input' as SubBlockType,
4446
canonicalParamId: 'document',
45-
placeholder: 'Document URL or reference from previous block',
47+
placeholder: 'URL to JPEG, PNG, or single-page PDF',
4648
condition: {
4749
field: 'processingMode',
4850
value: 'async',

apps/sim/tools/mistral/types.ts

Lines changed: 0 additions & 96 deletions
Original file line number | Diff line number | Diff line change
@@ -1,176 +1,80 @@
11
import type { ToolResponse } from '@/tools/types'
22

3-
/**
4-
* Input parameters for the Mistral OCR parser tool
5-
*/
63
export interface MistralParserInput {
7-
/** URL to a PDF document to be processed */
84
filePath: string
9-
10-
/** File upload data (from file-upload component) */
115
fileUpload?: any
12-
13-
/** Internal file path flag (for presigned URL conversion) */
146
_internalFilePath?: string
15-
16-
/** Mistral API key for authentication */
177
apiKey: string
18-
19-
/** Output format for the extracted content (default: 'markdown') */
208
resultType?: 'markdown' | 'text' | 'json'
21-
22-
/** Whether to include base64-encoded images in the response */
239
includeImageBase64?: boolean
24-
25-
/** Specific pages to process (zero-indexed) */
2610
pages?: number[]
27-
28-
/** Maximum number of images to extract from the PDF */
2911
imageLimit?: number
30-
31-
/** Minimum height and width (in pixels) for images to extract */
3212
imageMinSize?: number
3313
}
3414

35-
/**
36-
* Usage information returned by the Mistral OCR API
37-
*/
3815
export interface MistralOcrUsageInfo {
39-
/** Number of pages processed in the document */
4016
pagesProcessed: number
41-
42-
/** Size of the document in bytes */
4317
docSizeBytes: number | null
4418
}
4519

46-
/**
47-
* Metadata about the processed document
48-
*/
4920
export interface MistralParserMetadata {
50-
/** Unique identifier for this OCR job */
5121
jobId: string
52-
53-
/** File type of the document (typically 'pdf') */
5422
fileType: string
55-
56-
/** Filename extracted from the document URL */
5723
fileName: string
58-
59-
/** Source type (always 'url' for now) */
6024
source: 'url'
61-
62-
/** Original URL to the document (only included for user-provided URLs) */
6325
sourceUrl?: string
64-
65-
/** Total number of pages in the document */
6626
pageCount: number
67-
68-
/** Usage statistics from the OCR processing */
6927
usageInfo?: MistralOcrUsageInfo
70-
71-
/** The Mistral OCR model used for processing */
7228
model: string
73-
74-
/** The output format that was requested */
7529
resultType?: 'markdown' | 'text' | 'json'
76-
77-
/** ISO timestamp when the document was processed */
7830
processedAt: string
7931
}
8032

81-
/**
82-
* Output data structure from the Mistral OCR parser
83-
*/
8433
export interface MistralParserOutputData {
85-
/** Extracted content in the requested format */
8634
content: string
87-
88-
/** Metadata about the parsed document and processing */
8935
metadata: MistralParserMetadata
9036
}
9137

92-
/**
93-
* Complete response from the Mistral OCR parser tool
94-
*/
9538
export interface MistralParserOutput extends ToolResponse {
96-
/** The output data containing content and metadata */
9739
output: MistralParserOutputData
9840
}
9941

100-
/**
101-
* Image bounding box and data from Mistral OCR API
102-
*/
10342
export interface MistralOcrImage {
104-
/** Image identifier */
10543
id: string
106-
/** Top-left X coordinate */
10744
top_left_x: number
108-
/** Top-left Y coordinate */
10945
top_left_y: number
110-
/** Bottom-right X coordinate */
11146
bottom_right_x: number
112-
/** Bottom-right Y coordinate */
11347
bottom_right_y: number
114-
/** Base64-encoded image data (if includeImageBase64 was true) */
11548
image_base64?: string
11649
}
11750

118-
/**
119-
* Page dimensions from Mistral OCR API
120-
*/
12151
export interface MistralOcrDimensions {
122-
/** DPI of the page */
12352
dpi: number
124-
/** Page height in pixels */
12553
height: number
126-
/** Page width in pixels */
12754
width: number
12855
}
12956

130-
/**
131-
* Page data from Mistral OCR API
132-
*/
13357
export interface MistralOcrPage {
134-
/** Page index (zero-based) */
13558
index: number
136-
/** Markdown content extracted from this page */
13759
markdown: string
138-
/** Images extracted from this page */
13960
images: MistralOcrImage[]
140-
/** Page dimensions */
14161
dimensions: MistralOcrDimensions
142-
/** Tables extracted from this page */
14362
tables: unknown[]
144-
/** Hyperlinks found on this page */
14563
hyperlinks: unknown[]
146-
/** Header content if detected */
14764
header: string | null
148-
/** Footer content if detected */
14965
footer: string | null
15066
}
15167

152-
/**
153-
* Raw usage info from Mistral OCR API
154-
*/
15568
export interface MistralOcrUsageInfoRaw {
156-
/** Number of pages processed */
15769
pages_processed: number
158-
/** Document size in bytes */
15970
doc_size_bytes: number | null
16071
}
16172

162-
/**
163-
* V2 Output - Returns raw Mistral API response structure
164-
*/
16573
export interface MistralParserV2Output extends ToolResponse {
16674
output: {
167-
/** Array of page objects with full OCR data */
16875
pages: MistralOcrPage[]
169-
/** Model used for OCR processing */
17076
model: string
171-
/** Usage statistics from the API */
17277
usage_info: MistralOcrUsageInfoRaw
173-
/** Structured annotation data as JSON string (when applicable) */
17478
document_annotation: string | null
17579
}
17680
}

0 commit comments

Comments
 (0)