|
1 | 1 | import type { ToolResponse } from '@/tools/types' |
2 | 2 |
|
/**
 * Input parameters accepted by the Mistral OCR parser tool.
 */
export interface MistralParserInput {
  /** URL of the PDF document to process */
  filePath: string

  /** File upload data (as produced by the file-upload component) */
  fileUpload?: any

  /** Internal file path flag (used for presigned URL conversion) */
  _internalFilePath?: string

  /** Mistral API key used for authentication */
  apiKey: string

  /** Output format for the extracted content (default: 'markdown') */
  resultType?: 'markdown' | 'text' | 'json'

  /** Whether base64-encoded images should be included in the response */
  includeImageBase64?: boolean

  /** Specific pages to process (zero-indexed) */
  pages?: number[]

  /** Maximum number of images to extract from the PDF */
  imageLimit?: number

  /** Minimum height and width (in pixels) for an image to be extracted */
  imageMinSize?: number
}
34 | 14 |
|
/**
 * Usage information reported by the Mistral OCR API (camelCase form).
 */
export interface MistralOcrUsageInfo {
  /** Number of pages processed in the document */
  pagesProcessed: number

  /** Size of the document in bytes; may be null */
  docSizeBytes: number | null
}
45 | 19 |
|
/**
 * Metadata describing a processed document and the OCR run that produced it.
 */
export interface MistralParserMetadata {
  /** Unique identifier for this OCR job */
  jobId: string

  /** File type of the document (typically 'pdf') */
  fileType: string

  /** Filename extracted from the document URL */
  fileName: string

  /** Source type; 'url' is the only value the type currently allows */
  source: 'url'

  /** Original URL of the document (only included for user-provided URLs) */
  sourceUrl?: string

  /** Total number of pages in the document */
  pageCount: number

  /** Usage statistics from the OCR processing */
  usageInfo?: MistralOcrUsageInfo

  /** The Mistral OCR model used for processing */
  model: string

  /** The output format that was requested */
  resultType?: 'markdown' | 'text' | 'json'

  /** ISO timestamp of when the document was processed */
  processedAt: string
}
80 | 32 |
|
/**
 * Output data structure produced by the Mistral OCR parser.
 */
export interface MistralParserOutputData {
  /** Extracted content in the requested format */
  content: string

  /** Metadata about the parsed document and its processing */
  metadata: MistralParserMetadata
}
91 | 37 |
|
/**
 * Complete response from the Mistral OCR parser tool.
 */
export interface MistralParserOutput extends ToolResponse {
  /** The output data containing content and metadata */
  output: MistralParserOutputData
}
99 | 41 |
|
/**
 * Image bounding box and data as returned by the Mistral OCR API.
 * Field names are snake_case to mirror the raw API payload.
 */
export interface MistralOcrImage {
  /** Image identifier */
  id: string
  /** Top-left X coordinate */
  top_left_x: number
  /** Top-left Y coordinate */
  top_left_y: number
  /** Bottom-right X coordinate */
  bottom_right_x: number
  /** Bottom-right Y coordinate */
  bottom_right_y: number
  /** Base64-encoded image data (present if includeImageBase64 was true) */
  image_base64?: string
}
117 | 50 |
|
/**
 * Page dimensions as returned by the Mistral OCR API.
 */
export interface MistralOcrDimensions {
  /** DPI of the page */
  dpi: number
  /** Page height in pixels */
  height: number
  /** Page width in pixels */
  width: number
}
129 | 56 |
|
/**
 * Per-page data as returned by the Mistral OCR API.
 */
export interface MistralOcrPage {
  /** Page index (zero-based) */
  index: number
  /** Markdown content extracted from this page */
  markdown: string
  /** Images extracted from this page */
  images: MistralOcrImage[]
  /** Page dimensions */
  dimensions: MistralOcrDimensions
  /** Tables extracted from this page (element shape is not modelled here) */
  tables: unknown[]
  /** Hyperlinks found on this page (element shape is not modelled here) */
  hyperlinks: unknown[]
  /** Header content if detected, otherwise null */
  header: string | null
  /** Footer content if detected, otherwise null */
  footer: string | null
}
151 | 67 |
|
/**
 * Raw usage info from the Mistral OCR API (snake_case, as sent on the wire).
 */
export interface MistralOcrUsageInfoRaw {
  /** Number of pages processed */
  pages_processed: number
  /** Document size in bytes; may be null */
  doc_size_bytes: number | null
}
161 | 72 |
|
/**
 * V2 output: returns the raw Mistral API response structure.
 */
export interface MistralParserV2Output extends ToolResponse {
  output: {
    /** Array of page objects with full OCR data */
    pages: MistralOcrPage[]
    /** Model used for OCR processing */
    model: string
    /** Usage statistics from the API */
    usage_info: MistralOcrUsageInfoRaw
    /** Structured annotation data as a JSON string (when applicable), otherwise null */
    document_annotation: string | null
  }
}
0 commit comments