# Emit INFO-level records so the per-test diagnostics logged below are visible.
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Both clients are built from a single settings mapping so the synchronous and
# asynchronous suites cannot drift onto different endpoints or credentials.
# The endpoint is a server on localhost:3000; the key is read from the
# environment (os.getenv yields None when JIGSAWSTACK_API_KEY is unset).
_CLIENT_SETTINGS = {
    "api_url": "http://localhost:3000/api/",
    "api_key": os.getenv("JIGSAWSTACK_API_KEY"),
}
jigsaw = jigsawstack.JigsawStack(**_CLIENT_SETTINGS)
async_jigsaw = jigsawstack.AsyncJigsawStack(**_CLIENT_SETTINGS)

# Image referenced by the URL-based VOCR test cases.
IMAGE_URL = "https://jigsawstack.com/preview/vocr-example.jpg"
2024
4650 "prompt" : [
4751 "What is the main heading?" ,
4852 "Extract any dates mentioned" ,
49- "What are the key points?"
53+ "What are the key points?" ,
5054 ]
5155 },
5256 },
5761 "prompt" : {
5862 "title" : "Extract the main title" ,
5963 "content" : "What is the main content?" ,
60- "metadata" : "Extract any metadata or additional information"
64+ "metadata" : "Extract any metadata or additional information" ,
6165 }
6266 },
6367 },
6468 {
6569 "name" : "url_with_string_prompt" ,
66- "params" : {
67- "url" : IMAGE_URL ,
68- "prompt" : "Summarize the text content"
69- },
70+ "params" : {"url" : IMAGE_URL , "prompt" : "Summarize the text content" },
7071 "blob" : None ,
7172 "options" : None ,
7273 },
7374 {
7475 "name" : "url_with_list_prompt" ,
75- "params" : {
76- "url" : IMAGE_URL ,
77- "prompt" : ["Extract headers" , "Extract body text" ]
78- },
76+ "params" : {"url" : IMAGE_URL , "prompt" : ["Extract headers" , "Extract body text" ]},
7977 "blob" : None ,
8078 "options" : None ,
8179 },
# PDF scenarios. The first two send everything through "params"; the last one
# names a URL under "blob" (the tests download it and upload the bytes) and
# carries its settings in "options" instead, so it has no "params" entry.
PDF_TEST_CASES = [
    dict(
        name="pdf_with_page_range",
        params=dict(
            url=PDF_URL,
            page_range=[1, 3],
            prompt="Extract text from these pages",
        ),
        blob=None,
        options=None,
    ),
    dict(
        name="pdf_single_page",
        params=dict(
            url=PDF_URL,
            page_range=[1, 1],
            prompt="What is on the first page?",
        ),
        blob=None,
        options=None,
    ),
    dict(
        name="pdf_blob_with_page_range",
        blob=PDF_URL,
        options=dict(
            page_range=[1, 3],
            prompt="what is this about?",
        ),
    ),
]
115102
@@ -135,7 +122,7 @@ def test_vocr(self, test_case):
135122 result = jigsaw .vision .vocr (test_case ["params" ])
136123
137124 print (f"Test { test_case ['name' ]} : Success={ result .get ('success' )} " )
138-
125+
139126 # Verify response structure
140127 assert result ["success" ] is True
141128 if "prompt" in (test_case .get ("params" ) or {}):
@@ -147,13 +134,11 @@ def test_vocr(self, test_case):
147134 assert isinstance (result ["tags" ], list )
148135 assert "sections" in result
149136 assert isinstance (result ["sections" ], list )
150-
137+
151138 except JigsawStackError as e :
152139 pytest .fail (f"Unexpected JigsawStackError in { test_case ['name' ]} : { e } " )
153140
154- @pytest .mark .parametrize (
155- "test_case" , pdf_test_cases , ids = [tc ["name" ] for tc in pdf_test_cases ]
156- )
141+ @pytest .mark .parametrize ("test_case" , pdf_test_cases , ids = [tc ["name" ] for tc in pdf_test_cases ])
157142 def test_vocr_pdf (self , test_case ):
158143 """Test synchronous VOCR with PDF inputs"""
159144 try :
@@ -164,19 +149,21 @@ def test_vocr_pdf(self, test_case):
164149 else :
165150 # Use params directly
166151 result = jigsaw .vision .vocr (test_case ["params" ])
167-
152+
168153 # Verify response structure
169154 assert result ["success" ] is True
170155 if "prompt" in (test_case .get ("params" ) or {}):
171156 assert "context" in result
172157 assert "total_pages" in result
173-
174- if test_case .get ("params" , {}).get ("page_range" ) or test_case .get ("options" , {}).get ("page_range" ):
158+
159+ if test_case .get ("params" , {}).get ("page_range" ) or test_case .get ("options" , {}).get (
160+ "page_range"
161+ ):
175162 assert "page_range" in result
176163 assert isinstance (result ["page_range" ], list )
177164
178165 logger .info (f"Test { test_case ['name' ]} : total_pages={ result .get ('total_pages' )} " )
179-
166+
180167 except JigsawStackError as e :
181168 pytest .fail (f"Unexpected JigsawStackError in { test_case ['name' ]} : { e } " )
182169
@@ -197,15 +184,13 @@ async def test_vocr_async(self, test_case):
197184 if test_case .get ("blob" ):
198185 # Download blob content
199186 blob_content = requests .get (test_case ["blob" ]).content
200- result = await async_jigsaw .vision .vocr (
201- blob_content , test_case .get ("options" , {})
202- )
187+ result = await async_jigsaw .vision .vocr (blob_content , test_case .get ("options" , {}))
203188 else :
204189 # Use params directly
205190 result = await async_jigsaw .vision .vocr (test_case ["params" ])
206191
207192 print (f"Test { test_case ['name' ]} : Success={ result .get ('success' )} " )
208-
193+
209194 # Verify response structure
210195 assert result ["success" ] is True
211196 if "prompt" in (test_case .get ("params" ) or {}):
@@ -217,44 +202,44 @@ async def test_vocr_async(self, test_case):
217202 assert isinstance (result ["tags" ], list )
218203 assert "sections" in result
219204 assert isinstance (result ["sections" ], list )
220-
205+
221206 # Log some details
222- logger .info (f"Test { test_case ['name' ]} : has_text={ result ['has_text' ]} , tags={ result ['tags' ][:3 ] if result ['tags' ] else []} " )
223-
207+ logger .info (
208+ f"Test { test_case ['name' ]} : has_text={ result ['has_text' ]} , tags={ result ['tags' ][:3 ] if result ['tags' ] else []} "
209+ )
210+
224211 except JigsawStackError as e :
225212 pytest .fail (f"Unexpected JigsawStackError in { test_case ['name' ]} : { e } " )
226213
@pytest.mark.parametrize("test_case", pdf_test_cases, ids=[tc["name"] for tc in pdf_test_cases])
@pytest.mark.asyncio
async def test_vocr_pdf_async(self, test_case):
    """Test asynchronous VOCR with PDF inputs.

    A case either names a URL under "blob", whose content is downloaded and
    passed to the client together with the case's "options", or supplies a
    "params" mapping that is passed to the client as-is.

    Args:
        test_case: Mapping with a "name" and, optionally, "params", "blob"
            and "options" entries. Any of the optional entries may be
            missing or explicitly None.
    """
    # dict.get(key, {}) only falls back when the key is absent; several cases
    # store an explicit None, so normalise with `or {}` before chaining .get().
    params = test_case.get("params") or {}
    options = test_case.get("options") or {}

    try:
        if test_case.get("blob"):
            # Download blob content; fail fast on an HTTP error rather than
            # uploading an error page as if it were the document.
            response = requests.get(test_case["blob"])
            response.raise_for_status()
            result = await async_jigsaw.vision.vocr(response.content, options)
        else:
            # Use params directly
            result = await async_jigsaw.vision.vocr(test_case["params"])

        print(f"Test {test_case['name']}: Success={result.get('success')}")

        # Verify response structure
        assert result["success"] is True
        if "prompt" in params:
            assert "context" in result
        assert "total_pages" in result  # PDF specific

        # Check if page_range is in response when requested
        if params.get("page_range") or options.get("page_range"):
            assert "page_range" in result
            assert isinstance(result["page_range"], list)

        logger.info(f"Test {test_case['name']}: total_pages={result.get('total_pages')}")

    except JigsawStackError as e:
        pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}")
0 commit comments