|
35 | 35 | "https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf" |
36 | 36 | ) |
37 | 37 |
|
38 | | -# Test cases for Embedding V1 |
39 | | -EMBEDDING_V1_TEST_CASES = [ |
40 | | - { |
41 | | - "name": "text_embedding_basic", |
42 | | - "params": { |
43 | | - "type": "text", |
44 | | - "text": SAMPLE_TEXT, |
45 | | - }, |
46 | | - }, |
47 | | - { |
48 | | - "name": "text_embedding_with_truncate", |
49 | | - "params": { |
50 | | - "type": "text", |
51 | | - "text": SAMPLE_TEXT * 100, # Long text to test truncation |
52 | | - "token_overflow_mode": "truncate", |
53 | | - }, |
54 | | - }, |
55 | | - { |
56 | | - "name": "text_embedding_with_error_mode", |
57 | | - "params": { |
58 | | - "type": "text", |
59 | | - "text": SAMPLE_TEXT, |
60 | | - "token_overflow_mode": "error", |
61 | | - }, |
62 | | - }, |
63 | | - { |
64 | | - "name": "image_embedding_from_url", |
65 | | - "params": { |
66 | | - "type": "image", |
67 | | - "url": SAMPLE_IMAGE_URL, |
68 | | - }, |
69 | | - }, |
70 | | - { |
71 | | - "name": "audio_embedding_from_url", |
72 | | - "params": { |
73 | | - "type": "audio", |
74 | | - "url": SAMPLE_AUDIO_URL, |
75 | | - }, |
76 | | - }, |
77 | | - { |
78 | | - "name": "pdf_embedding_from_url", |
79 | | - "params": { |
80 | | - "type": "pdf", |
81 | | - "url": SAMPLE_PDF_URL, |
82 | | - }, |
83 | | - }, |
84 | | - { |
85 | | - "name": "text_other_type", |
86 | | - "params": { |
87 | | - "type": "text-other", |
88 | | - "text": "This is a different text type for embedding", |
89 | | - }, |
90 | | - }, |
91 | | -] |
92 | | - |
93 | 38 | # Test cases for Embedding V2 |
94 | 39 | EMBEDDING_V2_TEST_CASES = [ |
95 | 40 | { |
|
173 | 118 | ] |
174 | 119 |
|
175 | 120 |
|
176 | | -class TestEmbeddingV1Sync: |
177 | | - """Test synchronous Embedding V1 methods""" |
178 | | - |
179 | | - sync_test_cases = EMBEDDING_V1_TEST_CASES |
180 | | - |
181 | | - @pytest.mark.parametrize( |
182 | | - "test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases] |
183 | | - ) |
184 | | - def test_embedding_v1(self, test_case): |
185 | | - """Test synchronous embedding v1 with various inputs""" |
186 | | - try: |
187 | | - result = jigsaw.embedding(test_case["params"]) |
188 | | - assert result["success"] |
189 | | - assert "embeddings" in result |
190 | | - assert isinstance(result["embeddings"], list) |
191 | | - if "chunks" in result: |
192 | | - assert isinstance(result["chunks"], list) |
193 | | - except JigsawStackError as e: |
194 | | - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") |
195 | | - |
196 | | - @pytest.mark.parametrize( |
197 | | - "test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES] |
198 | | - ) |
199 | | - def test_embedding_v1_blob(self, test_case): |
200 | | - """Test synchronous embedding v1 with blob inputs""" |
201 | | - try: |
202 | | - # Download blob content |
203 | | - blob_content = requests.get(test_case["blob_url"]).content |
204 | | - result = jigsaw.embedding(blob_content, test_case["options"]) |
205 | | - assert result["success"] |
206 | | - assert "embeddings" in result |
207 | | - assert isinstance(result["embeddings"], list) |
208 | | - except JigsawStackError as e: |
209 | | - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") |
210 | | - |
211 | | - |
212 | | -class TestEmbeddingV1Async: |
213 | | - """Test asynchronous Embedding V1 methods""" |
214 | | - |
215 | | - async_test_cases = EMBEDDING_V1_TEST_CASES |
216 | | - |
217 | | - @pytest.mark.parametrize( |
218 | | - "test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases] |
219 | | - ) |
220 | | - @pytest.mark.asyncio |
221 | | - async def test_embedding_v1_async(self, test_case): |
222 | | - """Test asynchronous embedding v1 with various inputs""" |
223 | | - try: |
224 | | - result = await async_jigsaw.embedding(test_case["params"]) |
225 | | - assert result["success"] |
226 | | - assert "embeddings" in result |
227 | | - assert isinstance(result["embeddings"], list) |
228 | | - if "chunks" in result: |
229 | | - assert isinstance(result["chunks"], list) |
230 | | - except JigsawStackError as e: |
231 | | - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") |
232 | | - |
233 | | - @pytest.mark.parametrize( |
234 | | - "test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES] |
235 | | - ) |
236 | | - @pytest.mark.asyncio |
237 | | - async def test_embedding_v1_blob_async(self, test_case): |
238 | | - """Test asynchronous embedding v1 with blob inputs""" |
239 | | - try: |
240 | | - # Download blob content |
241 | | - blob_content = requests.get(test_case["blob_url"]).content |
242 | | - result = await async_jigsaw.embedding(blob_content, test_case["options"]) |
243 | | - assert result["success"] |
244 | | - assert "embeddings" in result |
245 | | - assert isinstance(result["embeddings"], list) |
246 | | - except JigsawStackError as e: |
247 | | - pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}") |
248 | | - |
249 | | - |
250 | 121 | class TestEmbeddingV2Sync: |
251 | 122 | """Test synchronous Embedding V2 methods""" |
252 | 123 |
|
|
0 commit comments