Skip to content

Commit 3dd77b5

Browse files
Merge pull request #85 from JigsawStack/feat/stt-wordtimestamp
feat(stt): add param and cleanup test suites
2 parents aeb5593 + 7a2bf2e commit 3dd77b5

8 files changed

Lines changed: 5 additions & 955 deletions

File tree

.github/workflows/ci.yml

Lines changed: 0 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -37,12 +37,9 @@ jobs:
3737
- test_classification.py
3838
- test_embedding.py
3939
- test_file_store.py
40-
- test_image_generation.py
4140
- test_object_detection.py
4241
- test_prediction.py
43-
- test_sentiment.py
4442
- test_sql.py
45-
- test_summary.py
4643
- test_translate.py
4744
- test_validate.py
4845
- test_web.py

jigsawstack/audio.py

Lines changed: 5 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -49,6 +49,11 @@ class SpeechToTextParams(TypedDict):
4949
the duration of each chunk in seconds, maximum value is 15, defaults to 3
5050
"""
5151

52+
word_timestamps: NotRequired[bool]
53+
"""
54+
When set to true, returns each word as its own entry in the chunks array with its own start and end timestamp. Useful for caption alignment and word-accurate search. Cannot be combined with stream=true.
55+
"""
56+
5257

5358
class ChunkResponse(TypedDict):
5459
text: str

tests/test_embedding.py

Lines changed: 0 additions & 129 deletions
Original file line number | Diff line number | Diff line change
@@ -35,61 +35,6 @@
3535
"https://www.w3.org/WAI/ER/tests/xhtml/testfiles/resources/pdf/dummy.pdf"
3636
)
3737

38-
# Test cases for Embedding V1
39-
EMBEDDING_V1_TEST_CASES = [
40-
{
41-
"name": "text_embedding_basic",
42-
"params": {
43-
"type": "text",
44-
"text": SAMPLE_TEXT,
45-
},
46-
},
47-
{
48-
"name": "text_embedding_with_truncate",
49-
"params": {
50-
"type": "text",
51-
"text": SAMPLE_TEXT * 100, # Long text to test truncation
52-
"token_overflow_mode": "truncate",
53-
},
54-
},
55-
{
56-
"name": "text_embedding_with_error_mode",
57-
"params": {
58-
"type": "text",
59-
"text": SAMPLE_TEXT,
60-
"token_overflow_mode": "error",
61-
},
62-
},
63-
{
64-
"name": "image_embedding_from_url",
65-
"params": {
66-
"type": "image",
67-
"url": SAMPLE_IMAGE_URL,
68-
},
69-
},
70-
{
71-
"name": "audio_embedding_from_url",
72-
"params": {
73-
"type": "audio",
74-
"url": SAMPLE_AUDIO_URL,
75-
},
76-
},
77-
{
78-
"name": "pdf_embedding_from_url",
79-
"params": {
80-
"type": "pdf",
81-
"url": SAMPLE_PDF_URL,
82-
},
83-
},
84-
{
85-
"name": "text_other_type",
86-
"params": {
87-
"type": "text-other",
88-
"text": "This is a different text type for embedding",
89-
},
90-
},
91-
]
92-
9338
# Test cases for Embedding V2
9439
EMBEDDING_V2_TEST_CASES = [
9540
{
@@ -173,80 +118,6 @@
173118
]
174119

175120

176-
class TestEmbeddingV1Sync:
177-
"""Test synchronous Embedding V1 methods"""
178-
179-
sync_test_cases = EMBEDDING_V1_TEST_CASES
180-
181-
@pytest.mark.parametrize(
182-
"test_case", sync_test_cases, ids=[tc["name"] for tc in sync_test_cases]
183-
)
184-
def test_embedding_v1(self, test_case):
185-
"""Test synchronous embedding v1 with various inputs"""
186-
try:
187-
result = jigsaw.embedding(test_case["params"])
188-
assert result["success"]
189-
assert "embeddings" in result
190-
assert isinstance(result["embeddings"], list)
191-
if "chunks" in result:
192-
assert isinstance(result["chunks"], list)
193-
except JigsawStackError as e:
194-
pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}")
195-
196-
@pytest.mark.parametrize(
197-
"test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES]
198-
)
199-
def test_embedding_v1_blob(self, test_case):
200-
"""Test synchronous embedding v1 with blob inputs"""
201-
try:
202-
# Download blob content
203-
blob_content = requests.get(test_case["blob_url"]).content
204-
result = jigsaw.embedding(blob_content, test_case["options"])
205-
assert result["success"]
206-
assert "embeddings" in result
207-
assert isinstance(result["embeddings"], list)
208-
except JigsawStackError as e:
209-
pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}")
210-
211-
212-
class TestEmbeddingV1Async:
213-
"""Test asynchronous Embedding V1 methods"""
214-
215-
async_test_cases = EMBEDDING_V1_TEST_CASES
216-
217-
@pytest.mark.parametrize(
218-
"test_case", async_test_cases, ids=[tc["name"] for tc in async_test_cases]
219-
)
220-
@pytest.mark.asyncio
221-
async def test_embedding_v1_async(self, test_case):
222-
"""Test asynchronous embedding v1 with various inputs"""
223-
try:
224-
result = await async_jigsaw.embedding(test_case["params"])
225-
assert result["success"]
226-
assert "embeddings" in result
227-
assert isinstance(result["embeddings"], list)
228-
if "chunks" in result:
229-
assert isinstance(result["chunks"], list)
230-
except JigsawStackError as e:
231-
pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}")
232-
233-
@pytest.mark.parametrize(
234-
"test_case", BLOB_TEST_CASES, ids=[tc["name"] for tc in BLOB_TEST_CASES]
235-
)
236-
@pytest.mark.asyncio
237-
async def test_embedding_v1_blob_async(self, test_case):
238-
"""Test asynchronous embedding v1 with blob inputs"""
239-
try:
240-
# Download blob content
241-
blob_content = requests.get(test_case["blob_url"]).content
242-
result = await async_jigsaw.embedding(blob_content, test_case["options"])
243-
assert result["success"]
244-
assert "embeddings" in result
245-
assert isinstance(result["embeddings"], list)
246-
except JigsawStackError as e:
247-
pytest.fail(f"Unexpected JigsawStackError in {test_case['name']}: {e}")
248-
249-
250121
class TestEmbeddingV2Sync:
251122
"""Test synchronous Embedding V2 methods"""
252123

tests/test_sentiment.py

Lines changed: 0 additions & 152 deletions
This file was deleted.

0 commit comments

Comments
 (0)