Skip to content

Commit 38f9641

Browse files
Update audio:
1. The audio API can now accept an audio blob directly; params can be passed along with the blob. 2. A `speech_to_text` overload enables this. 3. Added a new param, `chunk_duration`, to `SpeechToTextParams`.
1 parent 5bfcfbb commit 38f9641

File tree

1 file changed

+57
-7
lines changed

1 file changed

+57
-7
lines changed

jigsawstack/audio.py

Lines changed: 57 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
1-
from typing import Any, Dict, List, cast, Union, Optional
1+
from typing import Any, Dict, List, cast, Union, Optional, overload
22
from typing_extensions import NotRequired, TypedDict
33
from .request import Request, RequestConfig
44
from .async_request import AsyncRequest, AsyncRequestConfig
55
from ._config import ClientConfig
66
from typing import Any, Dict, List, cast
77
from typing_extensions import NotRequired, TypedDict
88
from .custom_typing import SupportedAccents
9+
from .helpers import build_path
910

1011

1112
class TextToSpeechParams(TypedDict):
@@ -64,16 +65,41 @@ def __init__(
6465
disable_request_logging=disable_request_logging,
6566
)
6667

@overload
def speech_to_text(self, params: SpeechToTextParams) -> SpeechToTextResponse: ...
@overload
def speech_to_text(self, file: bytes, options: Optional[SpeechToTextParams] = None) -> SpeechToTextResponse: ...

def speech_to_text(
    self,
    blob: Union[SpeechToTextParams, bytes, None] = None,
    options: Optional[SpeechToTextParams] = None,
    *,
    params: Optional[SpeechToTextParams] = None,
    file: Optional[bytes] = None,
) -> SpeechToTextResponse:
    """POST a speech-to-text request to ``/ai/transcribe``.

    Two call forms are supported:

    * A params dict: it is sent as the request params and ``options`` is
      not used.
    * A raw audio blob: it is sent as the request data. ``options`` is
      passed to ``build_path`` to build the request path and is also
      forwarded as the request params.

    Args:
        blob: Either the params dict or the audio blob, given positionally
            (or as ``blob=``).
        options: Extra parameters for the blob form. Its ``content_type``
            entry, when present, becomes the ``Content-Type`` header;
            otherwise ``application/octet-stream`` is used.
        params: Keyword alias for the params dict, matching the name used
            by the first overload.
        file: Keyword alias for the audio blob, matching the name used by
            the second overload.

    Returns:
        Whatever ``Request(...).perform_with_content()`` returns.

    Raises:
        TypeError: If none, or more than one, of ``blob``, ``params`` and
            ``file`` is supplied.
    """
    # The overloads advertise `params` / `file` while the implementation's
    # first parameter is `blob`; accept all three names so keyword calls
    # written against either overload keep working.
    supplied = [value for value in (blob, params, file) if value is not None]
    if not supplied:
        raise TypeError(
            "speech_to_text() requires either a params dict or an audio blob"
        )
    if len(supplied) > 1:
        raise TypeError(
            "speech_to_text() accepts only one of 'blob', 'params' or 'file'"
        )
    payload = supplied[0]

    if isinstance(payload, dict):
        # Dict form: the dict itself carries every request parameter.
        resp = Request(
            config=self.config,
            path="/ai/transcribe",
            params=cast(Dict[Any, Any], payload),
            verb="post",
        ).perform_with_content()
        return resp

    # Blob form: the audio travels as the request data.
    options = options or {}
    path = build_path(base_path="/ai/transcribe", params=options)
    content_type = options.get("content_type", "application/octet-stream")
    headers = {"Content-Type": content_type}

    resp = Request(
        config=self.config,
        path=path,
        params=options,
        data=payload,
        headers=headers,
        verb="post",
    ).perform_with_content()
    return resp
76101

102+
77103
def text_to_speech(self, params: TextToSpeechParams) -> TextToSpeechResponse:
78104
path = "/ai/tts"
79105
resp = Request(
@@ -111,12 +137,36 @@ def __init__(
111137
disable_request_logging=disable_request_logging,
112138
)
113139

@overload
async def speech_to_text(self, params: SpeechToTextParams) -> SpeechToTextResponse: ...
@overload
async def speech_to_text(self, file: bytes, options: Optional[SpeechToTextParams] = None) -> SpeechToTextResponse: ...

async def speech_to_text(
    self,
    blob: Union[SpeechToTextParams, bytes, None] = None,
    options: Optional[SpeechToTextParams] = None,
    *,
    params: Optional[SpeechToTextParams] = None,
    file: Optional[bytes] = None,
) -> SpeechToTextResponse:
    """Asynchronously POST a speech-to-text request to ``/ai/transcribe``.

    Two call forms are supported:

    * A params dict: it is sent as the request params and ``options`` is
      not used.
    * A raw audio blob: it is sent as the request data. ``options`` is
      passed to ``build_path`` to build the request path and is also
      forwarded as the request params.

    Args:
        blob: Either the params dict or the audio blob, given positionally
            (or as ``blob=``).
        options: Extra parameters for the blob form. Its ``content_type``
            entry, when present, becomes the ``Content-Type`` header;
            otherwise ``application/octet-stream`` is used.
        params: Keyword alias for the params dict, matching the name used
            by the first overload.
        file: Keyword alias for the audio blob, matching the name used by
            the second overload.

    Returns:
        Whatever awaiting ``AsyncRequest(...).perform_with_content()``
        returns.

    Raises:
        TypeError: If none, or more than one, of ``blob``, ``params`` and
            ``file`` is supplied.
    """
    # The overloads advertise `params` / `file` while the implementation's
    # first parameter is `blob`; accept all three names so keyword calls
    # written against either overload keep working.
    supplied = [value for value in (blob, params, file) if value is not None]
    if not supplied:
        raise TypeError(
            "speech_to_text() requires either a params dict or an audio blob"
        )
    if len(supplied) > 1:
        raise TypeError(
            "speech_to_text() accepts only one of 'blob', 'params' or 'file'"
        )
    payload = supplied[0]

    if isinstance(payload, dict):
        # Dict form: the dict itself carries every request parameter.
        resp = await AsyncRequest(
            config=self.config,
            path="/ai/transcribe",
            params=cast(Dict[Any, Any], payload),
            verb="post",
        ).perform_with_content()
        return resp

    # Blob form: the audio travels as the request data.
    options = options or {}
    path = build_path(base_path="/ai/transcribe", params=options)
    content_type = options.get("content_type", "application/octet-stream")
    headers = {"Content-Type": content_type}

    resp = await AsyncRequest(
        config=self.config,
        path=path,
        params=options,
        data=payload,
        headers=headers,
        verb="post",
    ).perform_with_content()
    return resp

0 commit comments

Comments
 (0)