44 changes: 29 additions & 15 deletions poetry.lock

(Generated lockfile; diff not rendered.)

2 changes: 1 addition & 1 deletion pyproject.toml
@@ -3,7 +3,7 @@ name = "cohere"

 [tool.poetry]
 name = "cohere"
-version = "5.19.0"
+version = "5.20.0"
 description = ""
 readme = "README.md"
 authors = []
4 changes: 2 additions & 2 deletions src/cohere/core/client_wrapper.py
@@ -22,10 +22,10 @@ def __init__(

     def get_headers(self) -> typing.Dict[str, str]:
         headers: typing.Dict[str, str] = {
-            "User-Agent": "cohere/5.19.0",
+            "User-Agent": "cohere/5.20.0",
             "X-Fern-Language": "Python",
             "X-Fern-SDK-Name": "cohere",
-            "X-Fern-SDK-Version": "5.19.0",
+            "X-Fern-SDK-Version": "5.20.0",
         }
         if self._client_name is not None:
             headers["X-Client-Name"] = self._client_name
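
The version headers above are how each request identifies the SDK build to the server. After upgrading, a quick sanity check with the standard library confirms the installed package matches (a minimal sketch, assuming the package was installed from PyPI as `cohere`):

from importlib.metadata import version

# Should print 5.20.0 once this release is installed.
print(version("cohere"))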
2 changes: 1 addition & 1 deletion src/cohere/types/chat_finish_reason.py
@@ -3,5 +3,5 @@
 import typing

 ChatFinishReason = typing.Union[
-    typing.Literal["COMPLETE", "STOP_SEQUENCE", "MAX_TOKENS", "TOOL_CALL", "ERROR"], typing.Any
+    typing.Literal["COMPLETE", "STOP_SEQUENCE", "MAX_TOKENS", "TOOL_CALL", "ERROR", "TIMEOUT"], typing.Any
 ]
4 changes: 3 additions & 1 deletion src/cohere/types/finish_reason.py
@@ -3,6 +3,8 @@
 import typing

 FinishReason = typing.Union[
-    typing.Literal["COMPLETE", "STOP_SEQUENCE", "ERROR", "ERROR_TOXIC", "ERROR_LIMIT", "USER_CANCEL", "MAX_TOKENS"],
+    typing.Literal[
+        "COMPLETE", "STOP_SEQUENCE", "ERROR", "ERROR_TOXIC", "ERROR_LIMIT", "USER_CANCEL", "MAX_TOKENS", "TIMEOUT"
+    ],
     typing.Any,
 ]
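
Both unions keep the open-ended `typing.Any` fallback, so callers match against the literal strings rather than an enum. A minimal sketch of handling the new `TIMEOUT` value via the v2 chat API (the model name is an illustrative assumption, not part of this diff):

import cohere

co = cohere.ClientV2()  # reads the API key from the CO_API_KEY environment variable

response = co.chat(
    model="command-a-03-2025",  # assumed model name, for illustration only
    messages=[{"role": "user", "content": "Write a haiku about code review."}],
)

if response.finish_reason == "TIMEOUT":
    # New in this release: generation stopped because the request timed out.
    print("Timed out; consider retrying.")
elif response.finish_reason == "MAX_TOKENS":
    print("Truncated at the token limit.")
else:
    print(response.message.content[0].text)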
48 changes: 48 additions & 0 deletions src/cohere/v2/client.py
@@ -66,6 +66,7 @@ def chat_stream(
         logprobs: typing.Optional[bool] = OMIT,
         tool_choice: typing.Optional[V2ChatStreamRequestToolChoice] = OMIT,
         thinking: typing.Optional[Thinking] = OMIT,
+        priority: typing.Optional[int] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> typing.Iterator[V2ChatStreamResponse]:
         """
@@ -158,6 +159,10 @@ def chat_stream(

         thinking : typing.Optional[Thinking]

+        priority : typing.Optional[int]
+            The priority of the request (lower means earlier handling; default 0 highest priority).
+            Higher priority requests are handled first, and dropped last when the system is under load.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.

@@ -205,6 +210,7 @@ def chat_stream(
             logprobs=logprobs,
             tool_choice=tool_choice,
             thinking=thinking,
+            priority=priority,
             request_options=request_options,
         ) as r:
             yield from r.data
@@ -231,6 +237,7 @@ def chat(
         logprobs: typing.Optional[bool] = OMIT,
         tool_choice: typing.Optional[V2ChatRequestToolChoice] = OMIT,
         thinking: typing.Optional[Thinking] = OMIT,
+        priority: typing.Optional[int] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> V2ChatResponse:
         """
@@ -323,6 +330,10 @@ def chat(

         thinking : typing.Optional[Thinking]

+        priority : typing.Optional[int]
+            The priority of the request (lower means earlier handling; default 0 highest priority).
+            Higher priority requests are handled first, and dropped last when the system is under load.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.

@@ -368,6 +379,7 @@ def chat(
             logprobs=logprobs,
             tool_choice=tool_choice,
             thinking=thinking,
+            priority=priority,
             request_options=request_options,
         )
         return _response.data
@@ -384,6 +396,7 @@ def embed(
         output_dimension: typing.Optional[int] = OMIT,
         embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,
         truncate: typing.Optional[V2EmbedRequestTruncate] = OMIT,
+        priority: typing.Optional[int] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> EmbedByTypeResponse:
         """
@@ -437,6 +450,10 @@ def embed(

             If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.

+        priority : typing.Optional[int]
+            The priority of the request (lower means earlier handling; default 0 highest priority).
+            Higher priority requests are handled first, and dropped last when the system is under load.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.

@@ -470,6 +487,7 @@ def embed(
             output_dimension=output_dimension,
             embedding_types=embedding_types,
             truncate=truncate,
+            priority=priority,
             request_options=request_options,
         )
         return _response.data
@@ -482,6 +500,7 @@ def rerank(
         documents: typing.Sequence[str],
         top_n: typing.Optional[int] = OMIT,
         max_tokens_per_doc: typing.Optional[int] = OMIT,
+        priority: typing.Optional[int] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> V2RerankResponse:
         """
@@ -509,6 +528,10 @@ def rerank(
         max_tokens_per_doc : typing.Optional[int]
             Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.

+        priority : typing.Optional[int]
+            The priority of the request (lower means earlier handling; default 0 highest priority).
+            Higher priority requests are handled first, and dropped last when the system is under load.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.

@@ -544,6 +567,7 @@ def rerank(
             documents=documents,
             top_n=top_n,
             max_tokens_per_doc=max_tokens_per_doc,
+            priority=priority,
             request_options=request_options,
         )
         return _response.data
@@ -586,6 +610,7 @@ async def chat_stream(
         logprobs: typing.Optional[bool] = OMIT,
         tool_choice: typing.Optional[V2ChatStreamRequestToolChoice] = OMIT,
         thinking: typing.Optional[Thinking] = OMIT,
+        priority: typing.Optional[int] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> typing.AsyncIterator[V2ChatStreamResponse]:
         """
@@ -678,6 +703,10 @@ async def chat_stream(

         thinking : typing.Optional[Thinking]

+        priority : typing.Optional[int]
+            The priority of the request (lower means earlier handling; default 0 highest priority).
+            Higher priority requests are handled first, and dropped last when the system is under load.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.

@@ -733,6 +762,7 @@ async def main() -> None:
             logprobs=logprobs,
             tool_choice=tool_choice,
             thinking=thinking,
+            priority=priority,
             request_options=request_options,
         ) as r:
             async for _chunk in r.data:
@@ -760,6 +790,7 @@ async def chat(
         logprobs: typing.Optional[bool] = OMIT,
         tool_choice: typing.Optional[V2ChatRequestToolChoice] = OMIT,
         thinking: typing.Optional[Thinking] = OMIT,
+        priority: typing.Optional[int] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> V2ChatResponse:
         """
@@ -852,6 +883,10 @@ async def chat(

         thinking : typing.Optional[Thinking]

+        priority : typing.Optional[int]
+            The priority of the request (lower means earlier handling; default 0 highest priority).
+            Higher priority requests are handled first, and dropped last when the system is under load.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.

@@ -905,6 +940,7 @@ async def main() -> None:
             logprobs=logprobs,
             tool_choice=tool_choice,
             thinking=thinking,
+            priority=priority,
             request_options=request_options,
         )
         return _response.data
@@ -921,6 +957,7 @@ async def embed(
         output_dimension: typing.Optional[int] = OMIT,
         embedding_types: typing.Optional[typing.Sequence[EmbeddingType]] = OMIT,
         truncate: typing.Optional[V2EmbedRequestTruncate] = OMIT,
+        priority: typing.Optional[int] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> EmbedByTypeResponse:
         """
@@ -974,6 +1011,10 @@ async def embed(

             If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.

+        priority : typing.Optional[int]
+            The priority of the request (lower means earlier handling; default 0 highest priority).
+            Higher priority requests are handled first, and dropped last when the system is under load.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.

@@ -1015,6 +1056,7 @@ async def main() -> None:
             output_dimension=output_dimension,
             embedding_types=embedding_types,
             truncate=truncate,
+            priority=priority,
             request_options=request_options,
         )
         return _response.data
@@ -1027,6 +1069,7 @@ async def rerank(
         documents: typing.Sequence[str],
         top_n: typing.Optional[int] = OMIT,
         max_tokens_per_doc: typing.Optional[int] = OMIT,
+        priority: typing.Optional[int] = OMIT,
         request_options: typing.Optional[RequestOptions] = None,
     ) -> V2RerankResponse:
         """
@@ -1054,6 +1097,10 @@ async def rerank(
         max_tokens_per_doc : typing.Optional[int]
             Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.

+        priority : typing.Optional[int]
+            The priority of the request (lower means earlier handling; default 0 highest priority).
+            Higher priority requests are handled first, and dropped last when the system is under load.
+
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.

@@ -1097,6 +1144,7 @@ async def main() -> None:
             documents=documents,
             top_n=top_n,
             max_tokens_per_doc=max_tokens_per_doc,
+            priority=priority,
             request_options=request_options,
         )
         return _response.data
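
Taken together, the new `priority` keyword threads through every v2 entry point (`chat`, `chat_stream`, `embed`, `rerank`, sync and async), defaulting to OMIT so existing callers are unaffected. A minimal sketch of deprioritizing a background rerank job relative to interactive traffic (model name assumed for illustration):

import cohere

co = cohere.ClientV2()  # reads the API key from the CO_API_KEY environment variable

results = co.rerank(
    model="rerank-v3.5",  # assumed model name, for illustration only
    query="What is the capital of France?",
    documents=["Paris is the capital of France.", "Berlin is in Germany."],
    top_n=1,
    priority=5,  # lower values are handled earlier; 0 (the default) is highest priority
)
print(results.results[0].index, results.results[0].relevance_score)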