cohere-ai · jasonozuzu-cohere · Dec 18, 2025 · Dec 18, 2025
diff --git a/.fern/metadata.json b/.fern/metadata.json
@@ -9,8 +9,7 @@
       "fastavro": "^1.9.4",
       "requests": "^2.0.0",
       "types-requests": "^2.0.0",
-      "tokenizers": ">=0.15,<1",
-      "httpx-sse": "^0.4.0"
+      "tokenizers": ">=0.15,<1"
     },
     "improved_imports": true,
     "pydantic_config": {

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -38,7 +38,6 @@ Repository = 'https://github.com/cohere-ai/cohere-python'
 python = "^3.9"
 fastavro = "^1.9.4"
 httpx = ">=0.21.2"
-httpx-sse = "^0.4.0"
 pydantic = ">= 1.9.2"
 pydantic-core = ">=2.18.2"
 requests = "^2.0.0"

diff --git a/reference.md b/reference.md
@@ -1615,7 +1615,7 @@ client.rerank(
     ],
     query="What is the capital of the United States?",
     top_n=3,
-    model="rerank-v3.5",
+    model="rerank-v4.0-pro",
 )
 
 ```
@@ -2492,10 +2492,7 @@ If tool_choice isn't specified, then the model is free to choose whether to use
 <dl>
 <dd>
 
-**priority:** `typing.Optional[int]` 
-
-The priority of the request (lower means earlier handling; default 0 highest priority).
-Higher priority requests are handled first, and dropped last when the system is under load.
+**priority:** `typing.Optional[int]` — Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
 
 </dd>
 </dl>
@@ -2793,10 +2790,7 @@ If tool_choice isn't specified, then the model is free to choose whether to use
 <dl>
 <dd>
 
-**priority:** `typing.Optional[int]` 
-
-The priority of the request (lower means earlier handling; default 0 highest priority).
-Higher priority requests are handled first, and dropped last when the system is under load.
+**priority:** `typing.Optional[int]` — Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
 
 </dd>
 </dl>
@@ -2972,10 +2966,7 @@ If `NONE` is selected, when the input exceeds the maximum input token length an
 <dl>
 <dd>
 
-**priority:** `typing.Optional[int]` 
-
-The priority of the request (lower means earlier handling; default 0 highest priority).
-Higher priority requests are handled first, and dropped last when the system is under load.
+**priority:** `typing.Optional[int]` — Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
 
 </dd>
 </dl>
@@ -3038,7 +3029,7 @@ client.v2.rerank(
     ],
     query="What is the capital of the United States?",
     top_n=3,
-    model="rerank-v3.5",
+    model="rerank-v4.0-pro",
 )
 
 ```
@@ -3102,10 +3093,7 @@ For optimal performance we recommend against sending more than 1,000 documents i
 <dl>
 <dd>
 
-**priority:** `typing.Optional[int]` 
-
-The priority of the request (lower means earlier handling; default 0 highest priority).
-Higher priority requests are handled first, and dropped last when the system is under load.
+**priority:** `typing.Optional[int]` — Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
 
 </dd>
 </dl>

diff --git a/requirements.txt b/requirements.txt
@@ -1,6 +1,5 @@
 fastavro==1.9.4
 httpx>=0.21.2
-httpx-sse==0.4.0
 pydantic>= 1.9.2
 pydantic-core>=2.18.2
 requests==2.0.0

diff --git a/src/cohere/base_client.py b/src/cohere/base_client.py
@@ -1205,7 +1205,7 @@ def rerank(
             ],
             query="What is the capital of the United States?",
             top_n=3,
-            model="rerank-v3.5",
+            model="rerank-v4.0-pro",
         )
         """
         _response = self._raw_client.rerank(
@@ -2755,7 +2755,7 @@ async def main() -> None:
                 ],
                 query="What is the capital of the United States?",
                 top_n=3,
-                model="rerank-v3.5",
+                model="rerank-v4.0-pro",
             )
 
 

diff --git a/src/cohere/v2/client.py b/src/cohere/v2/client.py
@@ -160,8 +160,7 @@ def chat_stream(
         thinking : typing.Optional[Thinking]
 
         priority : typing.Optional[int]
-            The priority of the request (lower means earlier handling; default 0 highest priority).
-            Higher priority requests are handled first, and dropped last when the system is under load.
+            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -331,8 +330,7 @@ def chat(
         thinking : typing.Optional[Thinking]
 
         priority : typing.Optional[int]
-            The priority of the request (lower means earlier handling; default 0 highest priority).
-            Higher priority requests are handled first, and dropped last when the system is under load.
+            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -451,8 +449,7 @@ def embed(
             If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
 
         priority : typing.Optional[int]
-            The priority of the request (lower means earlier handling; default 0 highest priority).
-            Higher priority requests are handled first, and dropped last when the system is under load.
+            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -529,8 +526,7 @@ def rerank(
             Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.
 
         priority : typing.Optional[int]
-            The priority of the request (lower means earlier handling; default 0 highest priority).
-            Higher priority requests are handled first, and dropped last when the system is under load.
+            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -558,7 +554,7 @@ def rerank(
             ],
             query="What is the capital of the United States?",
             top_n=3,
-            model="rerank-v3.5",
+            model="rerank-v4.0-pro",
         )
         """
         _response = self._raw_client.rerank(
@@ -704,8 +700,7 @@ async def chat_stream(
         thinking : typing.Optional[Thinking]
 
         priority : typing.Optional[int]
-            The priority of the request (lower means earlier handling; default 0 highest priority).
-            Higher priority requests are handled first, and dropped last when the system is under load.
+            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -884,8 +879,7 @@ async def chat(
         thinking : typing.Optional[Thinking]
 
         priority : typing.Optional[int]
-            The priority of the request (lower means earlier handling; default 0 highest priority).
-            Higher priority requests are handled first, and dropped last when the system is under load.
+            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -1012,8 +1006,7 @@ async def embed(
             If `NONE` is selected, when the input exceeds the maximum input token length an error will be returned.
 
         priority : typing.Optional[int]
-            The priority of the request (lower means earlier handling; default 0 highest priority).
-            Higher priority requests are handled first, and dropped last when the system is under load.
+            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -1098,8 +1091,7 @@ async def rerank(
             Defaults to `4096`. Long documents will be automatically truncated to the specified number of tokens.
 
         priority : typing.Optional[int]
-            The priority of the request (lower means earlier handling; default 0 highest priority).
-            Higher priority requests are handled first, and dropped last when the system is under load.
+            Controls how early the request is handled. Lower numbers indicate higher priority (default: 0, the highest). When the system is under load, higher-priority requests are processed first and are the least likely to be dropped.
 
         request_options : typing.Optional[RequestOptions]
             Request-specific configuration.
@@ -1132,7 +1124,7 @@ async def main() -> None:
                 ],
                 query="What is the capital of the United States?",
                 top_n=3,
-                model="rerank-v3.5",
+                model="rerank-v4.0-pro",
             )