vercel-labs · elprans · May 19, 2026 · msullivan · May 22, 2026
diff --git a/examples/coding_agent_minimal.py b/examples/coding_agent_minimal.py
@@ -3,7 +3,6 @@
 import asyncio
 import json
 import sys
-import typing
 
 import ai
 
@@ -16,9 +15,7 @@
 with sed, make sure to double check the result.
 """
 
-STREAM_PARAMS: dict[str, typing.Any] = {
-    "providerOptions": {"gateway": {"caching": "auto"}},
-}
+STREAM_PARAMS = ai.InferenceRequestParams(cache=ai.CacheParams(mode="auto"))
 
 
 @ai.tool

diff --git a/examples/model_params.py b/examples/model_params.py
@@ -13,12 +13,12 @@
 
 
 async def main() -> None:
-    params = {
-        "providerOptions": {
-            "gateway": {"sort": "cost"},
-            "anthropic": {"speed": "fast"},
-        }
-    }
+    params = ai.InferenceRequestParams(
+        routing=ai.RoutingParams(
+            provider_ranking=ai.ProviderRankingStrategy.COST
+        ),
+        extra_body={"providerOptions": {"anthropic": {"speed": "fast"}}},
+    )
     async with ai.stream(model, messages, params=params) as stream:
         async for event in stream:
             if isinstance(event, ai.events.TextDelta):

diff --git a/examples/prompt_caching.py b/examples/prompt_caching.py
@@ -142,7 +142,7 @@ async def _run(user_text: str) -> ai.types.usage.Usage | None:
         ai.system_message(SYSTEM_PROMPT),
         ai.user_message(user_text),
     ]
-    params = {"providerOptions": {"gateway": {"caching": "auto"}}}
+    params = ai.InferenceRequestParams(cache=ai.CacheParams(mode="auto"))
     async with agent.run(model, messages, params=params) as stream:
         async for _event in stream:
             pass

diff --git a/src/ai/__init__.py b/src/ai/__init__.py
@@ -48,11 +48,41 @@
     UnsupportedProviderError,
 )
 from .models import (
+    DEFAULT,
+    GLOBAL,
+    RANDOM,
+    UNSET,
+    CacheParams,
+    CloudRegion,
+    ContextManagementParams,
+    GeoRegion,
     ImageParams,
+    InferenceRequestParams,
+    MinPSamplerParams,
     Model,
+    ModelProviderDefault,
+    OutputParams,
     Provider,
     ProviderProtocol,
+    ProviderRankingStrategy,
+    ProviderServiceParams,
+    RandomSeed,
+    ReasoningParams,
+    RepetitionPenaltyParams,
+    RoutingParams,
+    RoutingTarget,
+    RoutingTargetChain,
+    SeedSamplerParams,
     Stream,
+    TemperatureSamplerParams,
+    TokenThreshold,
+    ToolCallingParams,
+    ToolChoiceMode,
+    ToolRef,
+    ToolSelection,
+    TopKSamplerParams,
+    TopPSamplerParams,
+    Unset,
     VideoParams,
     generate,
     get_model,
@@ -72,18 +102,27 @@
 )
 
 __all__ = [
-    # Models (from models/)
+    "DEFAULT",
+    "GLOBAL",
+    "RANDOM",
+    "UNSET",
     "AIError",
-    # Agents — primary API
     "Agent",
-    # Agents — tools
     "AgentTool",
+    "CacheParams",
+    "CloudRegion",
     "ConfigurationError",
     "Context",
+    "ContextManagementParams",
+    "GeoRegion",
     "HTTPErrorContext",
     "ImageParams",
+    "InferenceRequestParams",
     "InstallationError",
+    "MinPSamplerParams",
     "Model",
+    "ModelProviderDefault",
+    "OutputParams",
     "Provider",
     "ProviderAPIError",
     "ProviderAuthenticationError",
@@ -99,34 +138,50 @@
     "ProviderOverloadedError",
     "ProviderPermissionDeniedError",
     "ProviderProtocol",
+    "ProviderRankingStrategy",
     "ProviderRateLimitError",
     "ProviderRequestTooLargeError",
     "ProviderResponseError",
+    "ProviderServiceParams",
     "ProviderServiceUnavailableError",
     "ProviderStatusError",
     "ProviderTimeoutError",
     "ProviderUnprocessableEntityError",
+    "RandomSeed",
+    "ReasoningParams",
+    "RepetitionPenaltyParams",
+    "RoutingParams",
+    "RoutingTarget",
+    "RoutingTargetChain",
+    "SeedSamplerParams",
     "Stream",
     "StreamingStatusTool",
     "StreamingTextTool",
     "SubAgentTool",
+    "TemperatureSamplerParams",
+    "TokenThreshold",
     "Tool",
     "ToolCall",
+    "ToolCallingParams",
+    "ToolChoiceMode",
+    "ToolRef",
     "ToolRunner",
+    "ToolSelection",
+    "TopKSamplerParams",
+    "TopPSamplerParams",
+    "Unset",
     "UnsupportedProviderError",
     "VideoParams",
     "abort_pending_hook",
     "agent",
     "assistant_message",
     "cancel_hook",
     "errors",
-    # Submodules
     "events",
     "file_part",
     "generate",
     "get_model",
     "get_provider",
-    # Agents — hooks
     "hook",
     "mcp",
     "messages",
@@ -143,9 +198,7 @@
     "tool_result",
     "tool_result_part",
     "tools",
-    # Builders (from types/builders)
     "user_message",
     "util",
-    # Agents — composition
     "yield_from",
 ]
diff --git a/src/ai/_types.py b/src/ai/_types.py
@@ -0,0 +1,10 @@
+from collections.abc import Iterator
+from typing import Protocol, TypeVar
+
+_T_co = TypeVar("_T_co", covariant=True)  # element type of Collection
+
+
+class Collection(Protocol[_T_co]):  # structural type; no inheritance required
+    def __contains__(self, value: object, /) -> bool: ...  # value in self
+    def __iter__(self) -> Iterator[_T_co]: ...  # yields _T_co elements
+    def __len__(self) -> int: ...  # len(self)
diff --git a/src/ai/agents/_middleware.py b/src/ai/agents/_middleware.py
@@ -43,6 +43,7 @@
     import pydantic
 
     from ..models.core.model import Model
+    from ..models.core.params import GenerateParams
     from ..types import events as events_
     from ..types.tools import Tool
     from .agent import Context
@@ -71,7 +72,7 @@ class GenerateContext:
 
     model: Model
     messages: list[messages_.Message]
-    params: Any
+    params: GenerateParams
 
     def __post_init__(self) -> None:
         object.__setattr__(self, "messages", list(self.messages))

diff --git a/src/ai/agents/agent.py b/src/ai/agents/agent.py
@@ -813,7 +813,9 @@ class Context(pydantic.BaseModel):
     output_type: type[pydantic.BaseModel] | None = pydantic.Field(
         default=None, exclude=True, repr=False
     )
-    params: Any = pydantic.Field(default=None, exclude=True, repr=False)
+    params: models.InferenceRequestParams | None = pydantic.Field(
+        default=None, exclude=True, repr=False
+    )
 
     _agent_tools_by_name: dict[str, AgentTool] = pydantic.PrivateAttr(
         default_factory=dict
@@ -1178,7 +1180,7 @@ def run(
         model: models.Model,
         messages: list[types.messages.Message],
         *,
-        params: Any = None,
+        params: models.InferenceRequestParams | None = None,
         _middleware: list[middleware_._Middleware] | None = None,
     ) -> AbstractAsyncContextManager[AgentStream[str]]: ...
     @overload
@@ -1188,7 +1190,7 @@ def run[T: pydantic.BaseModel](
         messages: list[types.messages.Message],
         *,
         output_type: type[T],
-        params: Any = None,
+        params: models.InferenceRequestParams | None = None,
         _middleware: list[middleware_._Middleware] | None = None,
     ) -> AbstractAsyncContextManager[AgentStream[T]]: ...
     def run(
@@ -1197,7 +1199,7 @@ def run(
         messages: list[types.messages.Message],
         *,
         output_type: type[pydantic.BaseModel] | None = None,
-        params: Any = None,
+        params: models.InferenceRequestParams | None = None,
         _middleware: list[middleware_._Middleware] | None = None,
     ) -> AbstractAsyncContextManager[AgentStream[Any]]:
         """Run the agent loop, yielding events to the consumer.
@@ -1237,7 +1239,7 @@ async def _run(
         messages: list[types.messages.Message],
         *,
         output_type: type[pydantic.BaseModel] | None,
-        params: Any,
+        params: models.InferenceRequestParams | None,
         _middleware: list[middleware_._Middleware] | None,
     ) -> AsyncIterator[AgentStream[Any]]:
         context = Context(

diff --git a/src/ai/models/__init__.py b/src/ai/models/__init__.py
@@ -45,23 +45,85 @@
     stream,
 )
 from .core.model import Model, get_model
-from .core.params import GenerateParams, ImageParams, VideoParams
+from .core.params import (
+    DEFAULT,
+    GLOBAL,
+    RANDOM,
+    UNSET,
+    CacheParams,
+    CloudRegion,
+    ContextManagementParams,
+    GenerateParams,
+    GeoRegion,
+    ImageParams,
+    InferenceRequestParams,
+    MinPSamplerParams,
+    ModelProviderDefault,
+    OutputParams,
+    ProviderRankingStrategy,
+    ProviderServiceParams,
+    RandomSeed,
+    ReasoningParams,
+    RepetitionPenaltyParams,
+    RoutingParams,
+    RoutingTarget,
+    RoutingTargetChain,
+    SeedSamplerParams,
+    TemperatureSamplerParams,
+    TokenThreshold,
+    ToolCallingParams,
+    ToolChoiceMode,
+    ToolRef,
+    ToolSelection,
+    TopKSamplerParams,
+    TopPSamplerParams,
+    Unset,
+    VideoParams,
+)
 
 __all__ = [
-    # Core types
+    "DEFAULT",
+    "GLOBAL",
+    "RANDOM",
+    "UNSET",
+    "CacheParams",
+    "CloudRegion",
+    "ContextManagementParams",
     "Executor",
     "GenerateExecutor",
     "GenerateParams",
     "GenerateRequest",
+    "GeoRegion",
     "ImageParams",
+    "InferenceRequestParams",
+    "MinPSamplerParams",
     "Model",
+    "ModelProviderDefault",
+    "OutputParams",
     "Provider",
     "ProviderProtocol",
+    "ProviderRankingStrategy",
+    "ProviderServiceParams",
+    "RandomSeed",
+    "ReasoningParams",
+    "RepetitionPenaltyParams",
+    "RoutingParams",
+    "RoutingTarget",
+    "RoutingTargetChain",
+    "SeedSamplerParams",
     "Stream",
     "StreamExecutor",
     "StreamRequest",
+    "TemperatureSamplerParams",
+    "TokenThreshold",
+    "ToolCallingParams",
+    "ToolChoiceMode",
+    "ToolRef",
+    "ToolSelection",
+    "TopKSamplerParams",
+    "TopPSamplerParams",
+    "Unset",
     "VideoParams",
-    # Public API
     "generate",
     "get_model",
     "probe",