splunk
diff --git a/‎splunklib/ai/README.md‎
Lines changed: 30 additions & 46 deletions b/‎splunklib/ai/README.md‎
Lines changed: 30 additions & 46 deletions
diff --git a/‎splunklib/ai/agent.py‎
Lines changed: 9 additions & 0 deletions b/‎splunklib/ai/agent.py‎
Lines changed: 9 additions & 0 deletions
diff --git a/‎splunklib/ai/base_agent.py‎
Lines changed: 9 additions & 28 deletions b/‎splunklib/ai/base_agent.py‎
Lines changed: 9 additions & 28 deletions
@@ -696,7 +696,7 @@ triggers the retry logic described above. A custom `model_middleware` can interc
 to observe, log, or override the retry behavior. A custom `model_middleware` can also raise
 the `StructuredOutputGenerationException` manually to reject structured output and force a re-generation.
 
-The maximal number of re-tries is limited per agent loop invocation see [Default limit middlewares](#default-limit-middlewares).
+The maximum number of retries is limited per agent loop invocation; see [Default limits](#default-limits).
 
 ### Subagents with structured output/input
 
@@ -977,103 +977,87 @@ model = OpenAIModel(...)
 service = connect(...)
 
 @before_model
-def log_usage(req: ModelRequest) -> None:
-    logger.debug(f"Steps: {req.state.total_steps}, Tokens: {req.state.token_count}")
+def log_steps(req: ModelRequest) -> None:
+    logger.debug(f"Steps: {len(req.state.messages)}")
 
 
 async with Agent(
     model=model,
     service=service,
     system_prompt="...",
-    middleware=[log_usage],
+    middleware=[log_steps],
 ) as agent: ...
 ```
 
-The hooks can stop the Agentic Loop under custom conditions by raising exceptions.
-The logic of the hook can be more advanced and include multiple conditions, for example, based on both token usage and execution time:
+The hooks can stop the Agentic Loop under custom conditions by raising exceptions, for example:
 
 ```py
 from splunklib.ai.hooks import before_model
 from splunklib.ai.middleware import AgentMiddleware, ModelRequest
 
-def token_and_step_limit(token_limit: float, step_limit: int) -> AgentMiddleware:
+def message_limit(message_limit: int) -> AgentMiddleware:
     @before_model
     def _hook(req: ModelRequest) -> None:
-        if req.state.token_count > token_limit or req.state.total_steps >= step_limit:
+        if len(req.state.messages) >= message_limit:
             raise Exception("Stopping Agentic Loop")
 
     return _hook
 
 
 async with Agent(
     ...,
-    middleware=[token_and_step_limit(token_limit=10_000, step_limit=5)],
+    middleware=[message_limit(message_limit=5)],
 ) as agent: ...
 ```
 
-## Default limit middlewares
+## Default limits
 
 Every `Agent` automatically applies sane default limits to prevent runaway execution
-or excessive token usage. Default limit middlewares are appended after any user-supplied
-middleware, so they always act on the final state of the request. If you override one of
-the defaults by passing your own instance, you are responsible for its position in the
-chain - place it last if you want the same behavior.
+or excessive token usage.
 
-| Middleware | Default | Measured |
+| Limit | Default | Measured |
 |---|---|---|
-| `TokenLimitMiddleware` | 200 000 tokens | token count of messages passed to the model |
-| `StepLimitMiddleware` | 100 steps | steps taken |
-| `TimeoutLimitMiddleware` | 600 seconds (10 minutes) | per `invoke` call |
-| `StructuredOutputRetryLimitMiddleware` | 3 retries | per `invoke` call |
+| `max_tokens` | 200 000 tokens | token count of messages passed to the model |
+| `max_steps` | 100 steps | number of messages in the conversation |
+| `timeout` | 600 seconds (10 minutes) | per `invoke` call |
+| `max_structured_output_retires` | 3 retries | per `invoke` call |
 
-`TokenLimitMiddleware` and `StepLimitMiddleware` check the values from the messages passed to the
-model on each call. `TimeoutLimitMiddleware`  and `StructuredOutputRetryLimitMiddlewa` resets its
-deadline/limit on each `invoke`, so effectively these limit only the agent loop.
+`max_tokens` and `max_steps` are checked against the messages passed to the model on each call.
+`timeout` and `max_structured_output_retires` reset on each `invoke`, so they limit only the
+current agent loop invocation.
 
 When a limit is exceeded, the agent raises the corresponding exception:
-`TokenLimitExceededException`, `StepsLimitExceededException`, or `TimeoutExceededException`,
+`TokenLimitExceededException`, `StepsLimitExceededException`, `TimeoutExceededException`, or
 `StructuredOutputRetryLimitExceededException`.
 
 ### Overriding defaults
 
-To override a specific limit, pass your own instance of the corresponding middleware
-class. The default for that limit is suppressed automatically - the other defaults
-remain active:
+Limits are configured via the `AgentLimits` dataclass passed to the `Agent` constructor.
+Only the fields you specify are overridden; the rest keep their defaults:
 
 ```py
-from splunklib.ai.limits import (
-    TokenLimitMiddleware,
-    StepLimitMiddleware,
-    TimeoutLimitMiddleware,
-    StructuredOutputRetryLimitMiddleware,
-)
+from splunklib.ai.limits import AgentLimits
 
 async with Agent(
     ...,
-    middleware=[
-        TokenLimitMiddleware(50_000),   # overrides default 200 000; other defaults still apply
-    ],
+    limits=AgentLimits(max_tokens=50_000),  # overrides default 200 000; other defaults still apply
 ) as agent: ...
 ```
 
-To override all defaults, pass all of these to Agent's middleware list:
+To override all defaults:
 
 ```py
 async with Agent(
     ...,
-    middleware=[
-        StructuredOutputRetryLimitMiddleware(0), # no-retries.
-        TokenLimitMiddleware(50_000),
-        StepLimitMiddleware(10),
-        TimeoutLimitMiddleware(30.0),
-    ],
+    limits=AgentLimits(
+        max_tokens=50_000,
+        max_steps=10,
+        timeout=30.0,
+        max_structured_output_retires=0,  # no retries
+    ),
 ) as agent: ...
 ```
 
-**Note**: When overriding limit middlewares, order matters. Place `StructuredOutputRetryLimitMiddleware`
-first and `TokenLimitMiddleware`, `StepLimitMiddleware`, and `TimeoutLimitMiddleware` last,
-otherwise the limits may not behave as expected.
-
 There is no explicit opt-out - the intent is that agents should always have some guardrails.
 
 ## Logger
 
@@ -26,6 +26,7 @@
 from splunklib.ai.conversation_store import ConversationStore
 from splunklib.ai.core.backend import AgentImpl
 from splunklib.ai.core.backend_registry import get_backend
+from splunklib.ai.limits import AgentLimits
 from splunklib.ai.messages import AgentResponse, BaseMessage, HumanMessage, OutputT
 from splunklib.ai.middleware import AgentMiddleware
 from splunklib.ai.model import PredefinedModel
@@ -47,6 +48,8 @@
 _testing_app_id: str | None = None
 
 DEFAULT_TOOL_SETTINGS = ToolSettings(local=False, remote=None)
+DEFAULT_AGENT_LIMITS = AgentLimits()
+
 _SPLUNK_SYSTEM_USER = "splunk-system-user"
 
 
@@ -133,6 +136,10 @@ class Agent(BaseAgent[OutputT]):
 
             Never invoke an Agent using the same thread_id more than once concurrently
             while using the same conversation_store.
+
+        limits:
+            `AgentLimits` instance controlling the built-in safety limits.
+            When omitted, `DEFAULT_AGENT_LIMITS` (the default limits) is used.
     """
 
     _impl: AgentImpl[OutputT] | None
@@ -149,6 +156,7 @@ def __init__(
         output_schema: type[OutputT] | None = None,
         input_schema: type[BaseModel] | None = None,  # Only used by Subagents
         middleware: Sequence[AgentMiddleware] | None = None,
+        limits: AgentLimits = DEFAULT_AGENT_LIMITS,
         name: str = "",  # Only used by Subagents
         description: str = "",  # Only used by Subagents
         logger: Logger | None = None,
@@ -169,6 +177,7 @@ def __init__(
             logger=logger,
             conversation_store=conversation_store,
             thread_id=thread_id if thread_id is not None else str(uuid4()),
+            limits=limits,
         )
 
         self._service = service
 
@@ -22,14 +22,7 @@
 
 from splunklib.ai.conversation_store import ConversationStore
 from splunklib.ai.limits import (
-    DEFAULT_STEP_LIMIT,
-    DEFAULT_STRUCTURED_OUTPUT_RETRY_LIMIT,
-    DEFAULT_TIMEOUT_SECONDS,
-    DEFAULT_TOKEN_LIMIT,
-    StepLimitMiddleware,
-    StructuredOutputRetryLimitMiddleware,
-    TimeoutLimitMiddleware,
-    TokenLimitMiddleware,
+    AgentLimits,
 )
 from splunklib.ai.messages import AgentResponse, BaseMessage, OutputT
 from splunklib.ai.middleware import AgentMiddleware
@@ -53,6 +46,7 @@ class BaseAgent(Generic[OutputT], ABC):  # noqa: UP046 TODO[BJ]
     _logger: logging.Logger
     _conversation_store: ConversationStore | None = None
     _thread_id: str
+    _limits: AgentLimits
 
     def __init__(
         self,
@@ -69,6 +63,7 @@ def __init__(
         logger: logging.Logger | None,
         conversation_store: ConversationStore | None,
         thread_id: str,
+        limits: AgentLimits,
     ) -> None:
         self._system_prompt = system_prompt
         self._model = model
@@ -79,26 +74,8 @@ def __init__(
         self._agents = tuple(agents) if agents else ()
         self._input_schema = input_schema
         self._output_schema = output_schema
-        user_middleware = tuple(middleware) if middleware else ()
-        user_middleware_types = {type(m) for m in user_middleware}
-
-        # NOTE: we're creating separate instances per agent - TimeoutLimitMiddleware is stateful
-        # and sharing one would cause agents to overwrite each other's deadline.
-        predefined_before: list[AgentMiddleware] = [
-            StructuredOutputRetryLimitMiddleware(DEFAULT_STRUCTURED_OUTPUT_RETRY_LIMIT),
-        ]
-        predefined_after: list[AgentMiddleware] = [
-            TokenLimitMiddleware(DEFAULT_TOKEN_LIMIT),
-            StepLimitMiddleware(DEFAULT_STEP_LIMIT),
-            TimeoutLimitMiddleware(DEFAULT_TIMEOUT_SECONDS),
-        ]
-
-        self._middleware = (
-            *[m for m in predefined_before if type(m) not in user_middleware_types],
-            *user_middleware,
-            *[m for m in predefined_after if type(m) not in user_middleware_types],
-        )
-
+        self._limits = limits
+        self._middleware = middleware
         self._trace_id = secrets.token_hex(16)  # 32 Hex characters
         self._conversation_store = conversation_store
         self._thread_id = thread_id
@@ -177,3 +154,7 @@ def conversation_store(self) -> ConversationStore | None:
     @property
     def default_thread_id(self) -> str:
         return self._thread_id
+
+    @property
+    def limits(self) -> AgentLimits:
+        return self._limits