
Commit 582959c

removed default in max config for output tokens
1 parent f366679 commit 582959c

4 files changed: +38 additions, -110 deletions

apps/sim/providers/anthropic/core.ts

Lines changed: 8 additions & 33 deletions
@@ -265,8 +265,7 @@ export async function executeAnthropicProviderRequest(
     messages,
     system: systemPrompt,
     max_tokens:
-      Number.parseInt(String(request.maxTokens)) ||
-      getMaxOutputTokensForModel(request.model, request.stream ?? false),
+      Number.parseInt(String(request.maxTokens)) || getMaxOutputTokensForModel(request.model),
     temperature: Number.parseFloat(String(request.temperature ?? 0.7)),
   }

@@ -308,7 +307,7 @@ export async function executeAnthropicProviderRequest(
     const budgetTokens = thinkingConfig.thinking.budget_tokens
     const minMaxTokens = budgetTokens + 4096
     if (payload.max_tokens < minMaxTokens) {
-      const modelMax = getMaxOutputTokensForModel(request.model, true)
+      const modelMax = getMaxOutputTokensForModel(request.model)
       payload.max_tokens = Math.min(minMaxTokens, modelMax)
       logger.info(
         `Adjusted max_tokens to ${payload.max_tokens} to satisfy budget_tokens (${budgetTokens}) constraint`
@@ -438,25 +437,13 @@ export async function executeAnthropicProviderRequest(
     const providerStartTime = Date.now()
     const providerStartTimeISO = new Date(providerStartTime).toISOString()

-    // Cap intermediate calls at non-streaming limit to avoid SDK timeout errors,
-    // but allow users to set lower values if desired. Use Math.max to preserve
-    // thinking-adjusted max_tokens from payload when it's higher.
-    const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false)
-    const nonStreamingMaxTokens = request.maxTokens
-      ? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit)
-      : nonStreamingLimit
-    const intermediatePayload = {
-      ...payload,
-      max_tokens: Math.max(nonStreamingMaxTokens, payload.max_tokens),
-    }
-
     try {
       const initialCallTime = Date.now()
-      const originalToolChoice = intermediatePayload.tool_choice
+      const originalToolChoice = payload.tool_choice
       const forcedTools = preparedTools?.forcedTools || []
       let usedForcedTools: string[] = []

-      let currentResponse = await createMessage(anthropic, intermediatePayload)
+      let currentResponse = await createMessage(anthropic, payload)
       const firstResponseTime = Date.now() - initialCallTime

       let content = ''
@@ -669,7 +656,7 @@ export async function executeAnthropicProviderRequest(
         toolsTime += thisToolsTime

         const nextPayload = {
-          ...intermediatePayload,
+          ...payload,
           messages: currentMessages,
         }

@@ -852,25 +839,13 @@ export async function executeAnthropicProviderRequest(
     const providerStartTime = Date.now()
     const providerStartTimeISO = new Date(providerStartTime).toISOString()

-    // Cap intermediate calls at non-streaming limit to avoid SDK timeout errors,
-    // but allow users to set lower values if desired. Use Math.max to preserve
-    // thinking-adjusted max_tokens from payload when it's higher.
-    const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false)
-    const toolLoopMaxTokens = request.maxTokens
-      ? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit)
-      : nonStreamingLimit
-    const toolLoopPayload = {
-      ...payload,
-      max_tokens: Math.max(toolLoopMaxTokens, payload.max_tokens),
-    }
-
     try {
       const initialCallTime = Date.now()
-      const originalToolChoice = toolLoopPayload.tool_choice
+      const originalToolChoice = payload.tool_choice
       const forcedTools = preparedTools?.forcedTools || []
       let usedForcedTools: string[] = []

-      let currentResponse = await createMessage(anthropic, toolLoopPayload)
+      let currentResponse = await createMessage(anthropic, payload)
       const firstResponseTime = Date.now() - initialCallTime

       let content = ''
@@ -1096,7 +1071,7 @@ export async function executeAnthropicProviderRequest(
         toolsTime += thisToolsTime

         const nextPayload = {
-          ...toolLoopPayload,
+          ...payload,
           messages: currentMessages,
         }
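
With the intermediate and tool-loop payload clones gone, every call in the request flow (the initial call and each tool-use iteration, streaming or not) reuses the one payload whose max_tokens is resolved a single time up front. A minimal sketch of that resolution, assuming a request object with the optional maxTokens, model, and temperature fields seen in the diff (other field names here are illustrative, not the real payload shape):

// How max_tokens ends up in the payload after this commit.
// Number.parseInt(String(undefined)) is NaN, which is falsy, so an unset
// request.maxTokens falls through to the model's maximum.
const maxTokens =
  Number.parseInt(String(request.maxTokens)) || getMaxOutputTokensForModel(request.model)

const payload = {
  // ...other request fields omitted for brevity
  max_tokens: maxTokens,
  temperature: Number.parseFloat(String(request.temperature ?? 0.7)),
}

The extended-thinking branch later in the file still clamps through getMaxOutputTokensForModel(request.model), so a budget_tokens adjustment can raise max_tokens only up to the model's supported maximum.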

apps/sim/providers/models.ts

Lines changed: 19 additions & 35 deletions
@@ -34,17 +34,8 @@ export interface ModelCapabilities {
   toolUsageControl?: boolean
   computerUse?: boolean
   nativeStructuredOutputs?: boolean
-  /**
-   * Max output tokens configuration for Anthropic SDK's streaming timeout workaround.
-   * The Anthropic SDK throws an error for non-streaming requests that may take >10 minutes.
-   * This only applies to direct Anthropic API calls, not Bedrock (which uses AWS SDK).
-   */
-  maxOutputTokens?: {
-    /** Maximum supported output tokens (used for streaming requests) */
-    max: number
-    /** Conservative default when user doesn't specify maxTokens (controls cost/latency) */
-    default: number
-  }
+  /** Maximum supported output tokens for this model */
+  maxOutputTokens?: number
   reasoningEffort?: {
     values: string[]
   }
@@ -339,7 +330,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 128000, default: 8192 },
+      maxOutputTokens: 128000,
       thinking: {
         levels: ['low', 'medium', 'high', 'max'],
         default: 'high',
@@ -358,7 +349,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
      nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -377,7 +368,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -395,7 +386,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     },
     capabilities: {
       temperature: { min: 0, max: 1 },
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -414,7 +405,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -432,7 +423,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     },
     capabilities: {
       temperature: { min: 0, max: 1 },
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -451,7 +442,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -469,7 +460,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     },
     capabilities: {
       temperature: { min: 0, max: 1 },
-      maxOutputTokens: { max: 4096, default: 4096 },
+      maxOutputTokens: 4096,
     },
     contextWindow: 200000,
   },
@@ -484,7 +475,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       computerUse: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -743,7 +734,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 128000, default: 8192 },
+      maxOutputTokens: 128000,
       thinking: {
         levels: ['low', 'medium', 'high', 'max'],
         default: 'high',
@@ -762,7 +753,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -781,7 +772,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -800,7 +791,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -819,7 +810,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -2490,26 +2481,19 @@ export function getThinkingLevelsForModel(modelId: string): string[] | null {
 }

 /**
- * Get the max output tokens for a specific model
- * Returns the model's max capacity for streaming requests,
- * or the model's safe default for non-streaming requests to avoid timeout issues.
+ * Get the max output tokens for a specific model.
  *
  * @param modelId - The model ID
- * @param streaming - Whether the request is streaming (default: false)
  */
-export function getMaxOutputTokensForModel(modelId: string, streaming = false): number {
+export function getMaxOutputTokensForModel(modelId: string): number {
   const normalizedModelId = modelId.toLowerCase()
   const STANDARD_MAX_OUTPUT_TOKENS = 4096

   for (const provider of Object.values(PROVIDER_DEFINITIONS)) {
     for (const model of provider.models) {
       const baseModelId = model.id.toLowerCase()
       if (normalizedModelId === baseModelId || normalizedModelId.startsWith(`${baseModelId}-`)) {
-        const outputTokens = model.capabilities.maxOutputTokens
-        if (outputTokens) {
-          return streaming ? outputTokens.max : outputTokens.default
-        }
-        return STANDARD_MAX_OUTPUT_TOKENS
+        return model.capabilities.maxOutputTokens || STANDARD_MAX_OUTPUT_TOKENS
       }
     }
   }
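
The capability is now a single number instead of a { max, default } pair, and the lookup returns it directly. A self-contained sketch of the flattened shape and lookup, using a tiny stand-in table (the real PROVIDER_DEFINITIONS contains many more providers, models, and capability fields):

interface ModelCapabilities {
  /** Maximum supported output tokens for this model */
  maxOutputTokens?: number
}

interface ModelDefinition {
  id: string
  capabilities: ModelCapabilities
}

// Illustrative stand-in for PROVIDER_DEFINITIONS, not the real table.
const providers: Record<string, { models: ModelDefinition[] }> = {
  anthropic: {
    models: [
      { id: 'claude-opus-4-6', capabilities: { maxOutputTokens: 128000 } },
      { id: 'claude-sonnet-4-5', capabilities: { maxOutputTokens: 64000 } },
    ],
  },
}

function getMaxOutputTokensForModel(modelId: string): number {
  const normalizedModelId = modelId.toLowerCase()
  const STANDARD_MAX_OUTPUT_TOKENS = 4096

  for (const provider of Object.values(providers)) {
    for (const model of provider.models) {
      const baseModelId = model.id.toLowerCase()
      if (normalizedModelId === baseModelId || normalizedModelId.startsWith(`${baseModelId}-`)) {
        return model.capabilities.maxOutputTokens || STANDARD_MAX_OUTPUT_TOKENS
      }
    }
  }
  return STANDARD_MAX_OUTPUT_TOKENS
}

// getMaxOutputTokensForModel('claude-opus-4-6')     -> 128000
// getMaxOutputTokensForModel('claude-sonnet-4-5-x') -> 64000 (prefix match; suffix is illustrative)
// getMaxOutputTokensForModel('unknown-model')       -> 4096 (standard fallback)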

apps/sim/providers/utils.test.ts

Lines changed: 8 additions & 36 deletions
@@ -580,53 +580,25 @@ describe('Model Capabilities', () => {

   describe('Max Output Tokens', () => {
     describe('getMaxOutputTokensForModel', () => {
-      it.concurrent('should return higher value for streaming than non-streaming (Anthropic)', () => {
-        const streamingTokens = getMaxOutputTokensForModel('claude-opus-4-6', true)
-        const nonStreamingTokens = getMaxOutputTokensForModel('claude-opus-4-6', false)
-        expect(streamingTokens).toBeGreaterThan(nonStreamingTokens)
-        expect(streamingTokens).toBe(128000)
-        expect(nonStreamingTokens).toBe(8192)
+      it.concurrent('should return correct max for Claude Opus 4.6', () => {
+        expect(getMaxOutputTokensForModel('claude-opus-4-6')).toBe(128000)
       })

-      it.concurrent('should return correct values for Claude Sonnet 4.5', () => {
-        expect(getMaxOutputTokensForModel('claude-sonnet-4-5', true)).toBe(64000)
-        expect(getMaxOutputTokensForModel('claude-sonnet-4-5', false)).toBe(8192)
+      it.concurrent('should return correct max for Claude Sonnet 4.5', () => {
+        expect(getMaxOutputTokensForModel('claude-sonnet-4-5')).toBe(64000)
       })

-      it.concurrent('should return correct values for Claude Opus 4.1', () => {
-        expect(getMaxOutputTokensForModel('claude-opus-4-1', true)).toBe(64000)
-        expect(getMaxOutputTokensForModel('claude-opus-4-1', false)).toBe(8192)
+      it.concurrent('should return correct max for Claude Opus 4.1', () => {
+        expect(getMaxOutputTokensForModel('claude-opus-4-1')).toBe(64000)
       })

       it.concurrent('should return standard default for models without maxOutputTokens', () => {
-        expect(getMaxOutputTokensForModel('gpt-4o', false)).toBe(4096)
-        expect(getMaxOutputTokensForModel('gpt-4o', true)).toBe(4096)
+        expect(getMaxOutputTokensForModel('gpt-4o')).toBe(4096)
       })

       it.concurrent('should return standard default for unknown models', () => {
-        expect(getMaxOutputTokensForModel('unknown-model', false)).toBe(4096)
-        expect(getMaxOutputTokensForModel('unknown-model', true)).toBe(4096)
+        expect(getMaxOutputTokensForModel('unknown-model')).toBe(4096)
       })
-
-      it.concurrent(
-        'non-streaming default should be within Anthropic SDK non-streaming threshold',
-        () => {
-          const SDK_NON_STREAMING_THRESHOLD = 21333
-          const models = [
-            'claude-opus-4-6',
-            'claude-opus-4-5',
-            'claude-opus-4-1',
-            'claude-sonnet-4-5',
-            'claude-sonnet-4-0',
-            'claude-haiku-4-5',
-          ]
-
-          for (const model of models) {
-            const nonStreamingDefault = getMaxOutputTokensForModel(model, false)
-            expect(nonStreamingDefault).toBeLessThan(SDK_NON_STREAMING_THRESHOLD)
-          }
-        }
-      )
     })
   })
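
The surviving cases pin down the new single-argument contract. Read as a quick reference, the assertions above expect (values copied from the tests, not newly measured):

// getMaxOutputTokensForModel('claude-opus-4-6')   -> 128000
// getMaxOutputTokensForModel('claude-sonnet-4-5') -> 64000
// getMaxOutputTokensForModel('claude-opus-4-1')   -> 64000
// getMaxOutputTokensForModel('gpt-4o')            -> 4096 (no maxOutputTokens capability)
// getMaxOutputTokensForModel('unknown-model')     -> 4096 (standard fallback)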

apps/sim/providers/utils.ts

Lines changed: 3 additions & 6 deletions
@@ -995,15 +995,12 @@ export function getThinkingLevelsForModel(model: string): string[] | null {
 }

 /**
- * Get max output tokens for a specific model
- * Returns the model's maxOutputTokens capability for streaming requests,
- * or a conservative default (8192) for non-streaming requests to avoid timeout issues.
+ * Get max output tokens for a specific model.
  *
  * @param model - The model ID
- * @param streaming - Whether the request is streaming (default: false)
  */
-export function getMaxOutputTokensForModel(model: string, streaming = false): number {
-  return getMaxOutputTokensForModelFromDefinitions(model, streaming)
+export function getMaxOutputTokensForModel(model: string): number {
+  return getMaxOutputTokensForModelFromDefinitions(model)
 }

 /**
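
Because the streaming parameter is gone from both this wrapper and the underlying definitions lookup, call sites now pass only the model id. A brief before/after sketch (the "before" call shape is taken from the removed lines in core.ts; the surrounding variable name is illustrative):

// before: getMaxOutputTokensForModel(request.model, request.stream ?? false)
// after:
const modelMax = getMaxOutputTokensForModel(request.model) // e.g. 64000 for 'claude-sonnet-4-5'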
