
Commit 582959c

removed default in max config for output tokens
1 parent f366679 commit 582959c

4 files changed: +38 additions, -110 deletions

apps/sim/providers/anthropic/core.ts

Lines changed: 8 additions & 33 deletions
@@ -265,8 +265,7 @@ export async function executeAnthropicProviderRequest(
     messages,
     system: systemPrompt,
     max_tokens:
-      Number.parseInt(String(request.maxTokens)) ||
-      getMaxOutputTokensForModel(request.model, request.stream ?? false),
+      Number.parseInt(String(request.maxTokens)) || getMaxOutputTokensForModel(request.model),
     temperature: Number.parseFloat(String(request.temperature ?? 0.7)),
   }

@@ -308,7 +307,7 @@ export async function executeAnthropicProviderRequest(
     const budgetTokens = thinkingConfig.thinking.budget_tokens
     const minMaxTokens = budgetTokens + 4096
     if (payload.max_tokens < minMaxTokens) {
-      const modelMax = getMaxOutputTokensForModel(request.model, true)
+      const modelMax = getMaxOutputTokensForModel(request.model)
       payload.max_tokens = Math.min(minMaxTokens, modelMax)
       logger.info(
         `Adjusted max_tokens to ${payload.max_tokens} to satisfy budget_tokens (${budgetTokens}) constraint`
@@ -438,25 +437,13 @@ export async function executeAnthropicProviderRequest(
     const providerStartTime = Date.now()
     const providerStartTimeISO = new Date(providerStartTime).toISOString()

-    // Cap intermediate calls at non-streaming limit to avoid SDK timeout errors,
-    // but allow users to set lower values if desired. Use Math.max to preserve
-    // thinking-adjusted max_tokens from payload when it's higher.
-    const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false)
-    const nonStreamingMaxTokens = request.maxTokens
-      ? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit)
-      : nonStreamingLimit
-    const intermediatePayload = {
-      ...payload,
-      max_tokens: Math.max(nonStreamingMaxTokens, payload.max_tokens),
-    }
-
     try {
       const initialCallTime = Date.now()
-      const originalToolChoice = intermediatePayload.tool_choice
+      const originalToolChoice = payload.tool_choice
       const forcedTools = preparedTools?.forcedTools || []
       let usedForcedTools: string[] = []

-      let currentResponse = await createMessage(anthropic, intermediatePayload)
+      let currentResponse = await createMessage(anthropic, payload)
       const firstResponseTime = Date.now() - initialCallTime

       let content = ''
@@ -669,7 +656,7 @@ export async function executeAnthropicProviderRequest(
         toolsTime += thisToolsTime

         const nextPayload = {
-          ...intermediatePayload,
+          ...payload,
           messages: currentMessages,
         }

@@ -852,25 +839,13 @@ export async function executeAnthropicProviderRequest(
     const providerStartTime = Date.now()
     const providerStartTimeISO = new Date(providerStartTime).toISOString()

-    // Cap intermediate calls at non-streaming limit to avoid SDK timeout errors,
-    // but allow users to set lower values if desired. Use Math.max to preserve
-    // thinking-adjusted max_tokens from payload when it's higher.
-    const nonStreamingLimit = getMaxOutputTokensForModel(request.model, false)
-    const toolLoopMaxTokens = request.maxTokens
-      ? Math.min(Number.parseInt(String(request.maxTokens)), nonStreamingLimit)
-      : nonStreamingLimit
-    const toolLoopPayload = {
-      ...payload,
-      max_tokens: Math.max(toolLoopMaxTokens, payload.max_tokens),
-    }
-
     try {
       const initialCallTime = Date.now()
-      const originalToolChoice = toolLoopPayload.tool_choice
+      const originalToolChoice = payload.tool_choice
       const forcedTools = preparedTools?.forcedTools || []
       let usedForcedTools: string[] = []

-      let currentResponse = await createMessage(anthropic, toolLoopPayload)
+      let currentResponse = await createMessage(anthropic, payload)
       const firstResponseTime = Date.now() - initialCallTime

       let content = ''
@@ -1096,7 +1071,7 @@ export async function executeAnthropicProviderRequest(
         toolsTime += thisToolsTime

         const nextPayload = {
-          ...toolLoopPayload,
+          ...payload,
           messages: currentMessages,
         }
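
With the intermediate and tool-loop payload clones gone, every call in the request flow (the initial call and each tool-use iteration, streaming or not) reuses the one payload whose max_tokens is resolved a single time up front. A minimal sketch of that resolution, assuming a request object with the optional maxTokens, model, and temperature fields seen in the diff (other field names here are illustrative, not the real payload shape):

// How max_tokens ends up in the payload after this commit.
// Number.parseInt(String(undefined)) is NaN, which is falsy, so an unset
// request.maxTokens falls through to the model's maximum.
const maxTokens =
  Number.parseInt(String(request.maxTokens)) || getMaxOutputTokensForModel(request.model)

const payload = {
  // ...other request fields omitted for brevity
  max_tokens: maxTokens,
  temperature: Number.parseFloat(String(request.temperature ?? 0.7)),
}

The extended-thinking branch later in the file still clamps through getMaxOutputTokensForModel(request.model), so a budget_tokens adjustment can raise max_tokens only up to the model's supported maximum.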

apps/sim/providers/models.ts

Lines changed: 19 additions & 35 deletions
@@ -34,17 +34,8 @@ export interface ModelCapabilities {
   toolUsageControl?: boolean
   computerUse?: boolean
   nativeStructuredOutputs?: boolean
-  /**
-   * Max output tokens configuration for Anthropic SDK's streaming timeout workaround.
-   * The Anthropic SDK throws an error for non-streaming requests that may take >10 minutes.
-   * This only applies to direct Anthropic API calls, not Bedrock (which uses AWS SDK).
-   */
-  maxOutputTokens?: {
-    /** Maximum supported output tokens (used for streaming requests) */
-    max: number
-    /** Conservative default when user doesn't specify maxTokens (controls cost/latency) */
-    default: number
-  }
+  /** Maximum supported output tokens for this model */
+  maxOutputTokens?: number
   reasoningEffort?: {
     values: string[]
   }
@@ -339,7 +330,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 128000, default: 8192 },
+      maxOutputTokens: 128000,
       thinking: {
         levels: ['low', 'medium', 'high', 'max'],
         default: 'high',
@@ -358,7 +349,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
      nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -377,7 +368,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -395,7 +386,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     },
     capabilities: {
       temperature: { min: 0, max: 1 },
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -414,7 +405,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -432,7 +423,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     },
     capabilities: {
       temperature: { min: 0, max: 1 },
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -451,7 +442,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -469,7 +460,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     },
     capabilities: {
       temperature: { min: 0, max: 1 },
-      maxOutputTokens: { max: 4096, default: 4096 },
+      maxOutputTokens: 4096,
     },
     contextWindow: 200000,
   },
@@ -484,7 +475,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       computerUse: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -743,7 +734,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 128000, default: 8192 },
+      maxOutputTokens: 128000,
       thinking: {
         levels: ['low', 'medium', 'high', 'max'],
         default: 'high',
@@ -762,7 +753,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -781,7 +772,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -800,7 +791,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -819,7 +810,7 @@ export const PROVIDER_DEFINITIONS: Record<string, ProviderDefinition> = {
     capabilities: {
       temperature: { min: 0, max: 1 },
       nativeStructuredOutputs: true,
-      maxOutputTokens: { max: 64000, default: 8192 },
+      maxOutputTokens: 64000,
       thinking: {
         levels: ['low', 'medium', 'high'],
         default: 'high',
@@ -2490,26 +2481,19 @@ export function getThinkingLevelsForModel(modelId: string): string[] | null {
 }

 /**
- * Get the max output tokens for a specific model
- * Returns the model's max capacity for streaming requests,
- * or the model's safe default for non-streaming requests to avoid timeout issues.
+ * Get the max output tokens for a specific model.
  *
  * @param modelId - The model ID
- * @param streaming - Whether the request is streaming (default: false)
  */
-export function getMaxOutputTokensForModel(modelId: string, streaming = false): number {
+export function getMaxOutputTokensForModel(modelId: string): number {
   const normalizedModelId = modelId.toLowerCase()
   const STANDARD_MAX_OUTPUT_TOKENS = 4096

   for (const provider of Object.values(PROVIDER_DEFINITIONS)) {
     for (const model of provider.models) {
       const baseModelId = model.id.toLowerCase()
       if (normalizedModelId === baseModelId || normalizedModelId.startsWith(`${baseModelId}-`)) {
-        const outputTokens = model.capabilities.maxOutputTokens
-        if (outputTokens) {
-          return streaming ? outputTokens.max : outputTokens.default
-        }
-        return STANDARD_MAX_OUTPUT_TOKENS
+        return model.capabilities.maxOutputTokens || STANDARD_MAX_OUTPUT_TOKENS
       }
     }
   }
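
The capability is now a single number instead of a { max, default } pair, and the lookup returns it directly. A self-contained sketch of the flattened shape and lookup, using a tiny stand-in table (the real PROVIDER_DEFINITIONS contains many more providers, models, and capability fields):

interface ModelCapabilities {
  /** Maximum supported output tokens for this model */
  maxOutputTokens?: number
}

interface ModelDefinition {
  id: string
  capabilities: ModelCapabilities
}

// Illustrative stand-in for PROVIDER_DEFINITIONS, not the real table.
const providers: Record<string, { models: ModelDefinition[] }> = {
  anthropic: {
    models: [
      { id: 'claude-opus-4-6', capabilities: { maxOutputTokens: 128000 } },
      { id: 'claude-sonnet-4-5', capabilities: { maxOutputTokens: 64000 } },
    ],
  },
}

function getMaxOutputTokensForModel(modelId: string): number {
  const normalizedModelId = modelId.toLowerCase()
  const STANDARD_MAX_OUTPUT_TOKENS = 4096

  for (const provider of Object.values(providers)) {
    for (const model of provider.models) {
      const baseModelId = model.id.toLowerCase()
      if (normalizedModelId === baseModelId || normalizedModelId.startsWith(`${baseModelId}-`)) {
        return model.capabilities.maxOutputTokens || STANDARD_MAX_OUTPUT_TOKENS
      }
    }
  }
  return STANDARD_MAX_OUTPUT_TOKENS
}

// getMaxOutputTokensForModel('claude-opus-4-6')     -> 128000
// getMaxOutputTokensForModel('claude-sonnet-4-5-x') -> 64000 (prefix match; suffix is illustrative)
// getMaxOutputTokensForModel('unknown-model')       -> 4096 (standard fallback)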

apps/sim/providers/utils.test.ts

Lines changed: 8 additions & 36 deletions
@@ -580,53 +580,25 @@ describe('Model Capabilities', () => {

   describe('Max Output Tokens', () => {
     describe('getMaxOutputTokensForModel', () => {
-      it.concurrent('should return higher value for streaming than non-streaming (Anthropic)', () => {
-        const streamingTokens = getMaxOutputTokensForModel('claude-opus-4-6', true)
-        const nonStreamingTokens = getMaxOutputTokensForModel('claude-opus-4-6', false)
-        expect(streamingTokens).toBeGreaterThan(nonStreamingTokens)
-        expect(streamingTokens).toBe(128000)
-        expect(nonStreamingTokens).toBe(8192)
+      it.concurrent('should return correct max for Claude Opus 4.6', () => {
+        expect(getMaxOutputTokensForModel('claude-opus-4-6')).toBe(128000)
       })

-      it.concurrent('should return correct values for Claude Sonnet 4.5', () => {
-        expect(getMaxOutputTokensForModel('claude-sonnet-4-5', true)).toBe(64000)
-        expect(getMaxOutputTokensForModel('claude-sonnet-4-5', false)).toBe(8192)
+      it.concurrent('should return correct max for Claude Sonnet 4.5', () => {
+        expect(getMaxOutputTokensForModel('claude-sonnet-4-5')).toBe(64000)
       })

-      it.concurrent('should return correct values for Claude Opus 4.1', () => {
-        expect(getMaxOutputTokensForModel('claude-opus-4-1', true)).toBe(64000)
-        expect(getMaxOutputTokensForModel('claude-opus-4-1', false)).toBe(8192)
+      it.concurrent('should return correct max for Claude Opus 4.1', () => {
+        expect(getMaxOutputTokensForModel('claude-opus-4-1')).toBe(64000)
       })

       it.concurrent('should return standard default for models without maxOutputTokens', () => {
-        expect(getMaxOutputTokensForModel('gpt-4o', false)).toBe(4096)
-        expect(getMaxOutputTokensForModel('gpt-4o', true)).toBe(4096)
+        expect(getMaxOutputTokensForModel('gpt-4o')).toBe(4096)
       })

       it.concurrent('should return standard default for unknown models', () => {
-        expect(getMaxOutputTokensForModel('unknown-model', false)).toBe(4096)
-        expect(getMaxOutputTokensForModel('unknown-model', true)).toBe(4096)
+        expect(getMaxOutputTokensForModel('unknown-model')).toBe(4096)
       })
-
-      it.concurrent(
-        'non-streaming default should be within Anthropic SDK non-streaming threshold',
-        () => {
-          const SDK_NON_STREAMING_THRESHOLD = 21333
-          const models = [
-            'claude-opus-4-6',
-            'claude-opus-4-5',
-            'claude-opus-4-1',
-            'claude-sonnet-4-5',
-            'claude-sonnet-4-0',
-            'claude-haiku-4-5',
-          ]
-
-          for (const model of models) {
-            const nonStreamingDefault = getMaxOutputTokensForModel(model, false)
-            expect(nonStreamingDefault).toBeLessThan(SDK_NON_STREAMING_THRESHOLD)
-          }
-        }
-      )
     })
   })
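
The surviving cases pin down the new single-argument contract. Read as a quick reference, the assertions above expect (values copied from the tests, not newly measured):

// getMaxOutputTokensForModel('claude-opus-4-6')   -> 128000
// getMaxOutputTokensForModel('claude-sonnet-4-5') -> 64000
// getMaxOutputTokensForModel('claude-opus-4-1')   -> 64000
// getMaxOutputTokensForModel('gpt-4o')            -> 4096 (no maxOutputTokens capability)
// getMaxOutputTokensForModel('unknown-model')     -> 4096 (standard fallback)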

apps/sim/providers/utils.ts

Lines changed: 3 additions & 6 deletions
@@ -995,15 +995,12 @@ export function getThinkingLevelsForModel(model: string): string[] | null {
 }

 /**
- * Get max output tokens for a specific model
- * Returns the model's maxOutputTokens capability for streaming requests,
- * or a conservative default (8192) for non-streaming requests to avoid timeout issues.
+ * Get max output tokens for a specific model.
  *
  * @param model - The model ID
- * @param streaming - Whether the request is streaming (default: false)
  */
-export function getMaxOutputTokensForModel(model: string, streaming = false): number {
-  return getMaxOutputTokensForModelFromDefinitions(model, streaming)
+export function getMaxOutputTokensForModel(model: string): number {
+  return getMaxOutputTokensForModelFromDefinitions(model)
 }

 /**
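
Because the streaming parameter is gone from both this wrapper and the underlying definitions lookup, call sites now pass only the model id. A brief before/after sketch (the "before" call shape is taken from the removed lines in core.ts; the surrounding variable name is illustrative):

// before: getMaxOutputTokensForModel(request.model, request.stream ?? false)
// after:
const modelMax = getMaxOutputTokensForModel(request.model) // e.g. 64000 for 'claude-sonnet-4-5'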
