@@ -2347,6 +2347,17 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
23472347 const modelId = getModelId ( this . apiConfiguration )
23482348 const apiProtocol = getApiProtocol ( this . apiConfiguration . apiProvider , modelId )
23492349
2350+ // Respect user-configured provider rate limiting BEFORE we emit api_req_started.
2351+ // This prevents the UI from showing an "API Request..." spinner while we are
2352+ // intentionally waiting due to the rate limit slider.
2353+ //
2354+ // NOTE: We also set Task.lastGlobalApiRequestTime here to reserve this slot
2355+ // before we build environment details (which can take time).
2356+ // This ensures subsequent requests (including subtasks) still honour the
2357+ // provider rate-limit window.
2358+ await this . maybeWaitForProviderRateLimit ( currentItem . retryAttempt ?? 0 )
2359+ Task . lastGlobalApiRequestTime = performance . now ( )
2360+
23502361 await this . say (
23512362 "api_req_started" ,
23522363 JSON . stringify ( {
@@ -2554,7 +2565,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
25542565 // Yields only if the first chunk is successful, otherwise will
25552566 // allow the user to retry the request (most likely due to rate
25562567 // limit error, which gets thrown on the first chunk).
2557- const stream = this . attemptApiRequest ( )
2568+ const stream = this . attemptApiRequest ( currentItem . retryAttempt ?? 0 , { skipProviderRateLimit : true } )
25582569 let assistantMessage = ""
25592570 let reasoningMessage = ""
25602571 let pendingGroundingSources : GroundingSource [ ] = [ ]
@@ -3656,7 +3667,44 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
36563667 await this . providerRef . deref ( ) ?. postMessageToWebview ( { type : "condenseTaskContextResponse" , text : this . taskId } )
36573668 }
36583669
3659- public async * attemptApiRequest ( retryAttempt : number = 0 ) : ApiStream {
/**
 * Pause until the user-configured provider rate-limit window has elapsed.
 *
 * Waiting here is expected behaviour, not a failure, so progress is reported
 * through the `api_req_rate_limit_wait` say type rather than as an error.
 *
 * This method only reads `Task.lastGlobalApiRequestTime`; it never updates it.
 *
 * @param retryAttempt - Attempt counter of the current request. The countdown
 *   (and therefore the wait) only happens when this is 0; retry flows have
 *   their own delay messaging.
 */
private async maybeWaitForProviderRateLimit(retryAttempt: number): Promise<void> {
	const providerState = await this.providerRef.deref()?.getState()

	// Prefer the provider's current setting, then this task's own configuration,
	// and finally treat a missing value as "rate limiting disabled".
	const rateLimitSeconds =
		providerState?.apiConfiguration?.rateLimitSeconds ?? this.apiConfiguration?.rateLimitSeconds ?? 0

	// Nothing to enforce when the limit is off or no request has been recorded yet.
	if (rateLimitSeconds <= 0 || !Task.lastGlobalApiRequestTime) {
		return
	}

	// Work out how much of the window is still outstanding, rounded up to whole
	// seconds and never longer than the configured limit.
	const elapsedMs = performance.now() - Task.lastGlobalApiRequestTime
	const remainingMs = Math.max(0, rateLimitSeconds * 1000 - elapsedMs)
	const waitSeconds = Math.ceil(Math.min(rateLimitSeconds, remainingMs / 1000))

	const shouldCountDown = waitSeconds > 0 && retryAttempt === 0
	if (!shouldCountDown) {
		return
	}

	let secondsLeft = waitSeconds
	while (secondsLeft > 0) {
		// Structured JSON payload keeps the transport i18n-safe.
		const payload = JSON.stringify({ seconds: secondsLeft })
		await this.say("api_req_rate_limit_wait", payload, undefined, true)
		await delay(1000)
		secondsLeft -= 1
	}

	// Close out the partial message so the UI stops rendering an in-progress spinner.
	await this.say("api_req_rate_limit_wait", undefined, undefined, false)
}
3703+
3704+ public async * attemptApiRequest (
3705+ retryAttempt : number = 0 ,
3706+ options : { skipProviderRateLimit ?: boolean } = { } ,
3707+ ) : ApiStream {
36603708 const state = await this . providerRef . deref ( ) ?. getState ( )
36613709
36623710 const {
@@ -3693,29 +3741,17 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
36933741 }
36943742 }
36953743
3696- let rateLimitDelay = 0
3697-
3698- // Use the shared timestamp so that subtasks respect the same rate-limit
3699- // window as their parent tasks.
3700- if ( Task . lastGlobalApiRequestTime ) {
3701- const now = performance . now ( )
3702- const timeSinceLastRequest = now - Task . lastGlobalApiRequestTime
3703- const rateLimit = apiConfiguration ?. rateLimitSeconds || 0
3704- rateLimitDelay = Math . ceil ( Math . min ( rateLimit , Math . max ( 0 , rateLimit * 1000 - timeSinceLastRequest ) / 1000 ) )
3705- }
3706-
3707- // Only show rate limiting message if we're not retrying. If retrying, we'll include the delay there.
3708- if ( rateLimitDelay > 0 && retryAttempt === 0 ) {
3709- // Show countdown timer
3710- for ( let i = rateLimitDelay ; i > 0 ; i -- ) {
3711- const delayMessage = `Rate limiting for ${ i } seconds...`
3712- await this . say ( "api_req_retry_delayed" , delayMessage , undefined , true )
3713- await delay ( 1000 )
3714- }
3744+ if ( ! options . skipProviderRateLimit ) {
3745+ await this . maybeWaitForProviderRateLimit ( retryAttempt )
37153746 }
37163747
3717- // Update last request time before making the request so that subsequent
3748+ // Update last request time right before making the request so that subsequent
37183749 // requests — even from new subtasks — will honour the provider's rate-limit.
3750+ //
3751+ // NOTE: When recursivelyMakeClineRequests handles rate limiting, it sets the
3752+ // timestamp earlier to include the environment details build. We still set it
3753+ // here for direct callers (tests) and for the case where we didn't rate-limit
3754+ // in the caller.
37193755 Task . lastGlobalApiRequestTime = performance . now ( )
37203756
37213757 const systemPrompt = await this . getSystemPrompt ( )
0 commit comments