Skip to content

Commit ca1bc18

Browse files
hannesrudolphroomotemrubens
authored
Fix rate limit wait display (#10389)
Co-authored-by: Roo Code <roomote@roocode.com> Co-authored-by: Matt Rubens <mrubens@users.noreply.github.com>
1 parent c37aa02 commit ca1bc18

File tree

25 files changed

+218
-24
lines changed

25 files changed

+218
-24
lines changed

packages/evals/src/cli/runTask.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -301,6 +301,7 @@ export const runTask = async ({ run, task, publish, logger, jobToken }: RunTaskO
301301
"diff_error",
302302
"condense_context",
303303
"condense_context_error",
304+
"api_req_rate_limit_wait",
304305
"api_req_retry_delayed",
305306
"api_req_retried",
306307
]

packages/types/src/message.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -129,6 +129,7 @@ export function isNonBlockingAsk(ask: ClineAsk): ask is NonBlockingAsk {
129129
* - `api_req_finished`: Indicates an API request has completed successfully
130130
* - `api_req_retried`: Indicates an API request is being retried after a failure
131131
* - `api_req_retry_delayed`: Indicates an API request retry has been delayed
132+
* - `api_req_rate_limit_wait`: Indicates a configured rate-limit wait (not an error)
132133
* - `api_req_deleted`: Indicates an API request has been deleted/cancelled
133134
* - `text`: General text message or assistant response
134135
* - `reasoning`: Assistant's reasoning or thought process (often hidden from user)
@@ -155,6 +156,7 @@ export const clineSays = [
155156
"api_req_finished",
156157
"api_req_retried",
157158
"api_req_retry_delayed",
159+
"api_req_rate_limit_wait",
158160
"api_req_deleted",
159161
"text",
160162
"image",

src/core/task/Task.ts

Lines changed: 58 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -2347,6 +2347,17 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
23472347
const modelId = getModelId(this.apiConfiguration)
23482348
const apiProtocol = getApiProtocol(this.apiConfiguration.apiProvider, modelId)
23492349

2350+
// Respect user-configured provider rate limiting BEFORE we emit api_req_started.
2351+
// This prevents the UI from showing an "API Request..." spinner while we are
2352+
// intentionally waiting due to the rate limit slider.
2353+
//
2354+
// NOTE: We also set Task.lastGlobalApiRequestTime here to reserve this slot
2355+
// before we build environment details (which can take time).
2356+
// This ensures subsequent requests (including subtasks) still honour the
2357+
// provider rate-limit window.
2358+
await this.maybeWaitForProviderRateLimit(currentItem.retryAttempt ?? 0)
2359+
Task.lastGlobalApiRequestTime = performance.now()
2360+
23502361
await this.say(
23512362
"api_req_started",
23522363
JSON.stringify({
@@ -2554,7 +2565,7 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
25542565
// Yields only if the first chunk is successful, otherwise will
25552566
// allow the user to retry the request (most likely due to rate
25562567
// limit error, which gets thrown on the first chunk).
2557-
const stream = this.attemptApiRequest()
2568+
const stream = this.attemptApiRequest(currentItem.retryAttempt ?? 0, { skipProviderRateLimit: true })
25582569
let assistantMessage = ""
25592570
let reasoningMessage = ""
25602571
let pendingGroundingSources: GroundingSource[] = []
@@ -3656,7 +3667,44 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
36563667
await this.providerRef.deref()?.postMessageToWebview({ type: "condenseTaskContextResponse", text: this.taskId })
36573668
}
36583669

3659-
public async *attemptApiRequest(retryAttempt: number = 0): ApiStream {
3670+
/**
3671+
* Enforce the user-configured provider rate limit.
3672+
*
3673+
* NOTE: This is intentionally treated as expected behavior and is surfaced via
3674+
* the `api_req_rate_limit_wait` say type (not an error).
3675+
*/
3676+
private async maybeWaitForProviderRateLimit(retryAttempt: number): Promise<void> {
3677+
const state = await this.providerRef.deref()?.getState()
3678+
const rateLimitSeconds =
3679+
state?.apiConfiguration?.rateLimitSeconds ?? this.apiConfiguration?.rateLimitSeconds ?? 0
3680+
3681+
if (rateLimitSeconds <= 0 || !Task.lastGlobalApiRequestTime) {
3682+
return
3683+
}
3684+
3685+
const now = performance.now()
3686+
const timeSinceLastRequest = now - Task.lastGlobalApiRequestTime
3687+
const rateLimitDelay = Math.ceil(
3688+
Math.min(rateLimitSeconds, Math.max(0, rateLimitSeconds * 1000 - timeSinceLastRequest) / 1000),
3689+
)
3690+
3691+
// Only show the countdown UX on the first attempt. Retry flows have their own delay messaging.
3692+
if (rateLimitDelay > 0 && retryAttempt === 0) {
3693+
for (let i = rateLimitDelay; i > 0; i--) {
3694+
// Send structured JSON data for i18n-safe transport
3695+
const delayMessage = JSON.stringify({ seconds: i })
3696+
await this.say("api_req_rate_limit_wait", delayMessage, undefined, true)
3697+
await delay(1000)
3698+
}
3699+
// Finalize the partial message so the UI doesn't keep rendering an in-progress spinner.
3700+
await this.say("api_req_rate_limit_wait", undefined, undefined, false)
3701+
}
3702+
}
3703+
3704+
public async *attemptApiRequest(
3705+
retryAttempt: number = 0,
3706+
options: { skipProviderRateLimit?: boolean } = {},
3707+
): ApiStream {
36603708
const state = await this.providerRef.deref()?.getState()
36613709

36623710
const {
@@ -3693,29 +3741,17 @@ export class Task extends EventEmitter<TaskEvents> implements TaskLike {
36933741
}
36943742
}
36953743

3696-
let rateLimitDelay = 0
3697-
3698-
// Use the shared timestamp so that subtasks respect the same rate-limit
3699-
// window as their parent tasks.
3700-
if (Task.lastGlobalApiRequestTime) {
3701-
const now = performance.now()
3702-
const timeSinceLastRequest = now - Task.lastGlobalApiRequestTime
3703-
const rateLimit = apiConfiguration?.rateLimitSeconds || 0
3704-
rateLimitDelay = Math.ceil(Math.min(rateLimit, Math.max(0, rateLimit * 1000 - timeSinceLastRequest) / 1000))
3705-
}
3706-
3707-
// Only show rate limiting message if we're not retrying. If retrying, we'll include the delay there.
3708-
if (rateLimitDelay > 0 && retryAttempt === 0) {
3709-
// Show countdown timer
3710-
for (let i = rateLimitDelay; i > 0; i--) {
3711-
const delayMessage = `Rate limiting for ${i} seconds...`
3712-
await this.say("api_req_retry_delayed", delayMessage, undefined, true)
3713-
await delay(1000)
3714-
}
3744+
if (!options.skipProviderRateLimit) {
3745+
await this.maybeWaitForProviderRateLimit(retryAttempt)
37153746
}
37163747

3717-
// Update last request time before making the request so that subsequent
3748+
// Update last request time right before making the request so that subsequent
37183749
// requests — even from new subtasks — will honour the provider's rate-limit.
3750+
//
3751+
// NOTE: When recursivelyMakeClineRequests handles rate limiting, it sets the
3752+
// timestamp earlier to include the environment details build. We still set it
3753+
// here for direct callers (tests) and for the case where we didn't rate-limit
3754+
// in the caller.
37193755
Task.lastGlobalApiRequestTime = performance.now()
37203756

37213757
const systemPrompt = await this.getSystemPrompt()

src/core/task/__tests__/Task.spec.ts

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1041,6 +1041,9 @@ describe("Cline", () => {
10411041
startTask: false,
10421042
})
10431043

1044+
// Spy on child.say to verify the emitted message type
1045+
const saySpy = vi.spyOn(child, "say")
1046+
10441047
// Mock the child's API stream
10451048
const childMockStream = {
10461049
async *[Symbol.asyncIterator]() {
@@ -1067,6 +1070,17 @@ describe("Cline", () => {
10671070
// Verify rate limiting was applied
10681071
expect(mockDelay).toHaveBeenCalledTimes(mockApiConfig.rateLimitSeconds)
10691072
expect(mockDelay).toHaveBeenCalledWith(1000)
1073+
1074+
// Verify we used the non-error rate-limit wait message type (JSON format)
1075+
expect(saySpy).toHaveBeenCalledWith(
1076+
"api_req_rate_limit_wait",
1077+
expect.stringMatching(/\{"seconds":\d+\}/),
1078+
undefined,
1079+
true,
1080+
)
1081+
1082+
// Verify the wait message was finalized
1083+
expect(saySpy).toHaveBeenCalledWith("api_req_rate_limit_wait", undefined, undefined, false)
10701084
}, 10000) // Increase timeout to 10 seconds
10711085

10721086
it("should not apply rate limiting if enough time has passed", async () => {

webview-ui/src/components/chat/ChatRow.tsx

Lines changed: 45 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -298,6 +298,8 @@ export const ChatRowContent = ({
298298
style={{ color: successColor, marginBottom: "-1.5px" }}></span>,
299299
<span style={{ color: successColor, fontWeight: "bold" }}>{t("chat:taskCompleted")}</span>,
300300
]
301+
case "api_req_rate_limit_wait":
302+
return []
301303
case "api_req_retry_delayed":
302304
return []
303305
case "api_req_started":
@@ -327,8 +329,10 @@ export const ChatRowContent = ({
327329
getIconSpan("arrow-swap", normalColor)
328330
) : apiRequestFailedMessage ? (
329331
getIconSpan("error", errorColor)
330-
) : (
332+
) : isLast ? (
331333
<ProgressIndicator />
334+
) : (
335+
getIconSpan("arrow-swap", normalColor)
332336
),
333337
apiReqCancelReason !== null && apiReqCancelReason !== undefined ? (
334338
apiReqCancelReason === "user_cancelled" ? (
@@ -356,7 +360,17 @@ export const ChatRowContent = ({
356360
default:
357361
return [null, null]
358362
}
359-
}, [type, isCommandExecuting, message, isMcpServerResponding, apiReqCancelReason, cost, apiRequestFailedMessage, t])
363+
}, [
364+
type,
365+
isCommandExecuting,
366+
message,
367+
isMcpServerResponding,
368+
apiReqCancelReason,
369+
cost,
370+
apiRequestFailedMessage,
371+
t,
372+
isLast,
373+
])
360374

361375
const headerStyle: React.CSSProperties = {
362376
display: "flex",
@@ -1149,6 +1163,35 @@ export const ChatRowContent = ({
11491163
errorDetails={rawError}
11501164
/>
11511165
)
1166+
case "api_req_rate_limit_wait": {
1167+
const isWaiting = message.partial === true
1168+
1169+
const waitSeconds = (() => {
1170+
if (!message.text) return undefined
1171+
try {
1172+
const data = JSON.parse(message.text)
1173+
return typeof data.seconds === "number" ? data.seconds : undefined
1174+
} catch {
1175+
return undefined
1176+
}
1177+
})()
1178+
1179+
return isWaiting && waitSeconds !== undefined ? (
1180+
<div
1181+
className={`group text-sm transition-opacity opacity-100`}
1182+
style={{
1183+
...headerStyle,
1184+
marginBottom: 0,
1185+
justifyContent: "space-between",
1186+
}}>
1187+
<div style={{ display: "flex", alignItems: "center", gap: "10px", flexGrow: 1 }}>
1188+
<ProgressIndicator />
1189+
<span style={{ color: normalColor }}>{t("chat:apiRequest.rateLimitWait")}</span>
1190+
</div>
1191+
<span className="text-xs font-light text-vscode-descriptionForeground">{waitSeconds}s</span>
1192+
</div>
1193+
) : null
1194+
}
11521195
case "api_req_finished":
11531196
return null // we should never see this message type
11541197
case "text":

webview-ui/src/components/chat/ChatView.tsx

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -400,6 +400,7 @@ const ChatViewComponent: React.ForwardRefRenderFunction<ChatViewRef, ChatViewPro
400400
// an "ask" while ask is waiting for response.
401401
switch (lastMessage.say) {
402402
case "api_req_retry_delayed":
403+
case "api_req_rate_limit_wait":
403404
setSendingDisabled(true)
404405
break
405406
case "api_req_started":
@@ -957,6 +958,7 @@ const ChatViewComponent: React.ForwardRefRenderFunction<ChatViewRef, ChatViewPro
957958
case "api_req_deleted":
958959
return false
959960
case "api_req_retry_delayed":
961+
case "api_req_rate_limit_wait":
960962
const last1 = modifiedMessages.at(-1)
961963
const last2 = modifiedMessages.at(-2)
962964
if (last1?.ask === "resume_task" && last2 === message) {
Lines changed: 78 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,78 @@
1+
import React from "react"
2+
3+
import { render, screen } from "@/utils/test-utils"
4+
import { QueryClient, QueryClientProvider } from "@tanstack/react-query"
5+
import { ExtensionStateContextProvider } from "@src/context/ExtensionStateContext"
6+
import { ChatRowContent } from "../ChatRow"
7+
8+
// Mock i18n
9+
vi.mock("react-i18next", () => ({
10+
useTranslation: () => ({
11+
t: (key: string) => {
12+
const map: Record<string, string> = {
13+
"chat:apiRequest.rateLimitWait": "Rate limiting",
14+
}
15+
return map[key] ?? key
16+
},
17+
}),
18+
Trans: ({ children }: { children?: React.ReactNode }) => <>{children}</>,
19+
initReactI18next: { type: "3rdParty", init: () => {} },
20+
}))
21+
22+
const queryClient = new QueryClient()
23+
24+
function renderChatRow(message: any) {
25+
return render(
26+
<ExtensionStateContextProvider>
27+
<QueryClientProvider client={queryClient}>
28+
<ChatRowContent
29+
message={message}
30+
isExpanded={false}
31+
isLast={false}
32+
isStreaming={false}
33+
onToggleExpand={() => {}}
34+
onSuggestionClick={() => {}}
35+
onBatchFileResponse={() => {}}
36+
onFollowUpUnmount={() => {}}
37+
isFollowUpAnswered={false}
38+
/>
39+
</QueryClientProvider>
40+
</ExtensionStateContextProvider>,
41+
)
42+
}
43+
44+
describe("ChatRow - rate limit wait", () => {
45+
it("renders a non-error progress row for api_req_rate_limit_wait", () => {
46+
const message: any = {
47+
type: "say",
48+
say: "api_req_rate_limit_wait",
49+
ts: Date.now(),
50+
partial: true,
51+
text: JSON.stringify({ seconds: 1 }),
52+
}
53+
54+
renderChatRow(message)
55+
56+
expect(screen.getByText("Rate limiting")).toBeInTheDocument()
57+
// Should show countdown, but should NOT show the error-details affordance.
58+
expect(screen.getByText("1s")).toBeInTheDocument()
59+
expect(screen.queryByText("Details")).toBeNull()
60+
})
61+
62+
it("renders nothing when rate limit wait is complete", () => {
63+
const message: any = {
64+
type: "say",
65+
say: "api_req_rate_limit_wait",
66+
ts: Date.now(),
67+
partial: false,
68+
text: undefined,
69+
}
70+
71+
const { container } = renderChatRow(message)
72+
73+
// The row should be hidden when rate limiting is complete
74+
expect(screen.queryByText("Rate limiting")).toBeNull()
75+
// Nothing should be rendered
76+
expect(container.firstChild).toBeNull()
77+
})
78+
})

webview-ui/src/i18n/locales/ca/chat.json

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

webview-ui/src/i18n/locales/de/chat.json

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

webview-ui/src/i18n/locales/en/chat.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -144,6 +144,7 @@
144144
"streaming": "API Request...",
145145
"cancelled": "API Request Cancelled",
146146
"streamingFailed": "API Streaming Failed",
147+
"rateLimitWait": "Rate limiting",
147148
"errorTitle": "Provider Error {{code}}",
148149
"errorMessage": {
149150
"docs": "Docs",

0 commit comments

Comments
 (0)