Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/honor-retry-after-header.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@voltagent/core": patch
---

Honor the provider's `Retry-After` header on retried model calls. The retry loop in `executeWithModelFallback` previously always used local exponential backoff capped at 10 seconds, regardless of what the server asked for; this caused concurrent agents under shared 429/503 contention to converge their retry windows. The delay now uses `Retry-After` (delta-seconds or HTTP-date, RFC 7231) as a floor, keeps the exponential floor as a backpressure baseline, and caps at 5 minutes for safety.
65 changes: 65 additions & 0 deletions packages/core/src/agent/agent.spec.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3689,6 +3689,71 @@ Use pandas and summarize findings.`.split("\n"),
}
});

// Integration-style check: when the first model call fails with a retryable
// error carrying `retry-after: 30`, the agent's retry wait (observed through
// the `setTimeout` delay argument) is 30 000 ms rather than the local backoff.
it("should honor the Retry-After header on retried model calls", async () => {
// `maxRetries: 1` allows exactly one retry after the initial failure.
const agent = new Agent({
name: "RetryAfterAgent",
instructions: "Test",
model: mockModel as any,
maxRetries: 1,
});

// Canned successful result returned by the mocked `generateText` on the
// second call.
const mockResponse = {
text: "Retry response",
content: [{ type: "text", text: "Retry response" }],
reasoning: [],
files: [],
sources: [],
toolCalls: [],
toolResults: [],
finishReason: "stop",
usage: { inputTokens: 10, outputTokens: 5, totalTokens: 15 },
warnings: [],
request: {},
response: {
id: "retry-after-response",
modelId: "test-model",
timestamp: new Date(),
messages: [],
},
steps: [],
};

// Record every numeric delay passed to `setTimeout`, then delegate to the
// real timer with a 0 ms delay so the test does not actually wait.
// The original function is captured before the spy replaces it; the spy is
// restored in the `finally` below.
const observedDelays: number[] = [];
const originalSetTimeout = global.setTimeout;
const setTimeoutSpy = vi.spyOn(global, "setTimeout").mockImplementation(((
cb: any,
delay: any,
...args: any[]
) => {
if (typeof delay === "number") observedDelays.push(delay);
return originalSetTimeout(cb, 0, ...args);
}) as typeof setTimeout);

// First invocation throws a retryable 429-style error whose header bag asks
// for a 30 second wait; every later invocation resolves with `mockResponse`.
let callCount = 0;
vi.mocked(ai.generateText).mockImplementation(async () => {
callCount += 1;
if (callCount < 2) {
const error = new Error("Rate limited");
(error as any).isRetryable = true;
(error as any).statusCode = 429;
(error as any).responseHeaders = { "retry-after": "30" };
throw error;
}
return mockResponse as any;
});

try {
const result = await agent.generateText("Test");

expect(result.text).toBe("Retry response");
// One failed attempt plus one successful retry.
expect(vi.mocked(ai.generateText)).toHaveBeenCalledTimes(2);
// attemptIndex=0: exponential floor=1000ms, server hint=30000ms ⇒ delay=30000.
// `toContain` (not equality) because other code may schedule timers too.
expect(observedDelays).toContain(30_000);
} finally {
// Always put the real `setTimeout` back, even when an assertion fails.
setTimeoutSpy.mockRestore();
}
});

it("should handle model errors gracefully", async () => {
const agent = new Agent({
name: "TestAgent",
Expand Down
3 changes: 2 additions & 1 deletion packages/core/src/agent/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -120,6 +120,7 @@ import type {
ToolExecuteOptions,
UsageInfo,
} from "./providers/base/types";
import { computeRetryDelayMs } from "./retry-after";
import { coerceStringifiedJsonToolArgs } from "./tool-input-coercion";
export type { AgentHooks } from "./hooks";
export type {
Expand Down Expand Up @@ -5885,7 +5886,7 @@ export class Agent {
const canRetry = retryEligible && !isLastAttempt;

if (canRetry) {
const retryDelayMs = Math.min(1000 * 2 ** attemptIndex, 10000);
const retryDelayMs = computeRetryDelayMs(error, attemptIndex);
logger.debug(`[Agent:${this.name}] - Model attempt failed, retrying`, {
operation,
modelName,
Expand Down
122 changes: 122 additions & 0 deletions packages/core/src/agent/retry-after.spec.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,122 @@
import { describe, expect, it } from "vitest";
import { computeRetryDelayMs, getRetryAfterMs, parseRetryAfter } from "./retry-after";

const FIXED_NOW = Date.parse("2026-05-14T20:00:00Z");

// Unit tests for `parseRetryAfter`: absent values, delta-seconds, HTTP-date,
// malformed input, and the 5-minute clamp.
describe("parseRetryAfter", () => {
  const capMs = 5 * 60 * 1000;

  it("returns null for missing values", () => {
    const missingValues: Array<string | null | undefined> = [undefined, null, "", " "];
    for (const missing of missingValues) {
      expect(parseRetryAfter(missing)).toBeNull();
    }
  });

  it("parses delta-seconds form", () => {
    // [raw header value, expected milliseconds]
    const cases: Array<[string, number]> = [
      ["0", 0],
      ["1", 1000],
      ["120", 120 * 1000],
      [" 30 ", 30 * 1000],
    ];
    for (const [raw, expectedMs] of cases) {
      expect(parseRetryAfter(raw)).toBe(expectedMs);
    }
  });

  it("rejects non-integer delta-seconds forms", () => {
    for (const raw of ["1.5", "10ms", "-5", "0x10"]) {
      expect(parseRetryAfter(raw)).toBeNull();
    }
  });

  it("parses HTTP-date form into a relative delay", () => {
    const headerValue = new Date(FIXED_NOW + 5000).toUTCString();
    const delayMs = parseRetryAfter(headerValue, FIXED_NOW);
    expect(delayMs).toBe(5000);
  });

  it("returns 0 when the HTTP-date has already passed", () => {
    const headerValue = new Date(FIXED_NOW - 60_000).toUTCString();
    const delayMs = parseRetryAfter(headerValue, FIXED_NOW);
    expect(delayMs).toBe(0);
  });

  it("returns null for malformed HTTP-date strings", () => {
    for (const raw of ["Definitely not a date", "Fri, 99 Foo 9999 99:99:99 GMT"]) {
      expect(parseRetryAfter(raw)).toBeNull();
    }
  });

  it("clamps very large delta-seconds to the 5-minute safety cap", () => {
    for (const raw of ["3600", "999999"]) {
      expect(parseRetryAfter(raw)).toBe(capMs);
    }
  });

  it("clamps very far-future HTTP-dates to the 5-minute safety cap", () => {
    const oneHourMs = 60 * 60 * 1000;
    const headerValue = new Date(FIXED_NOW + oneHourMs).toUTCString();
    expect(parseRetryAfter(headerValue, FIXED_NOW)).toBe(capMs);
  });
});

// Unit tests for `getRetryAfterMs`: header lookup on an error's
// `responseHeaders` bag, including header-name casing and absent bags.
describe("getRetryAfterMs", () => {
  // Wrap a header bag in the minimal error shape the function reads.
  const withHeaders = (responseHeaders: Record<string, string>) => ({ responseHeaders });

  it("reads lowercased header from responseHeaders", () => {
    const delayMs = getRetryAfterMs(withHeaders({ "retry-after": "10" }));
    expect(delayMs).toBe(10_000);
  });

  it("accepts the canonical-case spelling too", () => {
    const delayMs = getRetryAfterMs(withHeaders({ "Retry-After": "10" }));
    expect(delayMs).toBe(10_000);
  });

  // NOTE(review): the lowercase key is also the first-inserted key here, so
  // this case cannot tell "prefers lowercase" apart from "first match wins".
  // Consider also covering the reverse insertion order.
  it("prefers lowercase over canonical when both are present", () => {
    const delayMs = getRetryAfterMs(withHeaders({ "retry-after": "5", "Retry-After": "999" }));
    expect(delayMs).toBe(5_000);
  });

  it("matches the header name case-insensitively", () => {
    // [header name as spelled by the provider, raw value, expected milliseconds]
    const cases: Array<[string, string, number]> = [
      ["Retry-after", "7", 7_000],
      ["RETRY-AFTER", "8", 8_000],
      ["rEtRy-AfTeR", "9", 9_000],
    ];
    for (const [headerName, raw, expectedMs] of cases) {
      expect(getRetryAfterMs(withHeaders({ [headerName]: raw }))).toBe(expectedMs);
    }
  });

  it("returns null when the header is absent", () => {
    const bags: Array<Record<string, string>> = [{}, { "x-foo": "bar" }];
    for (const bag of bags) {
      expect(getRetryAfterMs(withHeaders(bag))).toBeNull();
    }
  });

  it("returns null when there are no response headers at all", () => {
    const headerless: unknown[] = [{}, null, undefined, new Error("plain")];
    for (const candidate of headerless) {
      expect(getRetryAfterMs(candidate)).toBeNull();
    }
  });
});

// Unit tests for `computeRetryDelayMs`: local exponential backoff, the
// server hint acting as a floor, and the 5-minute safety cap.
describe("computeRetryDelayMs", () => {
  // Build the minimal error shape carrying a `retry-after` header.
  const rateLimited = (retryAfter: string) => ({ responseHeaders: { "retry-after": retryAfter } });

  it("falls back to exponential when no Retry-After is provided", () => {
    const transient = new Error("transient");
    // [attemptIndex, expected milliseconds] — doubles from 1 s, capped at 10 s.
    const schedule: Array<[number, number]> = [
      [0, 1000],
      [1, 2000],
      [2, 4000],
      [3, 8000],
      [4, 10_000],
      [10, 10_000],
    ];
    for (const [attemptIndex, expectedMs] of schedule) {
      expect(computeRetryDelayMs(transient, attemptIndex)).toBe(expectedMs);
    }
  });

  it("uses the server's Retry-After as a floor when it exceeds the exponential floor", () => {
    const failure = rateLimited("30");
    for (const attemptIndex of [0, 4]) {
      expect(computeRetryDelayMs(failure, attemptIndex)).toBe(30_000);
    }
  });

  it("keeps the exponential floor when Retry-After is shorter", () => {
    const failure = rateLimited("0");
    const schedule: Array<[number, number]> = [
      [0, 1000],
      [3, 8000],
    ];
    for (const [attemptIndex, expectedMs] of schedule) {
      expect(computeRetryDelayMs(failure, attemptIndex)).toBe(expectedMs);
    }
  });

  it("honors HTTP-date Retry-After values", () => {
    const headerValue = new Date(FIXED_NOW + 10_000).toUTCString();
    const delayMs = computeRetryDelayMs(rateLimited(headerValue), 0, FIXED_NOW);
    expect(delayMs).toBe(10_000);
  });

  it("respects the 5-minute safety cap even when the server asks for longer", () => {
    const delayMs = computeRetryDelayMs(rateLimited("999999"), 0);
    expect(delayMs).toBe(5 * 60 * 1000);
  });
});
112 changes: 112 additions & 0 deletions packages/core/src/agent/retry-after.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
/**
 * Cap how long we'll honor a server-supplied `Retry-After` header.
 * A misconfigured or hostile server can otherwise pin an agent for hours.
 */
const MAX_RETRY_AFTER_MS = 5 * 60 * 1000;

/**
 * Parse an HTTP `Retry-After` header value (RFC 7231 §7.1.3) into milliseconds.
 *
 * Accepts the two RFC-defined forms:
 *   - delta-seconds: a non-negative integer (e.g. `Retry-After: 120`)
 *   - HTTP-date: a date string (e.g. `Retry-After: Fri, 31 Dec 1999 23:59:59 GMT`),
 *     converted to a delay relative to `nowMs`; dates in the past yield `0`.
 *
 * Returns `null` when the value is absent, empty, not a string, or malformed.
 * The result is clamped to {@link MAX_RETRY_AFTER_MS}.
 *
 * @param value The raw header value, or `undefined`/`null` when absent.
 * @param nowMs Current time in milliseconds, injected for tests. Defaults to `Date.now()`.
 * @returns The delay in milliseconds (`0` … `MAX_RETRY_AFTER_MS`), or `null`.
 */
export function parseRetryAfter(
  value: string | undefined | null,
  nowMs: number = Date.now(),
): number | null {
  // The value typically originates from an untyped error/header bag, so guard
  // at runtime as well: calling `.trim()` on a non-string would throw inside
  // the retry path and mask the original model error.
  if (typeof value !== "string") {
    return null;
  }

  const trimmed = value.trim();
  if (trimmed === "") {
    return null;
  }

  if (/^\d+$/.test(trimmed)) {
    // Digits only, so the number is never negative. An overlong digit run
    // overflows to `Infinity`; `Math.min` clamps it like any other large value.
    const seconds = Number.parseInt(trimmed, 10);
    return Math.min(seconds * 1000, MAX_RETRY_AFTER_MS);
  }

  // HTTP-date form mandates a day-name and month-name (RFC 7231 §7.1.1.1),
  // so an HTTP-date always contains ASCII letters. Reject letter-free values
  // like "1.5" or "-5" before falling into `Date.parse`, which is permissive
  // enough to coerce some of them into past dates. Values that do contain
  // letters are left for `Date.parse` to accept or reject.
  if (!/[A-Za-z]/.test(trimmed)) {
    return null;
  }

  const dateMs = Date.parse(trimmed);
  if (Number.isNaN(dateMs)) {
    return null;
  }

  const delta = dateMs - nowMs;
  if (delta <= 0) {
    // The requested moment has already passed: no server-imposed wait remains.
    return 0;
  }
  return Math.min(delta, MAX_RETRY_AFTER_MS);
}

/**
 * Read the `Retry-After` header off an error's `responseHeaders` bag and return
 * its parsed value in milliseconds, or `null` when absent.
 *
 * HTTP header field names are case-insensitive (RFC 7230 §3.2). AI SDK normalizes
 * its own bag to lowercase, but providers that build `responseHeaders` from a raw
 * fetch can leak any casing through, so we match the key case-insensitively.
 *
 * Lookup order: the exact lowercase key `retry-after` wins when it holds a
 * string; otherwise the first own key that matches case-insensitively and holds
 * a string is used. Non-string values are ignored rather than parsed.
 *
 * @param error The thrown value; anything without an object `responseHeaders` yields `null`.
 * @param nowMs Current time in milliseconds, injected for tests. Defaults to `Date.now()`.
 * @returns The parsed delay in milliseconds, or `null` when no usable header exists.
 */
export function getRetryAfterMs(error: unknown, nowMs: number = Date.now()): number | null {
  const headers = (error as { responseHeaders?: unknown } | null | undefined)?.responseHeaders;
  if (!headers || typeof headers !== "object") {
    return null;
  }
  const bag = headers as Record<string, unknown>;

  // Prefer the normalized spelling regardless of key insertion order.
  const normalized = bag["retry-after"];
  if (typeof normalized === "string") {
    return parseRetryAfter(normalized, nowMs);
  }

  // Fall back to any other casing. Skip non-string values so a malformed bag
  // cannot make the retry path throw.
  for (const key of Object.keys(bag)) {
    if (key.toLowerCase() !== "retry-after") {
      continue;
    }
    const candidate = bag[key];
    if (typeof candidate === "string") {
      return parseRetryAfter(candidate, nowMs);
    }
  }
  return null;
}

/**
 * Decide how long to wait before the next retry attempt.
 *
 * The local backoff doubles from 1 second per attempt and is capped at
 * 10 seconds. If the failed call carried a usable `Retry-After` header, the
 * larger of the server's hint and the local backoff is returned, so the
 * server's request is never undercut and a `Retry-After: 0` still leaves the
 * local spacing in place. Without a usable header the local backoff is
 * returned as is.
 *
 * @param error The error thrown by the model invocation.
 * @param attemptIndex Zero-based retry attempt index.
 * @param nowMs Current time in ms, injected for tests.
 * @returns The delay to wait, in milliseconds.
 */
export function computeRetryDelayMs(
  error: unknown,
  attemptIndex: number,
  nowMs: number = Date.now(),
): number {
  const backoffFloorMs = Math.min(1000 * 2 ** attemptIndex, 10000);
  const retryAfterMs = getRetryAfterMs(error, nowMs);
  return retryAfterMs == null ? backoffFloorMs : Math.max(retryAfterMs, backoffFloorMs);
}
Loading