Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 32 additions & 5 deletions packages/ai/src/provider-utils/localOnlyFetch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
* SPDX-License-Identifier: Apache-2.0
*/

import { isLoopbackHostname } from "./localUrl";
import { extractRawHost, isLoopbackHostname } from "./localUrl";

const MAX_REDIRECTS = 5;

Expand Down Expand Up @@ -37,16 +37,34 @@ function isRedirectResponse(res: Response): boolean {
* host (`localhost`, `127.0.0.0/8`, `::1`, or IPv4-mapped loopback). Throws a
* generic, label-prefixed Error otherwise. `context` distinguishes the
* initial URL from a redirect in the message.
*
* `rawHost`, when supplied, is the host substring extracted from the raw URL
* source BEFORE WHATWG canonicalisation (see `extractRawHost`). It is used
* for the loopback-literal check instead of `url.hostname` so non-standard
* IPv4 spellings that the URL parser silently rewrites — `0x7f.0.0.1` (hex),
* `2130706433` (uint32), and `0177.0.0.1` (octal) all become `127.0.0.1`,
* while a leading-zero form such as `010.0.0.1` becomes `8.0.0.1` — cannot
* bypass the gate. Callers pass `rawHost` for the initial URL (where
* the raw form is available) and omit it for redirect targets (where only
* the canonicalised URL exists; the canonical-hostname check there is still
* a tightening over no check at all).
*/
function assertLoopbackTarget(url: URL, label: string, context: "initial URL" | "redirect"): void {
function assertLoopbackTarget(
url: URL,
label: string,
context: "initial URL" | "redirect",
rawHost?: string | null | undefined
): void {
if (url.protocol !== "http:" && url.protocol !== "https:") {
throw new Error(`${label}: refusing ${context} to non-HTTP(S) URL.`);
}
if (url.username || url.password) {
throw new Error(`${label}: refusing ${context} with credentials.`);
}
const host = url.hostname.replace(/^\[|\]$/g, "");
if (!isLoopbackHostname(host)) {
const hostForCheck =
rawHost !== undefined && rawHost !== null
? rawHost.replace(/^\[|\]$/g, "")
: url.hostname.replace(/^\[|\]$/g, "");
if (!isLoopbackHostname(hostForCheck)) {
throw new Error(`${label}: refusing ${context} to non-loopback host (${url.href}).`);
}
}
Expand Down Expand Up @@ -80,7 +98,16 @@ export async function localOnlyFetch(
} catch {
throw new Error(`${label}: invalid initial URL.`);
}
assertLoopbackTarget(initialUrl, label, "initial URL");
// Use the RAW host from the source string (not `initialUrl.hostname`) so
// WHATWG canonicalisation of non-standard IPv4 spellings — `0x7f.0.0.1`,
// `2130706433`, octal `0177.0.0.1` — cannot silently bypass the loopback
// gate by being rewritten to `127.0.0.1` (a leading-zero form such as
// `010.0.0.1` is likewise reinterpreted, as `8.0.0.1`). Sibling `normalizeLocalHttpUrl`
// closed the same bypass class; this mirrors it for `localOnlyFetch`.
const rawHost = extractRawHost(input);
if (rawHost === null) {
throw new Error(`${label}: invalid initial URL.`);
}
assertLoopbackTarget(initialUrl, label, "initial URL", rawHost);

let current = initialUrl.href;
for (let hop = 0; hop <= MAX_REDIRECTS; hop++) {
Expand Down
31 changes: 31 additions & 0 deletions packages/test/src/test/ai-provider-api/localOnlyFetch.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -184,12 +184,43 @@

it("rejects a non-HTTP(S) initial URL before issuing any fetch", async () => {
stubFetch([ok("should-not-be-reached")]);
await expect(localOnlyFetch("file:///etc/passwd", undefined, "TestProvider")).rejects.toThrow(

Check failure on line 187 in packages/test/src/test/ai-provider-api/localOnlyFetch.test.ts

View workflow job for this annotation

GitHub Actions / test-vitest-unit

packages/test/src/test/ai-provider-api/localOnlyFetch.test.ts > localOnlyFetch > rejects a non-HTTP(S) initial URL before issuing any fetch

AssertionError: expected [Function] to throw error matching /non-HTTP\(S\)/ but got 'TestProvider: invalid initial URL.' - Expected: /non-HTTP\(S\)/ + Received: "TestProvider: invalid initial URL." ❯ packages/test/src/test/ai-provider-api/localOnlyFetch.test.ts:187:81

Check failure on line 187 in packages/test/src/test/ai-provider-api/localOnlyFetch.test.ts

View workflow job for this annotation

GitHub Actions / test-vitest-unit

packages/test/src/test/ai-provider-api/localOnlyFetch.test.ts > localOnlyFetch > rejects a non-HTTP(S) initial URL before issuing any fetch

AssertionError: expected [Function] to throw error matching /non-HTTP\(S\)/ but got 'TestProvider: invalid initial URL.' - Expected: /non-HTTP\(S\)/ + Received: "TestProvider: invalid initial URL." ❯ packages/test/src/test/ai-provider-api/localOnlyFetch.test.ts:187:81
/non-HTTP\(S\)/
);
expect(calls).toHaveLength(0);
});

// Regression coverage for the WHATWG-canonicalisation SSRF bypass: the URL
// parser silently rewrites non-standard IPv4 spellings to `127.0.0.1`, so
// validating `new URL(input).hostname` would let these slip past the
// loopback gate. The fix validates the RAW host extracted from the source
// string instead. Each spelling is asserted to reject AND to issue zero
// fetches — mirrors the existing "rejects a non-loopback initial URL
// before issuing any fetch" shape.
it("rejects a hex-octet IPv4 initial URL (0x7f.0.0.1) before issuing any fetch", async () => {
stubFetch([ok("should-not-be-reached")]);
await expect(
localOnlyFetch("http://0x7f.0.0.1/", undefined, "TestProvider")
).rejects.toThrow(/non-loopback host|invalid initial URL/);
expect(calls).toHaveLength(0);
});

it("rejects a uint32 IPv4 initial URL (2130706433) before issuing any fetch", async () => {
stubFetch([ok("should-not-be-reached")]);
await expect(
localOnlyFetch("http://2130706433/", undefined, "TestProvider")
).rejects.toThrow(/non-loopback host|invalid initial URL/);
expect(calls).toHaveLength(0);
});

it("rejects a leading-zero octal-looking IPv4 initial URL (010.0.0.1) before issuing any fetch", async () => {
stubFetch([ok("should-not-be-reached")]);
await expect(
localOnlyFetch("http://010.0.0.1/", undefined, "TestProvider")
).rejects.toThrow(/non-loopback host|invalid initial URL/);
expect(calls).toHaveLength(0);
});

it("throws after more than 5 chained loopback redirects", async () => {
// Queue 6 redirects: hops 0..5 (six fetches) all return a redirect, so the
// loop exhausts MAX_REDIRECTS (5) and throws on the count guard. All
Expand Down
Loading