Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions packages/ai/src/provider-utils.ts
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ export * from "./provider-utils/CloudProviderClient";
export * from "./provider-utils/OpenAIShapedChat";
export * from "./provider-utils/IBackendsTransport";
export * from "./provider-utils/localUrl";
export * from "./provider-utils/localOnlyFetch";
96 changes: 96 additions & 0 deletions packages/ai/src/provider-utils/localOnlyFetch.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
/**
* @license
* Copyright 2026 Steven Roussey <sroussey@gmail.com>
* SPDX-License-Identifier: Apache-2.0
*/

import { isLoopbackHostname } from "./localUrl";

const MAX_REDIRECTS = 5;

/**
 * The only status codes this module treats as followable redirects. Any other
 * 3xx (for example `300`, `304`, `306`) is handed back to the caller as a
 * terminal response, even when it carries a `Location` header.
 */
const REDIRECT_STATUS_CODES = new Set([301, 302, 303, 307, 308]);

/**
 * Decides whether `res` should be followed as a redirect.
 *
 * Two conditions must both hold:
 *   1. `res.status` is a number contained in {@link REDIRECT_STATUS_CODES};
 *   2. `res.headers.get` is callable, so `location` can be read afterwards.
 *
 * Objects lacking a numeric status or a usable `headers` (such as a bare
 * `{ ok: true, json }` test double) yield `false` and are therefore treated
 * as terminal responses instead of throwing on `res.headers.get`.
 *
 * @param res - The response (or response-like object) to classify.
 * @returns `true` only for a standard redirect whose headers are readable.
 */
function isRedirectResponse(res: Response): boolean {
  const code = res.status;
  const isStandardRedirect = typeof code === "number" && REDIRECT_STATUS_CODES.has(code);
  // `headers` is only touched once the status has qualified.
  return isStandardRedirect && typeof res.headers?.get === "function";
}

/**
 * Enforces the loopback-only policy on a single request target.
 *
 * Checks run in this order, and the first failure throws:
 *   1. the scheme must be `http:` or `https:`;
 *   2. the URL must not carry a username or password;
 *   3. the hostname (with any surrounding `[` `]` removed) must satisfy
 *      `isLoopbackHostname`.
 *
 * @param url - Parsed target to check.
 * @param label - Prefix placed at the start of every error message.
 * @param context - Whether the target is the caller's initial URL or a
 *   redirect hop; interpolated into the error message.
 * @throws Error when any of the three checks fails.
 */
function assertLoopbackTarget(url: URL, label: string, context: "initial URL" | "redirect"): void {
  const scheme = url.protocol;
  const isHttpFamily = scheme === "http:" || scheme === "https:";
  if (!isHttpFamily) {
    throw new Error(`${label}: refusing ${context} to non-HTTP(S) URL.`);
  }

  const carriesCredentials = Boolean(url.username || url.password);
  if (carriesCredentials) {
    throw new Error(`${label}: refusing ${context} with credentials.`);
  }

  // URL.hostname keeps the brackets around IPv6 literals; the hostname
  // predicate is given the bare address.
  const bareHost = url.hostname.replace(/^\[|\]$/g, "");
  if (isLoopbackHostname(bareHost)) return;
  throw new Error(`${label}: refusing ${context} to non-loopback host (${url.href}).`);
}

/**
* fetch() restricted to STRICTLY-LOOPBACK hosts on EVERY hop. These AI server
* clients only ever talk to a backend on the same host, so a broad "local"
* allow-list (which includes RFC 1918 and the `169.254.169.254` cloud-metadata
* address) is wider than needed and is itself an SSRF vector. This wrapper
* therefore enforces loopback-only on:
* - the initial `input` URL, validated defensively BEFORE any network call
* (a bad initial URL throws with zero fetches issued); and
* - every redirect `Location`, resolved against the current URL and
* re-validated, closing the redirect-based SSRF bypass left by
* base-URL-only validation.
*
* Only standard redirect codes (301/302/303/307/308) are followed; other 3xx
* responses are returned unchanged. The final Response is returned untouched
* so streaming consumers are unaffected.
*
* @param input - Request target as a string; it is parsed with
* `new URL(input)` (no base), so it must be an absolute URL.
* @param init - Optional fetch options. They are spread into the request of
* every hop, with `redirect` always forced to `"manual"`.
* @param label - Prefix for every error message thrown by this function
* (defaults to `"localOnlyFetch"`).
* @returns The first response that is not classified as a redirect by
* `isRedirectResponse`, or a redirect response that has no `location` header.
* @throws Error if `input` cannot be parsed, if the initial URL or any
* redirect target fails `assertLoopbackTarget`, or if the chain is still
* redirecting after `MAX_REDIRECTS` redirects have been followed.
*/
export async function localOnlyFetch(
input: string,
init?: RequestInit,
label = "localOnlyFetch",
): Promise<Response> {
// Defensively validate the initial URL BEFORE issuing any request. A bad
// initial URL must throw before fetch is ever called (zero network calls).
let initialUrl: URL;
try {
initialUrl = new URL(input);
} catch {
// The parser's own error is replaced by a label-prefixed message.
throw new Error(`${label}: invalid initial URL.`);
}
assertLoopbackTarget(initialUrl, label, "initial URL");

// `current` is the URL requested on the next hop; it starts as the
// serialized (`href`) form of the validated initial URL.
let current = initialUrl.href;
// hop 0 is the initial request, so at most MAX_REDIRECTS + 1 fetches are
// issued before the loop falls through to the "too many redirects" throw.
for (let hop = 0; hop <= MAX_REDIRECTS; hop++) {
// `redirect: "manual"` is placed after the spread so it overrides any
// caller-supplied `redirect`; each hop must be inspected here instead of
// being followed automatically by fetch.
// NOTE(review): the same `init` (method, headers, body) is re-sent on every
// hop; no method/body rewriting is done for 303 — confirm this is intended.
const res = await fetch(current, { ...init, redirect: "manual" });
if (!isRedirectResponse(res)) return res;
const location = res.headers.get("location");
// A redirect status without a usable Location cannot be followed; the
// response is returned to the caller as-is.
if (!location) return res;
Comment on lines +87 to +90
Copy link
Copy Markdown
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Fixed in 80b3c33. Redirect following is now restricted to the standard redirect codes 301/302/303/307/308 (a REDIRECT_STATUS_CODES Set). Other 3xx responses (e.g. 300, 304, 306) are returned unchanged even if they carry a Location. The numeric-status + headers.get guard is retained so non-spec test doubles still pass through as terminal.


Generated by Claude Code

// Relative Location values are resolved against the URL that produced the
// redirect, then the resolved target goes through the same loopback gate.
// NOTE(review): a Location that `new URL` cannot parse throws the parser's
// own error here (not label-prefixed) — confirm that is acceptable.
// NOTE(review): the redirect response `res` is dropped without its body
// being cancelled or consumed before the next hop — verify this does not
// hold connections open in the target runtime.
const next = new URL(location, current);
assertLoopbackTarget(next, label, "redirect");
current = next.href;
}
// Reached only when every request in the loop returned a followable redirect.
throw new Error(`${label}: too many redirects (> ${MAX_REDIRECTS}).`);
}
93 changes: 92 additions & 1 deletion packages/ai/src/provider-utils/localUrl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,97 @@ export function isLocalHostname(host: string): boolean {
return false;
}

/**
 * Tests whether `host` is a dotted-quad IPv4 literal inside `127.0.0.0/8`.
 *
 * The literal must consist of exactly four decimal octets, each either `0`
 * or one to three digits with no leading zero, and each in the range 0-255.
 * Anything else (wrong octet count, octal-looking `010`, empty octets,
 * non-string input) yields `false`. Private (RFC 1918) and link-local
 * addresses, including `169.254.169.254`, are not loopback.
 *
 * @param host - Candidate IPv4 literal.
 * @returns `true` when the literal is well-formed and its first octet is 127.
 */
function isLoopbackIpv4(host: string): boolean {
  if (typeof host !== "string") return false;

  const octets = host.split(".");
  if (octets.length !== 4) return false;

  const everyOctetValid = octets.every((octet) => {
    // Only "0" or a 1-3 digit number that does not start with 0.
    if (!/^(0|[1-9][0-9]{0,2})$/.test(octet)) return false;
    const value = Number(octet);
    return value >= 0 && value <= 255;
  });

  // The whole 127.0.0.0/8 block is loopback, so only the first octet matters.
  return everyOctetValid && Number(octets[0]) === 127;
}

/**
 * Tests whether `host` (an IPv6 literal without brackets) is loopback.
 *
 * The literal is parsed with {@link parseIpv6}; a failed parse yields `false`.
 * Two shapes are accepted:
 *   - `::1`: bytes 0-14 are zero and byte 15 is 1;
 *   - IPv4-mapped (`::ffff:a.b.c.d`): bytes 0-9 are zero, bytes 10-11 are
 *     `0xff`, and the trailing four bytes form an address accepted by
 *     {@link isLoopbackIpv4}.
 * Every other address returns `false`.
 *
 * @param host - Candidate IPv6 literal, brackets already removed.
 * @returns `true` for `::1` or an IPv4-mapped loopback address.
 */
function isLoopbackIpv6(host: string): boolean {
  const octets = parseIpv6(host);
  if (!octets) return false;

  // Index of the first non-zero byte among bytes 0-14 (15 if all are zero).
  let firstNonZero = 0;
  while (firstNonZero < 15 && octets[firstNonZero] === 0) firstNonZero++;

  // ::1
  if (firstNonZero === 15 && octets[15] === 1) return true;

  // IPv4-mapped ::ffff:0:0/96: ten zero bytes followed by ff ff.
  const isV4Mapped = firstNonZero >= 10 && octets[10] === 0xff && octets[11] === 0xff;
  if (!isV4Mapped) return false;

  const embeddedV4 = `${octets[12]}.${octets[13]}.${octets[14]}.${octets[15]}`;
  return isLoopbackIpv4(embeddedV4);
}

/**
 * Strict loopback gate for host literals, used for clients that must only
 * reach a server on the same machine.
 *
 * Accepted:
 *   - `localhost`, compared case-insensitively, optionally with one trailing dot;
 *   - IPv4 literals in `127.0.0.0/8` (via {@link isLoopbackIpv4});
 *   - IPv6 `::1` and IPv4-mapped loopback (via {@link isLoopbackIpv6}).
 *
 * Rejected: the empty string, non-string input, any name containing a
 * character outside `[0-9a-f:.]` after lower-casing (so `*.localhost`,
 * percent-encoded and IDN forms fail), and single-token values with
 * neither `:` nor `.` such as `2130706433`. This is deliberately narrower
 * than {@link isLocalHostname}.
 *
 * IPv6 literals must be supplied without surrounding brackets.
 *
 * @param host - Host literal to classify.
 * @returns `true` only when `host` is one of the accepted loopback spellings.
 */
export function isLoopbackHostname(host: string): boolean {
  if (typeof host !== "string" || host.length === 0) return false;

  const normalized = host.toLowerCase();

  // `localhost` and its root-dotted form are the only names allowed.
  switch (normalized) {
    case "localhost":
    case "localhost.":
      return true;
  }

  // Only characters that can appear in an IPv4 dotted quad or an IPv6
  // hex/colon literal may proceed.
  if (!/^[0-9a-f:.]+$/.test(normalized)) return false;

  // A colon selects the IPv6 grammar; otherwise a dot selects IPv4.
  if (normalized.includes(":")) return isLoopbackIpv6(normalized);

  // Values with neither `:` nor `.` (bare integers, hex runs) are rejected.
  return normalized.includes(".") ? isLoopbackIpv4(normalized) : false;
}

/**
* Extract the literal host substring from `rawUrl` BEFORE the WHATWG URL
* parser canonicalises it. Returns the host as it appeared in the source
Expand All @@ -217,7 +308,7 @@ export function isLocalHostname(host: string): boolean {
* and any of those rewrites would silently bypass
* {@link isLocalHostname}'s strict-literal grammar.
*/
function extractRawHost(rawUrl: string): string | null {
export function extractRawHost(rawUrl: string): string | null {
const m = rawUrl.match(/^[A-Za-z][A-Za-z0-9+.\-]*:\/\/(?:[^/?#@]*@)?(\[[^\]]+\]|[^:/?#]+)/);
if (m === null) return null;
let host = m[1] ?? "";
Expand Down
Loading