Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
112 changes: 57 additions & 55 deletions packages/ai/src/provider-utils/localOnlyFetch.ts
Original file line number Diff line number Diff line change
Expand Up @@ -6,66 +6,43 @@

import { isLoopbackHostname } from "./localUrl";

// Upper bound on the number of redirect hops the manual redirect loop will
// follow before giving up with a "too many redirects" error.
const MAX_REDIRECTS = 5;

/**
 * The HTTP status codes treated as redirects by this module. Any other 3xx
 * (for example `300`, `304` or `306`) is deliberately excluded, so such a
 * response is handed back to the caller as-is even when it carries a
 * `Location` header.
 */
const REDIRECT_STATUS_CODES = new Set([301, 302, 303, 307, 308]);

/**
 * Decides whether `res` should be handled as a redirect.
 *
 * Two conditions must both hold:
 *  1. `res.status` is a number contained in {@link REDIRECT_STATUS_CODES};
 *  2. `res.headers.get` is callable, so the `location` header can be read.
 *
 * Objects that lack either property (such as a bare `{ ok: true, json }`
 * test double) are reported as non-redirects instead of causing a throw when
 * the header is read later.
 *
 * @param res - The response (or response-like object) to classify.
 * @returns `true` only for a standard redirect with readable headers.
 */
function isRedirectResponse(res: Response): boolean {
  const code = res.status;
  const isStandardRedirect = typeof code === "number" && REDIRECT_STATUS_CODES.has(code);
  if (!isStandardRedirect) {
    return false;
  }
  // Headers are only inspected once the status already qualifies.
  return typeof res.headers?.get === "function";
}

/**
* Validate a request target against the strict LOOPBACK-ONLY policy: it must
* be a valid http(s) URL, carry no credentials, and resolve to a loopback
* host (`localhost`, `127.0.0.0/8`, `::1`, or IPv4-mapped loopback). Throws a
* generic, label-prefixed Error otherwise. `context` distinguishes the
* initial URL from a redirect in the message.
* generic, label-prefixed Error otherwise.
*/
function assertLoopbackTarget(url: URL, label: string, context: "initial URL" | "redirect"): void {
function assertLoopbackTarget(url: URL, label: string): void {
if (url.protocol !== "http:" && url.protocol !== "https:") {
throw new Error(`${label}: refusing ${context} to non-HTTP(S) URL.`);
throw new Error(`${label}: refusing initial URL to non-HTTP(S) URL.`);
}
if (url.username || url.password) {
throw new Error(`${label}: refusing ${context} with credentials.`);
throw new Error(`${label}: refusing initial URL with credentials.`);
}
const host = url.hostname.replace(/^\[|\]$/g, "");
if (!isLoopbackHostname(host)) {
throw new Error(`${label}: refusing ${context} to non-loopback host (${url.href}).`);
throw new Error(`${label}: refusing initial URL to non-loopback host (${url.href}).`);
}
}

/**
* fetch() restricted to STRICTLY-LOOPBACK hosts on EVERY hop. These AI server
* clients only ever talk to a backend on the same host, so a broad "local"
* allow-list (which includes RFC 1918 and the `169.254.169.254` cloud-metadata
* address) is wider than needed and is itself an SSRF vector. This wrapper
* therefore enforces loopback-only on:
* - the initial `input` URL, validated defensively BEFORE any network call
* (a bad initial URL throws with zero fetches issued); and
* - every redirect `Location`, resolved against the current URL and
* re-validated, closing the redirect-based SSRF bypass left by
* base-URL-only validation.
* fetch() restricted to STRICTLY-LOOPBACK hosts. These AI server clients only
* ever talk to a backend on the same host, so a broad "local" allow-list
* (which includes RFC 1918 and the `169.254.169.254` cloud-metadata address)
* is wider than needed and is itself an SSRF vector. This wrapper therefore:
* - validates the initial `input` URL defensively BEFORE any network call —
* a bad initial URL throws with zero fetches issued; and
* - issues the request with `redirect: "error"`, so ANY redirect from the
* local backend is REJECTED OUTRIGHT rather than followed. On-host AI
* backends never legitimately issue redirects, so following them only
* opens a redirect-based SSRF bypass (and the opaque-response trap of
* `redirect: "manual"`). Rejecting redirects outright closes the vector
* uniformly across Node/undici, Bun, and the browser.
*
* Only standard redirect codes (301/302/303/307/308) are followed; other 3xx
* responses are returned unchanged. The final Response is returned untouched
* so streaming consumers are unaffected.
* A redirect rejection is rethrown as a clear, label-prefixed error. Abort
* and network errors (including the AbortSignal in `init`) pass through
* unchanged. The final Response is returned untouched so streaming consumers
* are unaffected.
*/
export async function localOnlyFetch(
input: string,
Expand All @@ -80,17 +57,42 @@ export async function localOnlyFetch(
} catch {
throw new Error(`${label}: invalid initial URL.`);
}
assertLoopbackTarget(initialUrl, label, "initial URL");
assertLoopbackTarget(initialUrl, label);

let current = initialUrl.href;
for (let hop = 0; hop <= MAX_REDIRECTS; hop++) {
const res = await fetch(current, { ...init, redirect: "manual" });
if (!isRedirectResponse(res)) return res;
const location = res.headers.get("location");
if (!location) return res;
const next = new URL(location, current);
assertLoopbackTarget(next, label, "redirect");
current = next.href;
try {
return await fetch(initialUrl.href, { ...init, redirect: "error" });
} catch (err) {
// Re-raise abort/network errors unchanged so callers (and the
// AbortSignal) keep their existing semantics. A redirect rejection from
// `redirect: "error"` is surfaced as a clear, labeled error.
if (isAbortError(err)) throw err;
if (isRedirectError(err)) {
throw new Error(`${label}: refusing redirect from local backend`, { cause: err });
}
throw err;
}
throw new Error(`${label}: too many redirects (> ${MAX_REDIRECTS}).`);
}

/**
 * Reports whether `err` looks like an abort rejection.
 *
 * A value qualifies when it is an `Error` instance whose `name` is
 * `"AbortError"` or whose `code` property is `"ABORT_ERR"`. Anything that is
 * not an `Error` instance is never treated as an abort.
 *
 * @param err - The caught value to inspect.
 * @returns `true` if `err` matches one of the abort markers above.
 */
function isAbortError(err: unknown): boolean {
  if (!(err instanceof Error)) {
    return false;
  }
  if (err.name === "AbortError") {
    return true;
  }
  const { code } = err as { code?: string };
  return code === "ABORT_ERR";
}

/**
 * Heuristic check for a "redirect was refused" rejection.
 *
 * Because the concrete error type and wording vary between runtimes, the
 * check is purely textual: the error's own `message` and the `message` of
 * its direct `cause` (if any) are joined, lower-cased, and searched for the
 * substring `"redirect"`. Only the first level of `cause` is consulted.
 *
 * @param err - The caught value to inspect.
 * @returns `true` when `err` is an `Error` whose message, or whose cause's
 *          message, mentions "redirect" (case-insensitive).
 */
function isRedirectError(err: unknown): boolean {
  if (err instanceof Error) {
    const { cause } = err as { cause?: { message?: string } };
    const causeText = cause?.message ?? "";
    const haystack = `${err.message} ${causeText}`.toLowerCase();
    return haystack.includes("redirect");
  }
  return false;
}
52 changes: 44 additions & 8 deletions packages/ai/src/provider-utils/localUrl.ts
Original file line number Diff line number Diff line change
Expand Up @@ -271,9 +271,9 @@ function isLoopbackIpv6(host: string): boolean {
* Everything else — RFC 1918, link-local (incl. the `169.254.169.254`
* cloud-metadata IP), ULA, public, `*.localhost`, IDN, percent-encoded,
* and unsigned-integer IPv4 spellings — returns false. This is intentionally
* narrower than {@link isLocalHostname}: it is the initial-URL and
* redirect-hop gate used by `localOnlyFetch` to close the link-local
* metadata SSRF vector.
* narrower than {@link isLocalHostname}: it is the initial-URL gate used by
* `localOnlyFetch` and (via {@link normalizeLoopbackHttpUrl}) the base-URL
* gate that closes the link-local metadata SSRF vector.
*
* IPv6 literals must be passed WITHOUT surrounding brackets.
*/
Expand Down Expand Up @@ -331,15 +331,49 @@ export function extractRawHost(rawUrl: string): string | null {
* spellings that WHATWG would canonicalise to a local literal).
*/
export function normalizeLocalHttpUrl(rawUrl: string, label: string): string {
  // Delegate to the shared normaliser with the broader "local" host policy.
  const normalized = normalizeHttpUrl(rawUrl, label, isLocalHostname, "local");
  return normalized;
}

/**
 * Strict, loopback-only counterpart of {@link normalizeLocalHttpUrl}.
 *
 * Both functions delegate to the same shared normaliser; this one supplies
 * {@link isLoopbackHostname} as the host predicate and `"loopback"` as the
 * policy word used in error messages. As a result, bases on RFC 1918,
 * link-local (including the `169.254.169.254` cloud-metadata IP) or ULA
 * addresses are refused when the configuration is read, with an explicit
 * message, instead of failing later when a request is made. For example,
 * `http://10.0.0.5:9000` is rejected up front.
 *
 * @param rawUrl - The base URL exactly as supplied by configuration.
 * @param label - Prefix placed at the start of any thrown error message.
 * @returns The normalised base URL string.
 * @throws Error if the URL is malformed, not http(s), carries credentials,
 *         or targets a non-loopback hostname (including non-literal IPv4
 *         spellings that WHATWG would canonicalise to a loopback literal).
 */
export function normalizeLoopbackHttpUrl(rawUrl: string, label: string): string {
  const normalized = normalizeHttpUrl(rawUrl, label, isLoopbackHostname, "loopback");
  return normalized;
}

/**
* Shared implementation behind {@link normalizeLocalHttpUrl} and
* {@link normalizeLoopbackHttpUrl}. The only difference between the two is the
* host predicate (`isLocalHostname` vs `isLoopbackHostname`) and the word used
* in the thrown message.
*/
function normalizeHttpUrl(
rawUrl: string,
label: string,
isAllowedHost: (host: string) => boolean,
policyWord: "local" | "loopback",
): string {
let url: URL;
try {
url = new URL(rawUrl);
} catch {
throw new Error(`${label}: base URL must be a valid local HTTP(S) URL.`);
throw new Error(`${label}: base URL must be a valid ${policyWord} HTTP(S) URL.`);
}

if (url.protocol !== "http:" && url.protocol !== "https:") {
throw new Error(`${label}: base URL must be a valid local HTTP(S) URL.`);
throw new Error(`${label}: base URL must be a valid ${policyWord} HTTP(S) URL.`);
}
if (url.username || url.password) {
throw new Error(`${label}: base URL must not include credentials.`);
Expand All @@ -348,10 +382,12 @@ export function normalizeLocalHttpUrl(rawUrl: string, label: string): string {
// Validate the LITERAL host from rawUrl, not `url.hostname` — the
// WHATWG parser rewrites non-standard IPv4 spellings (hex, decimal,
// leading-zero octets) into canonical dotted-quads that would slip
// past `isLocalHostname`.
// past the strict-literal host predicate.
const rawHost = extractRawHost(rawUrl);
if (rawHost === null || !isLocalHostname(rawHost)) {
throw new Error(`${label}: base URL must target a local HTTP(S) server (got: ${rawUrl}).`);
if (rawHost === null || !isAllowedHost(rawHost)) {
throw new Error(
`${label}: base URL must target a ${policyWord} HTTP(S) server (got: ${rawUrl}).`
);
}

// Strip trailing slashes from the path (but keep a single "/" — handled by
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -91,7 +91,7 @@ describe("acquireBaseUrl precedence", () => {
it("rejects public model URLs before requests can use them", async () => {
await expect(
acquireBaseUrl({ provider_config: { base_url: "https://example.com:8080/" } } as any, {})
).rejects.toThrow(/local HTTP/);
).rejects.toThrow(/loopback HTTP/);
});

it("normalizes slash-heavy local URLs", async () => {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ describe("acquireBaseUrl precedence", () => {
it("rejects public model URLs before requests can use them", async () => {
await expect(
acquireBaseUrl({ provider_config: { base_url: "https://example.com:8080/" } } as any, {})
).rejects.toThrow(/local HTTP/);
).rejects.toThrow(/loopback HTTP/);
});

it("normalizes slash-heavy local URLs", async () => {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
/**
* @license
* Copyright 2026 Steven Roussey <sroussey@gmail.com>
* SPDX-License-Identifier: Apache-2.0
*/

/**
* Integration tests for `localOnlyFetch` against a REAL loopback HTTP server.
*
* Unlike the unit tests (which stub `fetch`), these spin up an
* `http.createServer` bound to `127.0.0.1:0` so we exercise the actual
* `redirect: "error"` behaviour of the host runtime's `fetch`:
* - a 200 endpoint returns its body; and
* - a 302 endpoint causes `localOnlyFetch` to REJECT (the redirect is never
* followed).
*
* Also asserts the config-time gate: `normalizeLoopbackHttpUrl` throws on an
* RFC 1918 base URL.
*/

import { localOnlyFetch, normalizeLoopbackHttpUrl } from "@workglow/ai/provider-utils";
import { createServer, type Server } from "node:http";
import type { AddressInfo } from "node:net";
import { afterAll, beforeAll, describe, expect, it } from "vitest";

let server: Server;
let base: string;

// Start a throwaway HTTP server on an ephemeral loopback port and record its
// origin in `base` for the tests below.
beforeAll(async () => {
  server = createServer((request, response) => {
    switch (request.url) {
      case "/ok":
        // Plain 200 with a fixed body.
        response.writeHead(200, { "content-type": "text/plain" });
        response.end("integration-ok");
        break;
      case "/redirect":
        // Same-origin 302 pointing at /ok.
        response.writeHead(302, { location: "/ok" });
        response.end();
        break;
      default:
        response.writeHead(404);
        response.end();
    }
  });

  // Port 0 asks the OS for any free port; wait until the server is listening.
  await new Promise<void>((resolve) => {
    server.listen(0, "127.0.0.1", resolve);
  });

  const { port } = server.address() as AddressInfo;
  base = `http://127.0.0.1:${port}`;
});

// Shut the test server down once every test in this file has run.
afterAll(async () => {
  await new Promise<void>((resolve, reject) => {
    // close() stops accepting new connections; its callback fires only after
    // every existing connection has ended (or with an error if the server
    // was not running).
    server.close((err) => (err ? reject(err) : resolve()));
    // The runtime's fetch may keep pooled keep-alive sockets open after the
    // tests finish. Drop them explicitly so teardown does not sit waiting
    // for the keep-alive timeout on runtimes where close() leaves idle
    // connections open.
    server.closeAllConnections();
  });
});

// End-to-end checks of localOnlyFetch against the real loopback server.
describe("localOnlyFetch (integration, real loopback server)", () => {
  it("returns the body from a 200 loopback endpoint", async () => {
    const okUrl = `${base}/ok`;
    const response = await localOnlyFetch(okUrl, undefined, "TestProvider");
    expect(response.status).toBe(200);
    const payload = await response.text();
    expect(payload).toBe("integration-ok");
  });

  it("rejects rather than following a 302 from the loopback backend", async () => {
    // The promise must reject; the redirect target must not be fetched.
    const pending = localOnlyFetch(`${base}/redirect`, undefined, "TestProvider");
    await expect(pending).rejects.toThrow();
  });
});

// Config-time validation: non-loopback bases are refused, loopback bases pass.
describe("normalizeLoopbackHttpUrl (config-time gate)", () => {
  it("throws on an RFC 1918 base URL", () => {
    const attempt = () => normalizeLoopbackHttpUrl("http://10.0.0.5:9000", "X");
    expect(attempt).toThrow();
  });

  it("accepts a loopback base URL", () => {
    const normalized = normalizeLoopbackHttpUrl("http://127.0.0.1:9000/v1", "X");
    expect(normalized).toBe("http://127.0.0.1:9000/v1");
  });
});
Loading