Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 57 additions & 0 deletions .github/workflows/update-canary-snapshots.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# Weekly automation: re-record the canary e2e snapshots against live provider
# APIs and open a PR when anything changed. Snapshot drift is reviewed by a
# human before merge (see the PR body); live-API *failures* are caught by the
# separate e2e-canary job, not here.
name: Update canary snapshots

on:
  schedule:
    # 08:00 UTC every Monday.
    - cron: "0 8 * * 1"
  # Allow on-demand runs from the Actions tab.
  workflow_dispatch:

# contents: write      -> push the snapshot branch
# pull-requests: write -> open the PR via `gh pr create`
permissions:
  contents: write
  pull-requests: write

jobs:
  update:
    runs-on: ubuntu-latest
    # Live-API canary runs are slow; leave generous headroom.
    timeout-minutes: 90
    steps:
      - uses: actions/checkout@34e114876b0b11c390a56381ad16ebd13914f8d5 # v4.3.1
      - uses: actions/setup-node@49933ea5288caeca8642d1e84afbd3f7d6820020 # v4.4.0
        with:
          node-version-file: .tool-versions
      - name: Setup pnpm
        uses: pnpm/action-setup@fc06bc1257f339d1d5d8b3a19a8cae5388b55320 # v5.0.0
      - name: Install dependencies
        run: pnpm install --frozen-lockfile
      - name: Build
        run: pnpm run build
      - name: Update canary snapshots
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          COHERE_API_KEY: ${{ secrets.COHERE_API_KEY }}
          COPILOT_API_KEY: ${{ secrets.COPILOT_API_KEY }}
          CURSOR_API_KEY: ${{ secrets.CURSOR_API_KEY }}
          GEMINI_API_KEY: ${{ secrets.GEMINI_API_KEY }}
          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}
          HUGGINGFACE_API_KEY: ${{ secrets.HUGGINGFACE_API_KEY }}
          MISTRAL_API_KEY: ${{ secrets.MISTRAL_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          OPENROUTER_API_KEY: ${{ secrets.OPENROUTER_API_KEY }}
        run: node e2e/scripts/run-canary-tests-docker.mjs --update
      - name: Open PR if snapshots changed
        env:
          GH_TOKEN: ${{ github.token }}
        run: |
          # Stage first, then compare the index: `git diff --quiet` alone only
          # inspects tracked files, so newly created snapshot files would be
          # silently missed and no PR would ever be opened for them.
          git add e2e/scenarios
          if git diff --cached --quiet -- e2e/scenarios; then
            echo "No canary snapshot changes."
            exit 0
          fi
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"
          BRANCH="canary-snapshots/$(date +%Y%m%d)"
          git checkout -b "$BRANCH"
          git commit -m "chore(e2e): update canary snapshots $(date +%Y-%m-%d)"
          # --force: a rerun on the same day reuses the date-stamped branch
          # name; without it the push is rejected as non-fast-forward.
          git push --force origin "$BRANCH"
          # `gh pr create` fails when a PR is already open for this branch,
          # so only create one if none exists — reruns just update the branch.
          if [ -z "$(gh pr list --head "$BRANCH" --state open --json number --jq '.[].number')" ]; then
            gh pr create \
              --title "chore(e2e): update canary snapshots $(date +%Y-%m-%d)" \
              --body "Automated canary snapshot update. Review for unexpected structural changes in provider instrumentation before merging."
          else
            echo "PR already open for $BRANCH; pushed updated snapshots."
          fi
1 change: 1 addition & 0 deletions dev-packages/seinfeld/src/internal/well-known-headers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ export const AUTH_HEADERS = [
"api-key",
"x-api-key",
"x-anthropic-api-key",
"x-goog-api-key",
"cookie",
"set-cookie",
"proxy-authorization",
Expand Down
22 changes: 21 additions & 1 deletion dev-packages/seinfeld/src/msw.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,11 @@ export async function buildResponse(
ctx?: { store: CassetteStore; name: string },
): Promise<Response> {
// Expand \n-joined set-cookie back into multiple header entries.
const headers = expandSetCookieHeader(recorded.headers);
// Strip encoding headers: stored bytes are already decoded by the fetch
// layer (undici decompresses gzip/deflate before handing the body to MSW
// handlers). Preserving content-encoding would cause callers to attempt a
// second decode of already-plain bytes, which throws a zlib error.
const headers = expandSetCookieHeader(stripEncodingHeaders(recorded.headers));
const init: ResponseInit = { status: recorded.status, headers };
if (recorded.statusText) init.statusText = recorded.statusText;
// 1xx/204/304 responses must not have a body, per Fetch spec.
Expand Down Expand Up @@ -169,6 +173,22 @@ export async function buildResponse(
return new Response(buffer, init);
}

// Headers that describe the wire encoding of the ORIGINAL response body.
// Stored cassette bytes are already decoded, so replaying these headers
// would make callers attempt a second (invalid) decode.
const ENCODING_HEADERS = new Set([
  "content-encoding",
  "transfer-encoding",
  "content-length",
]);

/**
 * Return a copy of `headers` with all encoding-related headers removed
 * (case-insensitive match against {@link ENCODING_HEADERS}).
 */
function stripEncodingHeaders(
  headers: Record<string, string>,
): Record<string, string> {
  const kept: Record<string, string> = {};
  for (const [name, value] of Object.entries(headers)) {
    if (!ENCODING_HEADERS.has(name.toLowerCase())) {
      kept[name] = value;
    }
  }
  return kept;
}

/**
* Return a `[string, string][]` header list from a `Record<string, string>`,
* splitting any `\n`-delimited `set-cookie` value back into separate entries.
Expand Down
19 changes: 17 additions & 2 deletions dev-packages/seinfeld/src/recorder.ts
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@ import type {
CassetteEntry,
CassetteMode,
RecordedRequest,
RecordedResponse,
} from "./cassette";
import { AggregateCassetteMissError, CassetteMissError } from "./errors";
import { CURRENT_FORMAT_VERSION } from "./format";
Expand Down Expand Up @@ -221,8 +222,22 @@ async function handleRecord(
response: redactedResponse,
});

// Return the real response to the caller. recordResponseDraft only used
// .clone() internally, so realResponse body is still available to clone.
// Return the response to the caller.
//
// For non-draft bodies (JSON, text, empty, SSE), build a fresh Response
// from the already-captured bytes. This avoids a Node.js/undici issue
// where realResponse.clone() after recordResponseDraft() (which already
// teed the body stream) can return an empty body, causing callers to
// misparse the response.
//
// For binary-draft bodies (large responses above the threshold), the
// bytes haven't been materialised yet, so fall back to realResponse.clone().
if (captured.body.kind !== "binary-draft") {
return buildResponse(captured as unknown as RecordedResponse, {
store: ctx.store,
name: ctx.name,
});
}
return realResponse.clone();
}

Expand Down
11 changes: 10 additions & 1 deletion dev-packages/seinfeld/src/redactor/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,13 @@ export interface RedactionConfig {
* forms, or in SSE event lines that are not JSON.
*/
redactBodyText?: Array<RegExp | { pattern: RegExp; replacement?: string }>;
/**
* When `true`, all request headers are omitted from the persisted cassette.
* Request headers are never used for replay matching (only method, URL, and
* body are compared), so dropping them entirely is a safe way to prevent
* credentials from leaking into cassette files committed to version control.
*/
omitRequestHeaders?: boolean;
/** Custom request transform run after declarative redaction. */
redactRequest?: (req: RecordedRequest) => RecordedRequest;
/** Custom response transform run after declarative redaction. */
Expand Down Expand Up @@ -172,7 +179,9 @@ function applyRequestRedactionConfig(
): RecordedRequest {
let result: RecordedRequest = req;

if (config.redactHeaders && config.redactHeaders.length > 0) {
if (config.omitRequestHeaders) {
result = { ...result, headers: {} };
} else if (config.redactHeaders && config.redactHeaders.length > 0) {
result = {
...result,
headers: maskHeaders(result.headers, config.redactHeaders),
Expand Down
1 change: 1 addition & 0 deletions dev-packages/seinfeld/src/redactor/presets.ts
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ const AGGRESSIVE_REDACTION: RedactionConfig = {
* APIs use per-request credentials that could appear in response bodies.
*/
const PARANOID_REDACTION: RedactionConfig = {
omitRequestHeaders: true,
redactHeaders: CREDENTIAL_HEADERS,
redactBodyFields: [/^(api_?key|token|secret|password|authorization)$/i],
redactBodyText: [
Expand Down
4 changes: 2 additions & 2 deletions dev-packages/seinfeld/test/redactor.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -355,9 +355,9 @@ describe("resolveRedactors", () => {
});

describe("'paranoid' preset", () => {
it("masks credential headers", () => {
it("drops all request headers", () => {
const out = applyRequestRedaction(baseReq, "paranoid");
expect(out.headers.Authorization).toBe(REDACTED_SENTINEL);
expect(out.headers).toEqual({});
});

it("masks common credential field names in JSON bodies", () => {
Expand Down
4 changes: 2 additions & 2 deletions dev-packages/seinfeld/test/store/file-store.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -62,15 +62,15 @@ describe("createJsonFileStore", () => {
const store = createJsonFileStore({ rootDir: dir });
await store.save("demo", makeCassette());
const raw = await readFile(join(dir, "demo.cassette.json"), "utf8");
expect(raw).toMatch(/^\{\n {2}"version": 1/);
expect(raw).toMatch(/^\{\n {2}"entries":/);
expect(raw.endsWith("\n")).toBe(true);
});

it("writes compact JSON when pretty=false", async () => {
const store = createJsonFileStore({ rootDir: dir, pretty: false });
await store.save("demo", makeCassette());
const raw = await readFile(join(dir, "demo.cassette.json"), "utf8");
expect(raw.startsWith('{"version":1')).toBe(true);
expect(raw.startsWith('{"entries":')).toBe(true);
expect(raw.endsWith("\n")).toBe(false);
});

Expand Down
80 changes: 79 additions & 1 deletion e2e/helpers/cassette-preload.mjs
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,59 @@ async function bootCassettePreload(cassetteDir) {

await cassette.start();

process.on("beforeExit", async () => {
if (mode === "record") {
installRecordModeGuard(cassette);
} else {
process.on("beforeExit", async () => {
try {
await cassette.stop();
} catch (err) {
process.stderr.write(
`[cassette] stop error: ${err instanceof Error ? err.message : String(err)}\n`,
);
process.exit(1);
}
});
}
}

/**
* In record mode, `beforeExit` can fire in the gap between sequential HTTP
* calls (e.g. ADK's two-step tool-call flow: first call returns a functionCall,
* tool executes synchronously, then the second call sends the result). The gap
* between calls has no pending I/O, so the event loop empties and `beforeExit`
* fires prematurely — causing only a partial cassette to be saved.
*
* Fix: wrap `globalThis.fetch` (after MSW has installed its proxy) to track
* in-flight request count. A drain timer is set after each request completes
* and reset when the next request starts. `cassette.stop()` is only called
* when the drain timer fires with no in-flight requests, guaranteeing all
* sequential HTTP calls have been captured before we flush.
*
* @param {import("@braintrust/seinfeld").Cassette} cassette
*/
function installRecordModeGuard(cassette) {
// How long to wait after the last HTTP call before flushing the cassette.
// Must be long enough for the scenario to initiate the next sequential
// request. For ADK tool-call flows, the event loop empties between calls
// (MSW doesn't maintain a keep-alive socket) so the drain delay must be
// large enough to cover any gap between sequential Gemini API calls.
const DRAIN_DELAY_MS = 2000;

const mswFetch = globalThis.fetch;
let inFlight = 0;
/** @type {ReturnType<typeof setTimeout> | null} */
let drainTimer = null;
let stopping = false;

async function stopOnDrain() {
if (stopping) return;
if (inFlight > 0) return; // new request started before drain fired
stopping = true;
if (drainTimer) {
clearTimeout(drainTimer);
drainTimer = null;
}
try {
await cassette.stop();
} catch (err) {
Expand All @@ -59,6 +111,32 @@ async function bootCassettePreload(cassetteDir) {
);
process.exit(1);
}
}

function scheduleDrain() {
if (stopping) return;
if (drainTimer) clearTimeout(drainTimer);
drainTimer = setTimeout(stopOnDrain, DRAIN_DELAY_MS);
}

globalThis.fetch = async function recordGuardFetch(input, init) {
// Cancel any pending drain — a new request is starting.
if (drainTimer) {
clearTimeout(drainTimer);
drainTimer = null;
}
inFlight++;
try {
return await mswFetch(input, init);
} finally {
inFlight--;
scheduleDrain();
}
};

// Fallback: if the scenario makes no HTTP calls, still flush the cassette.
process.on("beforeExit", async () => {
await stopOnDrain();
});
}

Expand Down
29 changes: 28 additions & 1 deletion e2e/helpers/file-snapshot.ts
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
import { mkdirSync, writeFileSync } from "node:fs";
import { dirname, join } from "node:path";
import { fileURLToPath } from "node:url";
import { expect } from "vitest";
import { isCanaryMode } from "./scenario-installer";
import { normalizeForSnapshot, type Json } from "./normalize";

function sortJsonKeys(value: Json): Json {
Expand All @@ -22,9 +25,33 @@ export function formatJsonFileSnapshot(value: Json): string {
return `${JSON.stringify(sortJsonKeys(normalizeForSnapshot(value)), null, 2)}\n`;
}

/**
 * Compare `value` against the file snapshot at `path`.
 *
 * In canary mode the snapshot file is rewritten unconditionally and the check
 * always passes — content drift never fails the run. Live API failures are
 * caught by the e2e-canary job, and snapshot drift is surfaced separately by
 * the update-canary-snapshots PR workflow.
 */
export async function matchFileSnapshot(
  value: string,
  path: string,
): Promise<void> {
  if (!isCanaryMode()) {
    await expect(value).toMatchFileSnapshot(path);
    return;
  }
  // Canary: persist the latest output and report success.
  mkdirSync(dirname(path), { recursive: true });
  writeFileSync(path, value, "utf8");
}

/**
 * Absolute path of the snapshot file `filename` for the test module at
 * `testModuleUrl`, inside that module's `__snapshots__` directory.
 *
 * Canary tests run against the latest provider versions, which may produce
 * different span shapes; their snapshots live in a `canary/` subdirectory so
 * the pinned and canary baselines can diverge independently.
 */
export function resolveFileSnapshotPath(
  testModuleUrl: string,
  filename: string,
): string {
  const snapshotRoot = join(
    dirname(fileURLToPath(testModuleUrl)),
    "__snapshots__",
  );
  return isCanaryMode()
    ? join(snapshotRoot, "canary", filename)
    : join(snapshotRoot, filename);
}
Loading
Loading