Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
77 changes: 77 additions & 0 deletions packages/realm-server/lib/serialize-fatal-reason.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,77 @@
// Render an arbitrary thrown value into a single human-readable string
// for FD-level fatal-exit logging in `worker.ts`. The output must be
// self-contained (no console formatting hooks, no Error toString
// reflection that can throw) because it gets handed straight to
// `writeSync(2, ...)` on a hot path right before `process.exit(1)`.
//
// What we want preserved across the pipe:
// - the error's name + message (or stringified non-Error value)
// - the stack trace (otherwise we still can't see where it threw)
// - the cause chain (Node fetch errors and many internal libs
// stash the real reason there; ECONNRESET, TLS errors, etc.)
//
// Newlines inside the rendered output are fine — the caller's wrapping
// `[worker] FATAL ... : <this>\n` is the only line boundary readers
// of `worker-manager.ts`'s stderr-data tee actually expect.
// `Error.cause` is an ES2022 field. The realm-server package still
// targets a slightly older lib in tsconfig (the runtime has had it
// since Node 16.9, but the type definitions don't expose it), so we
// reach for it through a structural cast.
type WithCause = { cause?: unknown };

/**
 * Upper bound on how many `cause` links are rendered. Cycles are detected
 * separately; this cap only guards against absurdly long (but acyclic)
 * chains bloating the single fatal log record.
 */
const MAX_CAUSE_DEPTH = 8;

/**
 * Render an arbitrary thrown value into one human-readable string.
 *
 * For an `Error` the output is its stack (or `name: message` when no usable
 * stack string exists), followed by one `Caused by: ...` line per link in
 * the `cause` chain. Any other value is stringified.
 *
 * This function never throws. Per the design notes in this file, the
 * fatal-exit path calls it after its re-entrancy flag is already set, so a
 * throw from here would be swallowed by the re-entered handler and leave
 * the worker alive in a broken state. If rendering fails partway through,
 * everything rendered so far is kept and a
 * `[serializeFatalReason failed: ...]` marker is appended.
 *
 * @param reason - the thrown or rejected value; may be anything.
 * @returns the rendered text; it may contain newlines.
 */
export function serializeFatalReason(reason: unknown): string {
  // Declared outside the `try` so that a failure while walking the cause
  // chain does not discard the top-level stack we already captured.
  let parts: string[] = [];
  try {
    if (!(reason instanceof Error)) {
      return safeString(reason);
    }
    parts.push(describeError(reason));

    // Track every Error already rendered so a cyclic chain is reported
    // once instead of repeating the same stack until the depth cap.
    let seen = new Set<unknown>([reason]);
    let cause: unknown = (reason as WithCause).cause;
    let depth = 0;
    while (cause !== undefined && depth < MAX_CAUSE_DEPTH) {
      if (seen.has(cause)) {
        parts.push('Caused by: [circular cause]');
        cause = undefined;
      } else if (cause instanceof Error) {
        seen.add(cause);
        parts.push(`Caused by: ${describeError(cause)}`);
        cause = (cause as WithCause).cause;
      } else {
        // A non-Error cause cannot carry a further chain.
        parts.push(`Caused by: ${safeString(cause)}`);
        cause = undefined;
      }
      depth += 1;
    }
    // A cause still pending here means the cap was hit; say so rather
    // than truncating silently. Deliberately not prefixed `Caused by:`
    // so the number of such lines never exceeds the cap.
    if (cause !== undefined) {
      parts.push(`[cause chain truncated after ${MAX_CAUSE_DEPTH} levels]`);
    }
  } catch (innerErr) {
    parts.push(`[serializeFatalReason failed: ${describeFailure(innerErr)}]`);
  }
  return parts.join('\n');
}

// Render one Error: its stack when that is a non-empty string, otherwise
// `name: message`. The fields are stringified defensively because a thrown
// object may carry non-string `name`/`message` values.
function describeError(err: Error): string {
  let stack: unknown = err.stack;
  if (typeof stack === 'string' && stack !== '') {
    return stack;
  }
  return `${safeString(err.name)}: ${safeString(err.message)}`;
}

// Describe a failure raised inside the serializer itself; must not throw.
function describeFailure(err: unknown): string {
  try {
    return err instanceof Error ? safeString(err.message) : safeString(err);
  } catch {
    return 'unknown';
  }
}

// `String(value)` throws on a prototype-less object (no `toString` /
// `valueOf` to call), and on any value whose own `toString` throws.
// Wrap the call so callers can rely on getting a string back.
function safeString(value: unknown): string {
  try {
    return String(value);
  } catch {
    return '[unstringifiable value]';
  }
}
1 change: 1 addition & 0 deletions packages/realm-server/tests/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -204,6 +204,7 @@ const ALL_TEST_FILES: string[] = [
'./queue-test',
'./finalize-orphan-reservations-test',
'./finalize-child-fatal-failure-test',
'./serialize-fatal-reason-test',
'./screenshot-card-test',
'./run-command-task-test',
'./realm-endpoints-test',
Expand Down
146 changes: 146 additions & 0 deletions packages/realm-server/tests/serialize-fatal-reason-test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
import { module, test } from 'qunit';
import { basename } from 'path';
import { serializeFatalReason } from '../lib/serialize-fatal-reason';

// Pure-function tests for the renderer that worker.ts hands to
// `writeSync(2, ...)` on the fatal-exit path (CS-11200). We can't
// reasonably unit-test the actual FD-level write behavior — that
// requires a real child_process.spawn + libuv-piped stderr, and the
// bug it fixes only manifests when the child does `process.exit(1)`
// before libuv flushes. The verifiable part is the serialization:
// the stack is preserved, and the `error.cause` chain (where Node
// fetch / undici / TLS errors actually keep their real reason) survives.

// Outcome of running the serializer under a guard: whether it threw, and
// the string it produced when it did not.
type GuardedOutcome = { threw: boolean; out: string | undefined };

// Invoke serializeFatalReason without letting an exception escape, so a
// test can assert on "did not throw" and on the output separately.
function serializeGuarded(value: unknown): GuardedOutcome {
  try {
    return { threw: false, out: serializeFatalReason(value) };
  } catch {
    return { threw: true, out: undefined };
  }
}

// Remove the `stack` property so the serializer has to fall back to
// `name: message`, which makes expected output deterministic.
function stripStack<T extends Error>(error: T): T {
  delete (error as { stack?: string }).stack;
  return error;
}

// A value whose string conversion always throws the given message.
function throwingToString(message: string): { toString(): string } {
  return {
    toString() {
      throw new Error(message);
    },
  };
}

module(basename(__filename), function () {
  test('preserves the stack trace for an Error', function (assert) {
    let rendered = serializeFatalReason(new Error('boom'));
    assert.ok(rendered.includes('Error: boom'), 'message+name appears in output');
    assert.ok(
      rendered.includes('serialize-fatal-reason-test'),
      'stack frames are included',
    );
  });

  test('falls back to name+message when stack is absent', function (assert) {
    let bare = stripStack(new Error('no-stack'));
    assert.strictEqual(serializeFatalReason(bare), 'Error: no-stack');
  });

  test('renders a non-Error value via String()', function (assert) {
    assert.strictEqual(serializeFatalReason('plain string'), 'plain string');
    assert.strictEqual(serializeFatalReason(42), '42');
    assert.strictEqual(serializeFatalReason(null), 'null');
    assert.strictEqual(serializeFatalReason(undefined), 'undefined');
  });

  test('walks the cause chain (Node fetch surfaces real reason on .cause)', function (assert) {
    let inner = stripStack(new Error('ECONNRESET: socket hang up'));
    // The cause is attached by assignment, not through the
    // `new TypeError(msg, { cause })` form, since the TS lib this package
    // compiles against has no ES2022 ErrorOptions constructor overload.
    let outer = new TypeError('fetch failed') as TypeError & {
      cause?: unknown;
    };
    outer.cause = inner;
    stripStack(outer);
    let rendered = serializeFatalReason(outer);
    assert.ok(
      rendered.includes('TypeError: fetch failed'),
      'top-level message included',
    );
    assert.ok(
      rendered.includes('Caused by: Error: ECONNRESET: socket hang up'),
      'cause is rendered with the Caused by prefix',
    );
  });

  test('handles a non-Error cause value', function (assert) {
    let outer = new Error('outer') as Error & { cause?: unknown };
    outer.cause = { code: 'ENOTFOUND', message: 'whatever' };
    let rendered = serializeFatalReason(outer);
    assert.ok(rendered.includes('Caused by: [object Object]'));
  });

  test('does not loop on a self-referential cause', function (assert) {
    let loop = new Error('cyclic') as Error & { cause?: unknown };
    loop.cause = loop;
    let rendered = serializeFatalReason(loop);
    assert.ok(rendered.includes('Error: cyclic'));
    // At most 8 `Caused by:` lines may follow the top-level line.
    let causedByCount = rendered.split('Caused by:').length - 1;
    assert.ok(
      causedByCount <= 8,
      `cause chain capped at 8, saw ${causedByCount}`,
    );
  });

  test('does not throw on a prototype-less rejection value', function (assert) {
    // An object created with a null prototype has neither `toString` nor
    // `valueOf`, so `String(...)` on it raises a TypeError. Such values do
    // get rejected by libraries now and then, and the fatal-exit path
    // cannot afford a throw. Found in code review on PR #4906.
    let protoless = Object.create(null) as object;
    let outcome = serializeGuarded(protoless);
    assert.ok(!outcome.threw, 'serializeFatalReason did not throw');
    assert.strictEqual(
      typeof outcome.out,
      'string',
      'returned a string fallback',
    );
  });

  test('does not throw when the value’s own toString throws', function (assert) {
    let outcome = serializeGuarded(throwingToString('toString blew up'));
    assert.ok(
      !outcome.threw,
      'serializeFatalReason did not throw on a hostile toString',
    );
    assert.strictEqual(typeof outcome.out, 'string');
  });

  test('does not throw when an Error’s cause has a hostile toString', function (assert) {
    let outer = new Error('outer') as Error & { cause?: unknown };
    outer.cause = throwingToString('cause toString blew up');
    let outcome = serializeGuarded(outer);
    assert.ok(
      !outcome.threw,
      'serializeFatalReason did not throw when walking the cause chain',
    );
    assert.ok(
      /outer/.test(String(outcome.out)),
      'top-level message still present',
    );
  });
});
54 changes: 48 additions & 6 deletions packages/realm-server/worker.ts
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,21 @@ import {
import { createRemotePrerenderer } from './prerender/remote-prerenderer';
import { buildCreatePrerenderAuth } from './prerender/auth';
import { finalizeChildReservationAsFailure } from './lib/finalize-child-fatal-failure';
import { serializeFatalReason } from './lib/serialize-fatal-reason';

// Belt-and-suspenders wrapper around serializeFatalReason for the
// fatal-exit paths below. The helper already swallows its own throws,
// but a regression in it (or in any future refactor) would re-enter
// uncaughtException after `isFatalHandlerRunning` is set and strand
// the worker. The fatal path cannot tolerate a throw, so wrap once
// more here.
function safeSerialize(reason: unknown): string {
  // Start from the placeholder and overwrite it only when rendering
  // succeeds, so every path out of this function yields a string.
  let rendered = '[serializeFatalReason threw]';
  try {
    rendered = serializeFatalReason(reason);
  } catch {
    // Deliberately ignored: the placeholder above is the fallback.
  }
  return rendered;
}

let log = logger('worker');

Expand Down Expand Up @@ -253,7 +268,24 @@ let autoMigrate = migrateDB || undefined;
return;
}
isFatalHandlerRunning = true;
log.error(`Fatal ${source} in worker child ${workerId}:`, reason as Error);
// FD-level synchronous write for the same reason as the STARTUP
// stamp at the top of this file: stderr is libuv-async when piped
// to worker-manager, so `log.error` here can be dropped before the
// child exits via `process.exit(1)` below. Without this, the
// captured server log shows the child as having silently exited
// `code=1, signal=null` and we lose the actual stack trace (see
// CS-11200).
//
// serializeFatalReason() already swallows its own throws, but the
// fatal-exit path is critical enough that we double-wrap each
// call: any throw between `isFatalHandlerRunning = true` and
// `process.exit(1)` becomes a re-entered uncaughtException, which
// sees the flag set and early-returns, stranding the worker with
// its reservation still held.
writeSync(
2,
`[worker] FATAL ${source} pid=${process.pid} workerId=${workerId}: ${safeSerialize(reason)}\n`,
);
try {
Sentry.captureException(reason);
} catch {
Expand All @@ -266,7 +298,10 @@ let autoMigrate = migrateDB || undefined;
new Promise<void>((r) => setTimeout(r, 5000).unref()),
]);
} catch (e) {
log.error(`Fatal handler finalize failed for ${workerId}:`, e);
writeSync(
2,
`[worker] FATAL finalize-failed pid=${process.pid} workerId=${workerId}: ${safeSerialize(e)}\n`,
);
} finally {
process.exit(1);
}
Expand All @@ -279,10 +314,17 @@ let autoMigrate = migrateDB || undefined;
fatalExit(err, 'uncaughtException');
});
})().catch((e: any) => {
Sentry.captureException(e);
log.error(
`worker: Unexpected error encountered starting worker, stopping worker`,
e,
try {
Sentry.captureException(e);
} catch {
// best-effort
}
// Same FD-level rationale as fatalExit above: this is the startup
// path's fatal-exit, and `log.error` immediately before
// `process.exit(1)` can be lost on the libuv-async stderr pipe.
writeSync(
2,
`[worker] FATAL startup-error pid=${process.pid}: ${safeSerialize(e)}\n`,
);
process.exit(1);
});
Loading