Skip to content
141 changes: 140 additions & 1 deletion src/cli/output/TableRenderer.test.ts
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
import { describe, expect, it } from "bun:test";
import { renderTable } from "./TableRenderer";
import { renderTable, __testing } from "./TableRenderer";
import type { ColumnDef, RenderOptions } from "./TableRenderer";

// Pull the CSV escaper out of the module's `__testing` export so the suite
// below can call it directly.
const { escapeCsvValue } = __testing;

const columns: ReadonlyArray<ColumnDef> = [
{ key: "id", header: "ID", width: 6 },
{ key: "name", header: "Name", width: 10 },
Expand Down Expand Up @@ -154,3 +156,140 @@ describe("renderTable", () => {
});
});
});

describe("escapeCsvValue", () => {
  // Per OWASP CSV Injection (https://owasp.org/www-community/attacks/CSV_Injection),
  // a cell whose first visible character is =, +, - or @ (or a TAB) must be
  // neutralized with a leading single quote before it reaches a spreadsheet.
  //
  // Every invisible / non-ASCII character under test is written as a \u escape
  // so the input is unambiguous in source and cannot be silently stripped or
  // normalized by an editor, formatter, or copy/paste.

  it("prefixes a single-quote when value starts with '='", () => {
    expect(escapeCsvValue("=cmd")).toBe("'=cmd");
  });

  it("prefixes a single-quote when value starts with '+'", () => {
    expect(escapeCsvValue("+cmd")).toBe("'+cmd");
  });

  it("prefixes a single-quote when value starts with '-'", () => {
    expect(escapeCsvValue("-cmd")).toBe("'-cmd");
  });

  it("prefixes a single-quote when value starts with '@'", () => {
    expect(escapeCsvValue("@cmd")).toBe("'@cmd");
  });

  it("prefixes a single-quote when value starts with TAB", () => {
    // \t is a dangerous lead in its own right (some loaders strip it
    // before formula parsing). The cell contains no comma/quote/CR/LF, so
    // the result is NOT RFC 4180 quoted.
    expect(escapeCsvValue("\tcmd")).toBe("'\tcmd");
  });

  it("quotes (but does not prefix) a value that begins with bare CR", () => {
    // Splitting on \r\n | \r | \n decomposes "\rcmd" into
    // ["", "\r", "cmd"]: the empty pre-CR line and the post-CR "cmd" line
    // are both non-dangerous, so no apostrophe is added; the cell is still
    // RFC 4180 quoted because it contains a CR.
    expect(escapeCsvValue("\rcmd")).toBe('"\rcmd"');
  });

  it("defuses leading ASCII space before '=' (spreadsheets strip leading WS)", () => {
    expect(escapeCsvValue(" =cmd")).toBe("' =cmd");
  });

  it("defuses leading U+00A0 NBSP before '=' (spreadsheets strip NBSP too)", () => {
    expect(escapeCsvValue("\u00A0=cmd")).toBe("'\u00A0=cmd");
  });

  it("leaves plain alphabetic values unchanged", () => {
    expect(escapeCsvValue("abc")).toBe("abc");
  });

  it("leaves plain numeric values unchanged", () => {
    expect(escapeCsvValue("123")).toBe("123");
  });

  it("defuses a dangerous line after LF inside a multi-line cell", () => {
    // Excel/Sheets re-parse every physical line of a quoted multi-line cell,
    // so the second line must also be defused.
    expect(escapeCsvValue("safe\n=cmd")).toBe('"safe\n\'=cmd"');
  });

  it("defuses a dangerous line after CRLF inside a multi-line cell", () => {
    expect(escapeCsvValue("safe\r\n=cmd")).toBe('"safe\r\n\'=cmd"');
  });

  it("defuses a dangerous line after a bare CR inside a multi-line cell", () => {
    // Splitting on \r\n | \r | \n means the second physical line ("=cmd")
    // is independently defused even when the separator is a lone carriage
    // return.
    expect(escapeCsvValue("safe\r=cmd")).toBe('"safe\r\'=cmd"');
  });

  it("quotes — but does not prefix — bare CR followed by a non-dangerous line", () => {
    // The post-CR line ("cmd") is not a formula lead, so no apostrophe;
    // the cell still needs RFC 4180 quoting because it contains a CR.
    expect(escapeCsvValue("safe\rcmd")).toBe('"safe\rcmd"');
  });

  it("defuses every dangerous follow-up line, leaving safe interleaved lines alone", () => {
    const input = "safe\n=danger1\nstillsafe\n+danger2\n@danger3";
    const expected =
      '"safe\n\'=danger1\nstillsafe\n\'+danger2\n\'@danger3"';
    expect(escapeCsvValue(input)).toBe(expected);
  });

  it("wraps cells that contain a comma in double quotes", () => {
    expect(escapeCsvValue("a,b")).toBe('"a,b"');
  });

  it("doubles embedded double-quotes inside a wrapped cell", () => {
    expect(escapeCsvValue('he said "hi"')).toBe('"he said ""hi"""');
  });

  it("returns an empty string unchanged", () => {
    expect(escapeCsvValue("")).toBe("");
  });

  // Zero-width / format-control characters that spreadsheets silently
  // ignore at the start of a cell. Each must be skipped before the
  // formula-lead check so attackers can't sneak a leading "=cmd" past us.
  // The original characters are kept in the output; only the apostrophe is
  // prepended.

  it("defuses leading U+200B ZWSP before '='", () => {
    expect(escapeCsvValue("\u200B=cmd")).toBe("'\u200B=cmd");
  });

  it("defuses leading U+200C ZWNJ before '='", () => {
    expect(escapeCsvValue("\u200C=cmd")).toBe("'\u200C=cmd");
  });

  it("defuses leading U+200D ZWJ before '='", () => {
    expect(escapeCsvValue("\u200D=cmd")).toBe("'\u200D=cmd");
  });

  it("defuses leading U+200E LRM before '='", () => {
    expect(escapeCsvValue("\u200E=cmd")).toBe("'\u200E=cmd");
  });

  it("defuses leading U+200F RLM before '='", () => {
    expect(escapeCsvValue("\u200F=cmd")).toBe("'\u200F=cmd");
  });

  it("defuses leading U+00AD SHY before '='", () => {
    expect(escapeCsvValue("\u00AD=cmd")).toBe("'\u00AD=cmd");
  });

  it("defuses leading U+FEFF BOM before '='", () => {
    expect(escapeCsvValue("\uFEFF=cmd")).toBe("'\uFEFF=cmd");
  });

  it("defuses ZWSP + ASCII space + '=' (mixed leading-WS bypass)", () => {
    expect(escapeCsvValue("\u200B =cmd")).toBe("'\u200B =cmd");
  });

  it("leaves ZWSP followed by a benign char unchanged", () => {
    // Negative control: skipping a leading ZWSP must NOT cause prefixing
    // of cells that aren't actually formulas once it is ignored.
    expect(escapeCsvValue("\u200Babc")).toBe("\u200Babc");
  });
});
71 changes: 58 additions & 13 deletions src/cli/output/TableRenderer.ts
Original file line number Diff line number Diff line change
Expand Up @@ -34,21 +34,63 @@ function pad(value: string, width: number): string {
return truncate(value, width).padEnd(width);
}

// Characters that trigger formula interpretation when found at the start of a
// spreadsheet cell. See OWASP CSV Injection
// (https://owasp.org/www-community/attacks/CSV_Injection).
const DANGEROUS_LEAD = /^[=+\-@\t\r]/;
// Leading space-like characters that spreadsheets silently ignore, so they
// must be skipped before testing for a dangerous lead. Written as explicit
// \u escapes (never as raw invisible characters) so the set is reviewable and
// cannot be stripped or normalized by an editor or formatter:
//   U+0020 SPACE, U+00A0 NBSP, U+00AD SHY, U+200B ZWSP, U+200C ZWNJ,
//   U+200D ZWJ, U+200E LRM, U+200F RLM, U+FEFF BOM.
// Excludes \t and \r: those are themselves dangerous formula leads.
const LEADING_WS = /^[\u0020\u00A0\u00AD\u200B\u200C\u200D\u200E\u200F\uFEFF]+/;

/**
 * Reports whether a single physical line would be read as a formula.
 *
 * The line's leading ignorable characters (LEADING_WS) are dropped first, and
 * the remainder is checked against DANGEROUS_LEAD.
 */
function needsFormulaPrefix(line: string): boolean {
  const visibleStart = line.replace(LEADING_WS, "");
  return DANGEROUS_LEAD.test(visibleStart);
}

/**
 * Returns `line` with a single leading apostrophe when it needs a formula
 * prefix; otherwise returns it untouched. Any leading ignorable characters are
 * kept; the apostrophe goes in front of them.
 */
function defuseLine(line: string): string {
  if (!needsFormulaPrefix(line)) {
    return line;
  }
  return `'${line}`;
}

/**
 * Defuse CSV/spreadsheet formula injection per OWASP CSV Injection guidance,
 * then apply RFC 4180 quoting.
 *
 * When Excel/Sheets/Numbers open a CSV, a cell starting with =/+/-/@ (or TAB,
 * which some loaders strip) is interpreted as a formula and can exfiltrate data
 * via WEBSERVICE/HYPERLINK or run external commands. Prefixing a single quote
 * makes the cell plain text; plain CSV consumers see one extra leading
 * apostrophe.
 *
 * A naive "does the value start with a dangerous character" check has three
 * bypasses that are handled here:
 *   1. Leading ASCII whitespace (" =cmd...") plus other space-like chars
 *      that spreadsheets silently strip (NBSP, SHY, ZWSP, ZWNJ, ZWJ, LRM,
 *      RLM, BOM). Tab is itself a dangerous lead and is NOT stripped.
 *   2. Dangerous char after an embedded newline in a quoted multi-line cell
 *      ("safe\n=cmd"): each physical line is re-parsed.
 *   3. Bare CR (\r) as a line separator inside a quoted multi-line cell:
 *      the line after the CR also re-parses, so lines are split on
 *      \r\n | \r | \n.
 *
 * Because CR is always consumed as a line separator by the split, a value such
 * as "\rcmd" is quoted but not prefixed: neither the empty line before the CR
 * nor "cmd" after it is dangerous.
 *
 * @param value Raw cell text.
 * @returns The cell text with every dangerous physical line prefixed by an
 *   apostrophe, wrapped in double quotes (with embedded quotes doubled) if it
 *   contains a comma, double quote, CR, or LF. An empty string is returned
 *   unchanged.
 */
function escapeCsvValue(value: string): string {
  if (value.length === 0) {
    return value;
  }
  // Capturing-group split keeps the separators at odd indices so the exact
  // line endings (LF, CRLF, or bare CR) the caller used are round-tripped.
  const parts = value.split(/(\r\n|\r|\n)/);
  const defused = parts
    .map((part, i) => (i % 2 === 0 ? defuseLine(part) : part))
    .join("");
  // RFC 4180: fields containing a comma, double quote, CR, or LF must be
  // enclosed in double quotes, with embedded double quotes doubled.
  if (/[",\r\n]/.test(defused)) {
    return '"' + defused.replace(/"/g, '""') + '"';
  }
  return defused;
}

function cellValue(row: Record<string, unknown>, key: string): string {
Expand Down Expand Up @@ -128,3 +170,6 @@ export function renderTable(
return renderTextTable(rows, columns, options);
}
}

// Test-only hook: exposes the otherwise unexported escapeCsvValue so unit
// tests can call it directly. Not part of the module's public API.
export const __testing = { escapeCsvValue };
126 changes: 126 additions & 0 deletions src/sec/forms/_valueHelpers.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,126 @@
/**
* @license
* Copyright 2026 Steven Roussey <sroussey@gmail.com>
* SPDX-License-Identifier: Apache-2.0
*/

import { describe, expect, it } from "bun:test";
import { numScalar, numWrapped, strScalar, strWrapped } from "./_valueHelpers";

describe("strScalar", () => {
  // Each row: test title, thunk producing the actual value, expected result.
  // Thunks keep every call expression exactly as a caller would write it.
  const cases: ReadonlyArray<readonly [string, () => unknown, string | null]> = [
    ["returns null for undefined", () => strScalar(undefined), null],
    ["returns null for null", () => strScalar(null), null],
    ["returns null for empty string", () => strScalar(""), null],
    ["returns null for whitespace-only string", () => strScalar(" "), null],
    ["trims surrounding whitespace", () => strScalar(" hi "), "hi"],
    ["preserves non-empty values", () => strScalar("hi"), "hi"],
  ];

  for (const [title, actual, expected] of cases) {
    it(title, () => {
      expect(actual()).toBe(expected);
    });
  }
});

describe("numScalar", () => {
  // Each row: test title, thunk producing the actual value, expected result.
  const cases: ReadonlyArray<readonly [string, () => unknown, number | null]> = [
    ["returns null for undefined", () => numScalar(undefined), null],
    ["returns null for null", () => numScalar(null), null],
    ["returns null for empty string", () => numScalar(""), null],
    ["returns null for whitespace-only string", () => numScalar(" "), null],
    // Regression guard: "0" used to be indistinguishable from "" after
    // Value.Convert (both became 0). A legitimate "0" must still survive
    // as the number 0.
    ["returns 0 for the literal string '0' (regression guard)", () => numScalar("0"), 0],
    ["parses negative decimals", () => numScalar("-1.5"), -1.5],
    ["returns null for 'NaN'", () => numScalar("NaN"), null],
    // Number("Infinity") is the number Infinity, which fails
    // Number.isFinite; it is expected to come back as null so a
    // downstream `> 0` check never sees an Infinity row.
    ["returns null for 'Infinity'", () => numScalar("Infinity"), null],
    // Number("1,000.50") is NaN. The value is dropped rather than guessing
    // a locale; callers that want comma handling must pre-clean.
    [
      "returns null for thousand-separator strings (Number rejects them)",
      () => numScalar("1,000.50"),
      null,
    ],
    ["parses ordinary integers", () => numScalar("42"), 42],
    ["parses very large but finite doubles", () => numScalar("1e308"), 1e308],
    ["returns null for 1e309 (overflows to Infinity)", () => numScalar("1e309"), null],
  ];

  for (const [title, actual, expected] of cases) {
    it(title, () => {
      expect(actual()).toBe(expected);
    });
  }

  // Two assertions share one test, so this case stays outside the table.
  it("accepts already-numeric inputs", () => {
    for (const n of [42, 0]) {
      expect(numScalar(n)).toBe(n);
    }
  });
});

describe("strWrapped", () => {
  // Each row: test title, thunk producing the actual value, expected result.
  const cases: ReadonlyArray<readonly [string, () => unknown, string | null]> = [
    ["returns null for undefined wrapper", () => strWrapped(undefined), null],
    ["returns null for null wrapper", () => strWrapped(null), null],
    ["returns null when .value is empty", () => strWrapped({ value: "" }), null],
    ["returns null when .value is whitespace-only", () => strWrapped({ value: " " }), null],
    ["unwraps and trims a non-empty .value", () => strWrapped({ value: " hi " }), "hi"],
  ];

  for (const [title, actual, expected] of cases) {
    it(title, () => {
      expect(actual()).toBe(expected);
    });
  }
});

describe("numWrapped", () => {
  // Each row: test title, thunk producing the actual value, expected result.
  const cases: ReadonlyArray<readonly [string, () => unknown, number | null]> = [
    ["returns null for undefined wrapper", () => numWrapped(undefined), null],
    ["returns null for null wrapper", () => numWrapped(null), null],
    ["returns null when .value is empty", () => numWrapped({ value: "" }), null],
    ["returns null when .value is whitespace-only", () => numWrapped({ value: " " }), null],
    ["returns 0 for .value === '0'", () => numWrapped({ value: "0" }), 0],
    ["parses .value === '-1.5'", () => numWrapped({ value: "-1.5" }), -1.5],
    ["returns null for .value === 'NaN'", () => numWrapped({ value: "NaN" }), null],
    ["returns null for .value === '1e309'", () => numWrapped({ value: "1e309" }), null],
  ];

  for (const [title, actual, expected] of cases) {
    it(title, () => {
      expect(actual()).toBe(expected);
    });
  }
});
Loading