Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
73 changes: 73 additions & 0 deletions src/sec/forms/insider-trading/Form_144.storage.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,79 @@ describe("Form 144 storage", () => {
expect(filing?.no_of_units_sold).toBe(129915);
});

it("stores null (not a fabricated 0) for a whitespace-only aggregateMarketValue", async () => {
  const accession = "0001663266-26-000003";
  const fixturePath = join(
    __dirname,
    "mock_data",
    "form-144",
    "000166326626000003-primary_doc.xml"
  );
  const doc = await Form_144.parse("144", readFileSync(fixturePath, "utf-8"));

  // Overwrite the parsed value with whitespace-only text. The earlier local
  // num() pushed that through Number(" "), which yields 0, so a market value
  // was invented for filings that never stated one.
  const securitiesInfo = doc.formData!.securitiesInformation!;
  securitiesInfo.aggregateMarketValue = " ";

  const request = {
    cik: 1534263,
    file_number: "",
    accession_number: accession,
    filing_date: "2026-05-27",
    primary_doc: "x.xml",
    form: "144",
    doc,
  };
  await processForm144(request);

  const stored = await repo.getFiling(accession);
  // The blanked field is null; an untouched numeric field still round-trips.
  expect(stored?.aggregate_market_value).toBeNull();
  expect(stored?.no_of_units_sold).toBe(129915);
});

it("stores null (not a fabricated 0) for whitespace-only grossProceeds on a recent sale", async () => {
  const accession = "0001663266-26-000003";
  const fixturePath = join(
    __dirname,
    "mock_data",
    "form-144",
    "000166326626000003-primary_doc.xml"
  );
  const doc = await Form_144.parse("144", readFileSync(fixturePath, "utf-8"));

  // Blank out gross proceeds on the first recent sale only.
  const recentSales = doc.formData!.securitiesSoldInPast3Months!;
  recentSales[0].grossProceeds = " ";

  const request = {
    cik: 1534263,
    file_number: "",
    accession_number: accession,
    filing_date: "2026-05-27",
    primary_doc: "x.xml",
    form: "144",
    doc,
  };
  await processForm144(request);

  const storedSales = await repo.getRecentSales(accession);
  // Whitespace-only proceeds become null; the sale's amount is still stored.
  expect(storedSales[0].gross_proceeds).toBeNull();
  expect(storedSales[0].amount_sold).toBe(16814);
});

it("stores null (not a fabricated 0) for whitespace-only amountOfSecuritiesAcquired", async () => {
  const accession = "0001663266-26-000003";
  const fixturePath = join(
    __dirname,
    "mock_data",
    "form-144",
    "000166326626000003-primary_doc.xml"
  );
  const doc = await Form_144.parse("144", readFileSync(fixturePath, "utf-8"));

  // Only the first securities-to-be-sold entry gets a whitespace-only amount.
  const toBeSold = doc.formData!.securitiesToBeSold!;
  toBeSold[0].amountOfSecuritiesAcquired = " ";

  const request = {
    cik: 1534263,
    file_number: "",
    accession_number: accession,
    filing_date: "2026-05-27",
    primary_doc: "x.xml",
    form: "144",
    doc,
  };
  await processForm144(request);

  const stored = await repo.getAcquisitions(accession);
  expect(stored[0].amount_acquired).toBeNull();
  // The second acquisition was not modified, so its amount must still be set.
  expect(stored[1].amount_acquired).not.toBeNull();
});

it("clears stale rows when re-extracted with fewer acquisitions", async () => {
const accession = "0001663266-26-000003";
const xml = readFileSync(
Expand Down
17 changes: 4 additions & 13 deletions src/sec/forms/insider-trading/Form_144.storage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -31,23 +31,12 @@ import { getActiveSlot } from "../../../storage/versioning/getActiveSlot";
import { formToExtractorId } from "../../../storage/versioning/extractorIds";
import { Form144Repo } from "../../../storage/form144/Form144Repo";
import type { Form144 } from "./Form_144.schema";
import { numScalar as num, strScalar as str } from "./_valueHelpers";

type AddressShape = NonNullable<
NonNullable<Form144["formData"]>["issuerInfo"]
>["issuerAddress"];

/**
 * Normalize a scalar text value: trims it and maps missing, empty, or
 * whitespace-only input to null.
 *
 * @param s Raw value taken directly from the parsed element.
 * @returns The trimmed text, or null when nothing meaningful remains.
 */
function str(s: string | undefined | null): string | null {
  if (s == null) return null;
  const trimmed = String(s).trim();
  if (trimmed.length === 0) return null;
  return trimmed;
}

/**
 * Coerce a scalar value to a finite number, or null when there is no usable
 * number.
 *
 * Missing, empty, and whitespace-only inputs all map to null. Text is trimmed
 * before coercion because Number(" ") evaluates to 0, which would otherwise
 * fabricate a zero for an element that carries no value.
 *
 * @param n Raw value: already a number, or the element's text.
 * @returns A finite number, or null for absent, blank, non-numeric, or
 *   non-finite input.
 */
function num(n: number | string | undefined | null): number | null {
  if (n === undefined || n === null) return null;
  if (typeof n === "number") return Number.isFinite(n) ? n : null;
  // Reject blank text explicitly; Number() would silently turn it into 0.
  const t = String(n).trim();
  if (t === "") return null;
  const v = Number(t);
  return Number.isFinite(v) ? v : null;
}

// EDGAR Y/N flags.
function toBoolYN(raw: string | undefined): boolean {
return str(raw)?.toUpperCase() === "Y";
Expand Down Expand Up @@ -113,7 +102,9 @@ export async function processForm144({

const activeResolverPersonVersion = personSlot?.semver ?? "1.0.0";
const activeResolverCompanyVersion = companySlot?.semver ?? "1.0.0";
const extractor_version = "1.0.0";
// 1.1.0: num() now treats whitespace-only numeric elements as null instead
// of fabricating 0 via Number(" "). Bumped to force production re-extract.
const extractor_version = "1.1.0";
const extractor_id = formToExtractorId(form) ?? "144";

const formData = doc.formData ?? {};
Expand Down
25 changes: 25 additions & 0 deletions src/sec/forms/insider-trading/OwnershipDocument.storage.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -249,6 +249,31 @@ describe("OwnershipDocument storage (Forms 3/4/5)", () => {
expect(nonDeriv.price_per_share).toBe(1.405);
});

it("stores null (not 0) for a whitespace-only transactionShares element", async () => {
  const accession = "0001493152-26-025476";
  const fixturePath = join(
    __dirname,
    "mock_data",
    "form-4",
    "000149315226025476-primary_doc.xml"
  );
  const doc = await Form_4.parse("4", readFileSync(fixturePath, "utf-8"));

  // Replace the share count of the first non-derivative transaction with
  // whitespace-only text.
  const firstTxn = doc.nonDerivativeTable!.nonDerivativeTransaction![0];
  firstTxn.transactionAmounts!.transactionShares!.value = " ";

  const request = {
    cik: 1828673,
    file_number: "",
    accession_number: accession,
    filing_date: "2026-05-27",
    primary_doc: "x.xml",
    form: "4",
    doc,
  };
  await processOwnershipForm(request);

  const stored = await repo.getTransactions(accession);
  const storedNonDeriv = stored.find((txn) => !txn.is_derivative)!;
  // Shares become null while the untouched price is still stored.
  expect(storedNonDeriv.shares).toBeNull();
  expect(storedNonDeriv.price_per_share).toBe(1.405);
});

it("stores null (not 0) for an empty transactionPricePerShare element", async () => {
const accession = "0001493152-26-025476";
const xml = readFileSync(
Expand Down
21 changes: 1 addition & 20 deletions src/sec/forms/insider-trading/OwnershipDocument.storage.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import { getActiveSlot } from "../../../storage/versioning/getActiveSlot";
import { isBadPersonField } from "../../../types/edgar/bad-data";
import { parseCikSafely } from "../../../util/parseCik";
import type { OwnershipDocument } from "./OwnershipDocument.schema";
import { numWrapped as num, strWrapped as str } from "./_valueHelpers";

// EDGAR ownership flags appear as "1"/"0" (X0609) or "true"/"false" (X0607).
function toBool(raw: string | undefined): boolean {
Expand All @@ -43,26 +44,6 @@ function toBool(raw: string | undefined): boolean {
return v === "1" || v === "true";
}

// Resolve a `{ value }` leaf (or a bare string) to trimmed text; missing,
// empty, and whitespace-only content all become null.
function str(field: { value?: string } | string | undefined): string | null {
  if (field == null) return null;
  const raw = typeof field === "string" ? field : field.value;
  if (raw == null) return null;
  const trimmed = String(raw).trim();
  return trimmed === "" ? null : trimmed;
}

// Resolve a `{ value }` leaf to a finite number, or null. Bare strings are not
// unwrapped and yield null. The schema keeps the inner value typed as a string
// so an empty XML element (parsed as "") passes through Value.Convert
// unchanged and is mapped to null here; typing it as a number would let
// Value.Convert fabricate a 0 instead.
function num(field: { value?: string } | string | undefined): number | null {
  if (field == null || typeof field === "string") return null;
  const raw = field.value;
  if (raw == null || raw.trim().length === 0) return null;
  const parsed = Number(raw);
  if (!Number.isFinite(parsed)) return null;
  return parsed;
}

interface OwnershipStorageContext {
readonly accession_number: string;
readonly extractor_id: string;
Expand Down
90 changes: 90 additions & 0 deletions src/sec/forms/insider-trading/_valueHelpers.test.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,90 @@
/**
* @license
* Copyright 2025 Steven Roussey <sroussey@gmail.com>
* SPDX-License-Identifier: Apache-2.0
*/

import { describe, expect, it } from "bun:test";
import { numScalar, numWrapped, strScalar, strWrapped } from "./_valueHelpers";

describe("strScalar", () => {
  it("returns null for undefined/null/empty/whitespace", () => {
    // Every blank-like input collapses to null.
    const blanks = [undefined, null, "", " ", "\t\n"];
    for (const blank of blanks) {
      expect(strScalar(blank)).toBeNull();
    }
  });
  it("trims and returns a non-empty string", () => {
    // [input, expected trimmed text]; the last row covers numeric input.
    const cases: ReadonlyArray<readonly [string | number, string]> = [
      ["abc", "abc"],
      [" 42 ", "42"],
      ["0", "0"],
      [0, "0"],
    ];
    for (const [input, expected] of cases) {
      expect(strScalar(input)).toBe(expected);
    }
  });
});

describe("numScalar", () => {
  it("returns null for undefined/null/empty/whitespace", () => {
    const blanks = [undefined, null, "", " ", "\t\n"];
    for (const blank of blanks) {
      expect(numScalar(blank)).toBeNull();
    }
  });
  it("coerces trimmed numeric strings", () => {
    // [input text, expected number]
    const cases: ReadonlyArray<readonly [string, number]> = [
      ["0", 0],
      [" 42 ", 42],
      ["3.14", 3.14],
    ];
    for (const [text, expected] of cases) {
      expect(numScalar(text)).toBe(expected);
    }
  });
  it("returns null for non-numeric input", () => {
    expect(numScalar("abc")).toBeNull();
  });
  it("passes through finite numbers and rejects non-finite", () => {
    expect(numScalar(7)).toBe(7);
    for (const nonFinite of [Number.NaN, Number.POSITIVE_INFINITY]) {
      expect(numScalar(nonFinite)).toBeNull();
    }
  });
});

describe("strWrapped", () => {
  it("returns null for undefined/null/empty/whitespace", () => {
    const blanks = [undefined, null, "", " ", "\t\n"];
    for (const blank of blanks) {
      expect(strWrapped(blank)).toBeNull();
    }
  });
  it("unwraps {value} leaves with the same semantics", () => {
    // [wrapped value, expected trimmed text]
    const populated: ReadonlyArray<readonly [string, string]> = [
      ["abc", "abc"],
      [" 42 ", "42"],
      ["0", "0"],
    ];
    for (const [value, expected] of populated) {
      expect(strWrapped({ value })).toBe(expected);
    }
    // Blank or missing inner values, and a leaf with no value key, give null.
    for (const value of [" ", undefined]) {
      expect(strWrapped({ value })).toBeNull();
    }
    expect(strWrapped({})).toBeNull();
  });
  it("accepts a bare string value", () => {
    expect(strWrapped("abc")).toBe("abc");
  });
});

describe("numWrapped", () => {
  it("returns null for undefined/null and bare strings", () => {
    expect(numWrapped(undefined)).toBeNull();
    expect(numWrapped(null)).toBeNull();
    // Bare string is a schema mismatch at a wrapped call site.
    for (const bare of ["42", "", " "]) {
      expect(numWrapped(bare)).toBeNull();
    }
  });
  it("unwraps {value} leaves and coerces with finite check", () => {
    // [wrapped text, expected number]
    const numeric: ReadonlyArray<readonly [string, number]> = [
      ["0", 0],
      [" 42 ", 42],
      ["3.14", 3.14],
    ];
    for (const [value, expected] of numeric) {
      expect(numWrapped({ value })).toBe(expected);
    }
    // Non-numeric, blank, and missing inner values all give null.
    for (const value of ["abc", "", " ", "\t\n", undefined]) {
      expect(numWrapped({ value })).toBeNull();
    }
    expect(numWrapped({})).toBeNull();
  });
});
61 changes: 61 additions & 0 deletions src/sec/forms/insider-trading/_valueHelpers.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
/**
* @license
* Copyright 2025 Steven Roussey <sroussey@gmail.com>
* SPDX-License-Identifier: Apache-2.0
*/

// Shared null-on-empty value helpers for SEC insider-trading form extractors.
//
// EDGAR XML elements that are present-but-empty (e.g. <aggregateMarketValue/>
// or <transactionShares><value/></transactionShares>) parse to "", and some
// filings carry whitespace-only content such as " ". Both must be preserved
// as null rather than fabricating a 0 via Number("") === 0 (or
// Number(" ") === 0), which would lie to downstream consumers about the
// filing's actual content.
//
// Form 144 fields are scalar (the value sits directly on the element), while
// Ownership Document fields are wrapped in a `{ value }` leaf so the schema
// can distinguish an empty value from a missing element. The two pairs
// intentionally have different signatures so they can't collapse together
// and silently accept the wrong shape at a call site.

/**
 * Normalize a scalar value to trimmed text.
 *
 * @param n Raw scalar; numbers are stringified before trimming.
 * @returns The trimmed text, or null when the input is undefined, null, empty,
 *   or whitespace-only.
 */
export function strScalar(n: string | number | undefined | null): string | null {
  if (n == null) return null;
  const trimmed = String(n).trim();
  if (trimmed.length === 0) return null;
  return trimmed;
}

/**
 * Coerce a scalar value to a finite number.
 *
 * The input is stringified and trimmed first, so whitespace-only text is
 * rejected instead of being coerced to 0 by Number().
 *
 * @param n Raw scalar, as a number or as text.
 * @returns A finite number, or null for undefined, null, blank, non-numeric,
 *   or non-finite input.
 */
export function numScalar(n: string | number | undefined | null): number | null {
  const text = n == null ? "" : String(n).trim();
  if (text.length === 0) return null;
  const parsed = Number(text);
  if (!Number.isFinite(parsed)) return null;
  return parsed;
}

/**
 * Resolve a `{ value }` leaf, or a bare string, to trimmed text.
 *
 * @param w Wrapped leaf, bare string, or nothing.
 * @returns The trimmed text, or null when the leaf or its value is missing,
 *   empty, or whitespace-only.
 */
export function strWrapped(
  w: { value?: string } | string | undefined | null
): string | null {
  if (w == null) return null;
  // Bare strings and wrapped values share the same trim-or-null treatment.
  const raw = typeof w === "string" ? w : w.value;
  if (raw == null) return null;
  const trimmed = String(raw).trim();
  return trimmed.length > 0 ? trimmed : null;
}

/**
 * Resolve a `{ value }` leaf to a finite number.
 *
 * @param w Wrapped leaf, bare string, or nothing.
 * @returns A finite number, or null when the leaf or its value is missing,
 *   blank, non-numeric, or non-finite. A bare string always yields null.
 */
export function numWrapped(
  w: { value?: string } | string | undefined | null
): number | null {
  // A bare string here means the caller passed the wrong shape for a wrapped
  // field, so it is rejected rather than parsed.
  if (w == null || typeof w === "string") return null;
  const raw = w.value;
  if (raw == null) return null;
  const text = String(raw).trim();
  if (text.length === 0) return null;
  const parsed = Number(text);
  if (!Number.isFinite(parsed)) return null;
  return parsed;
}
20 changes: 18 additions & 2 deletions src/storage/entity/cikNameBulkWriter.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,12 +94,28 @@ function createPostgresWriter(): CikNameBulkWriter {
await client.query("BEGIN");
for (let start = 0; start < rows.length; start += PG_MAX_ROWS_PER_STATEMENT) {
const slice = rows.slice(start, start + PG_MAX_ROWS_PER_STATEMENT);
// Per-slice dedup keeps `INSERT ... ON CONFLICT DO UPDATE` from
// failing on duplicate CIKs within a single statement (Postgres
// rejects two rows with the same conflict key in one INSERT).
// Last value wins, matching the SQLite `INSERT OR REPLACE` path.
// Dedup runs AFTER slicing and only shrinks the row set, so the
// 60_000-bind cap (PG_MAX_ROWS_PER_STATEMENT * 2) still holds.
const dedup = new Map<number, string>();
for (const r of slice) dedup.set(r.cik, r.name);
if (dedup.size < slice.length) {
console.debug(
`cikNameBulkWriter: dedup dropped ${slice.length - dedup.size} duplicate cik(s) within a ${slice.length}-row slice`
);
}
if (dedup.size === 0) continue;
const values: (number | string)[] = [];
const placeholders: string[] = [];
for (let i = 0; i < slice.length; i++) {
let i = 0;
for (const [cik, name] of dedup.entries()) {
const base = i * 2;
placeholders.push(`($${base + 1}, $${base + 2})`);
values.push(slice[i].cik, slice[i].name);
values.push(cik, name);
i++;
}
const sql =
`INSERT INTO "cik_names" ("cik", "name") VALUES ` +
Expand Down
14 changes: 14 additions & 0 deletions src/task/ciknames/FetchAllCikNamesTask.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -82,4 +82,18 @@ describe("createCikNameBulkWriter", () => {
const repo = globalServiceRegistry.get(CIK_NAME_REPOSITORY_TOKEN);
expect((await repo.getAll())?.length ?? 0).toBe(0);
});

it("dedups duplicate CIKs within a single batch, last value wins", async () => {
  // CIK 1 appears twice in the same batch with different names.
  const batch = [
    { cik: 1, name: "FIRST" },
    { cik: 2, name: "B" },
    { cik: 1, name: "LAST" },
  ];
  const bulkWriter = createCikNameBulkWriter();
  await bulkWriter.writeBatch(batch);
  await bulkWriter.close();

  const cikNames = globalServiceRegistry.get(CIK_NAME_REPOSITORY_TOKEN);
  const cikOne = await cikNames.get({ cik: 1 });
  expect(cikOne?.name).toBe("LAST");
  // Two distinct CIKs were written, so exactly two rows exist.
  const everything = await cikNames.getAll();
  expect(everything?.length).toBe(2);
});
});