Skip to content
37 changes: 20 additions & 17 deletions benchmarking/bench-harness.ts
Original file line number Diff line number Diff line change
Expand Up @@ -59,29 +59,32 @@ export const printRow = (
console.log(` -> ${candidateLabel} is ${speedupStr} (median)`);
};

import type { Matrix } from "../shared/matrices.ts";

export const matricesMatch = (
m1: number[][],
m2: number[][],
m1: Matrix,
m2: Matrix,
tolerance: number,
): { ok: true } | { ok: false; reason: string } => {
if (m1.length !== m2.length)
return { ok: false, reason: `row count: ${m1.length} vs ${m2.length}` };
for (let i = 0; i < m1.length; i++) {
const r1 = m1[i]!;
const r2 = m2[i]!;
if (r1.length !== r2.length)
if (m1.vectors !== m2.vectors)
return {
ok: false,
reason: `vector count: ${m1.vectors} vs ${m2.vectors}`,
};
if (m1.dimensions !== m2.dimensions)
return {
ok: false,
reason: `dimension count: ${m1.dimensions} vs ${m2.dimensions}`,
};
for (let i = 0; i < m1.values.length; i++) {
const diff = Math.abs(m1.values[i]! - m2.values[i]!);
if (diff > tolerance) {
const row = Math.floor(i / m1.dimensions);
const col = i % m1.dimensions;
return {
ok: false,
reason: `row ${i} length: ${r1.length} vs ${r2.length}`,
reason: `cell [${row},${col}]: ${m1.values[i]} vs ${m2.values[i]} (diff ${diff})`,
};
for (let j = 0; j < r1.length; j++) {
const diff = Math.abs(r1[j]! - r2[j]!);
if (diff > tolerance) {
return {
ok: false,
reason: `cell [${i},${j}]: ${r1[j]} vs ${r2[j]} (diff ${diff})`,
};
}
}
}
return { ok: true };
Expand Down
37 changes: 20 additions & 17 deletions benchmarking/bench-matmul.ts
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,11 @@ import {
extractMatrixBuffer,
multiplyMatricesOnGPU,
} from "../shared/matrices-gpu.ts";
import { createMatrix, multiplyMatrices } from "../shared/matrices.ts";
import {
createMatrix,
multiplyMatrices,
type Matrix,
} from "../shared/matrices.ts";
import { gpuContext } from "../shared/gpu-context.ts";
import {
type Stats,
Expand All @@ -26,12 +30,12 @@ const SIZES: Array<{ label: string; m: number; k: number; n: number }> = [
/** Returns a pseudo-random number in the half-open range [-1, 1). */
const rand = (): number => {
  // Math.random() yields a value in [0, 1); stretch it to width 2, then shift down by 1.
  const unit = Math.random();
  return unit * 2 - 1;
};

const benchmarkGPU = async (
a: number[][],
b: number[][],
): Promise<{ stats: Stats; lastResult: number[][] }> => {
a: Matrix,
b: Matrix,
): Promise<{ stats: Stats; lastResult: Matrix }> => {
const m1 = createMatrixBuffer(a);
const m2 = createMatrixBuffer(b);
const mOut = createMatrixBuffer(createMatrix(a.length, b[0]!.length));
const mOut = createMatrixBuffer(createMatrix(a.vectors, b.dimensions));

const stats = await benchmark(async () => {
multiplyMatricesOnGPU(m1, m2, mOut);
Expand Down Expand Up @@ -64,7 +68,7 @@ export const findCrossover = async (
};

const main = async () => {
console.log("matmul A/B benchmark");
console.log("matmul CPU vs GPU benchmark");
console.log(
` baseline = multiplyMatrices, candidate = multiplyMatricesOnGPU`,
);
Expand All @@ -76,21 +80,21 @@ const main = async () => {
const a = createMatrix(m, k, rand);
const b = createMatrix(k, n, rand);

const r1 = await multiplyMatrices(a, b);
const r1 = multiplyMatrices(a, b);
const { lastResult: r2 } = await benchmarkGPU(a, b);
const match = matricesMatch(r1, r2, 1e-4);
if (!match.ok) {
const matchGPU = matricesMatch(r1, r2, 1e-4);
if (!matchGPU.ok) {
anyMismatch = true;
console.log(` [${label}] MISMATCH: ${match.reason}`);
console.log(` [${label}] MISMATCH (GPU): ${matchGPU.reason}`);
continue;
}

const baseline = await benchmark(() => {
multiplyMatrices(a, b);
});
const { stats: candidate } = await benchmarkGPU(a, b);
const speedup = baseline.median / candidate.median;
printRow(label, "baseline", baseline, "candidate", candidate, speedup);
const { stats: candidateGPU } = await benchmarkGPU(a, b);
const speedupGPU = baseline.median / candidateGPU.median;
printRow(label, "CPU", baseline, "GPU", candidateGPU, speedupGPU);
}

console.log("");
Expand Down Expand Up @@ -158,14 +162,13 @@ const main = async () => {
const baselineStats = await benchmark(() => {
multiplyMatrices(a, b);
});
const { stats: candidateStats } = await benchmarkGPU(a, b);
const speedup = baselineStats.median / candidateStats.median;
const winner = speedup >= 1 ? "GPU" : "CPU";
const { stats: gpuStats } = await benchmarkGPU(a, b);
const speedupGPU = baselineStats.median / gpuStats.median;
console.log(
` ${label} (${m}×${k} * ${k}×${n}, params=${actualParams}, output=${outputCells} cells)`,
);
console.log(
` baseline=${fmtMs(baselineStats.median)} candidate=${fmtMs(candidateStats.median)} → ${winner} wins (${speedup >= 1 ? speedup.toFixed(2) : (1 / speedup).toFixed(2)}x)`,
` baseline=${fmtMs(baselineStats.median)} GPU=${fmtMs(gpuStats.median)} (${speedupGPU >= 1 ? speedupGPU.toFixed(2) : (1 / speedupGPU).toFixed(2)}x)`,
);
} catch (e) {
const msg = e instanceof Error ? e.message : String(e);
Expand Down
13 changes: 6 additions & 7 deletions benchmarking/bench-mlp.ts
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
import { createMatrix, createVector } from "../shared/matrices.ts";
import { createMatrix } from "../shared/matrices.ts";
import {
createMatrixBuffer,
extractMatrixBuffer,
Expand Down Expand Up @@ -37,11 +37,11 @@ const createTestWeights = (
): MultilayerPerceptronWeights => ({
wUp: {
weightsMatrix: createMatrix(dimensions, dimensions * MLP_MULTIPLE, rand),
biasVector: createVector(dimensions * MLP_MULTIPLE, rand),
biasVector: createMatrix(1, dimensions * MLP_MULTIPLE, rand),
},
wDown: {
weightsMatrix: createMatrix(dimensions * MLP_MULTIPLE, dimensions, rand),
biasVector: createVector(dimensions, rand),
biasVector: createMatrix(1, dimensions, rand),
},
});

Expand All @@ -50,11 +50,11 @@ const weightsToGPU = (
): MultilayerPerceptronGPUBuffers => ({
wUp: {
weightsMatrix: createMatrixBuffer(weights.wUp.weightsMatrix),
biasVector: createMatrixBuffer([weights.wUp.biasVector]),
biasVector: createMatrixBuffer(weights.wUp.biasVector),
},
wDown: {
weightsMatrix: createMatrixBuffer(weights.wDown.weightsMatrix),
biasVector: createMatrixBuffer([weights.wDown.biasVector]),
biasVector: createMatrixBuffer(weights.wDown.biasVector),
},
});

Expand Down Expand Up @@ -82,7 +82,6 @@ const main = async () => {
const cpuResult = getMultilayerPerceptronActivations(
encoding,
weights,
MLP_MULTIPLE,
);
getMultilayerPerceptronActivationsOnGPU(
encodingBuf,
Expand All @@ -108,7 +107,7 @@ const main = async () => {
);

const cpuStats = await benchmark(() => {
getMultilayerPerceptronActivations(encoding, weights, MLP_MULTIPLE);
getMultilayerPerceptronActivations(encoding, weights);
});
const gpuStats = await benchmark(async () => {
getMultilayerPerceptronActivationsOnGPU(
Expand Down
40 changes: 21 additions & 19 deletions model/activations-types.ts
Original file line number Diff line number Diff line change
@@ -1,42 +1,44 @@
import type { Matrix } from "../shared/matrices.ts";

export type AttentionHeadActivations = {
inputK: number[][];
inputV: number[][];
inputQ: number[][];
attentionRelevancyOutput: number[][];
softmaxOutput: number[][];
output: number[][];
inputK: Matrix;
inputV: Matrix;
inputQ: Matrix;
attentionRelevancyOutput: Matrix;
softmaxOutput: Matrix;
output: Matrix;
};

export type AttentionActivations = {
normalizedInput: number[][];
normalizedInput: Matrix;
heads: AttentionHeadActivations[];
outMatrixInputActivations: number[][];
output: number[][];
outMatrixInputActivations: Matrix;
output: Matrix;
};

export type MultilayerPerceptronActivations = {
normalizedInputToUpping: number[][];
normalizedInputToUpping: Matrix;
/** Already biased - can reverse-calculate subtracting bias weights */
uppingToNonLinear: number[][];
uppingToNonLinear: Matrix;
/** C x 4D matrix */
nonLinearToDowning: number[][];
nonLinearToDowning: Matrix;
/** Already biased - can reverse-calculate subtracting bias weights */
downingOutput: number[][];
downingOutput: Matrix;
};

export type TransformerActivations = {
transformerInput: number[][];
transformerInput: Matrix;
attention: AttentionActivations;
mlp: MultilayerPerceptronActivations;
// Can calculate transformer output by taking input and adding both attention + mlp output
};

export type Activations = {
inputPositionToVocabPosition: number[];
tokensToPosition: number[][];
positionToTransformers: number[][];
tokensToPosition: Matrix;
positionToTransformers: Matrix;
transformerActivations: TransformerActivations[];
transformersToNormalizer: number[][];
normalizerToUnembeddings: number[][];
unembeddingsOutputLogits: number[][];
transformersToNormalizer: Matrix;
normalizerToUnembeddings: Matrix;
unembeddingsOutputLogits: Matrix;
};
8 changes: 4 additions & 4 deletions model/model-gpu-helpers.ts
Original file line number Diff line number Diff line change
Expand Up @@ -50,17 +50,17 @@ export const loadWeightsIntoGpu = (weight: Weights): WeightGPUBuffers => {
weightsMatrix: createMatrixBuffer(
t.multilayerPerceptron.wDown.weightsMatrix,
),
biasVector: createMatrixBuffer([
biasVector: createMatrixBuffer(
t.multilayerPerceptron.wDown.biasVector,
]),
),
},
wUp: {
weightsMatrix: createMatrixBuffer(
t.multilayerPerceptron.wUp.weightsMatrix,
),
biasVector: createMatrixBuffer([
biasVector: createMatrixBuffer(
t.multilayerPerceptron.wUp.biasVector,
]),
),
},
},
}),
Expand Down
52 changes: 23 additions & 29 deletions model/model-helpers.test.ts
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
import { describe, expect, it } from "vitest";
import { END_OF_SEQUENCE_TOKEN } from "../shared/const.ts";
import { createMatrix, type Matrix } from "../shared/matrices.ts";
import {
extractHiddenDimensionSize,
findTokenIndex,
operateCombinedWeights,
} from "./model-helpers.ts";
import type { Model } from "./model-types.ts";

/** Builds an array of `length` entries, each set to `value` (defaults to 1). */
const vector = (length: number, value = 1) => {
  const cells = new Array(length);
  return cells.fill(value);
};

/**
 * Builds `rows` independent row arrays, each holding `columns` copies of
 * `value` (defaults to 1).
 */
const matrix = (rows: number, columns: number, value = 1) => {
  // Fill with a placeholder first so that map() visits every slot.
  const slots = new Array(rows).fill(null);
  return slots.map(() => vector(columns, value));
};
/**
 * Test shorthand: asks `createMatrix` for a `rows` x `columns` Matrix, passing
 * an initializer callback that always returns `value` (defaults to 1).
 */
const m = (rows: number, columns: number, value = 1): Matrix => {
  const constantInitializer = () => value;
  return createMatrix(rows, columns, constantInitializer);
};

const HIDDEN_DIMENSION_SIZE = 4;
const DEFAULT_MLP_MULTIPLE = 4;
Expand All @@ -19,69 +18,64 @@ const validModel: Model = {
vocabulary: ["hello", "world", "beer", END_OF_SEQUENCE_TOKEN],
headsCount: 2,
mlpMultiple: DEFAULT_MLP_MULTIPLE,
embeddings: matrix(4, HIDDEN_DIMENSION_SIZE),
unembeddings: matrix(HIDDEN_DIMENSION_SIZE, 4),
embeddings: m(4, HIDDEN_DIMENSION_SIZE),
unembeddings: m(HIDDEN_DIMENSION_SIZE, 4),
transformers: [
{
attention: {
Q: matrix(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE),
K: matrix(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE),
V: matrix(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE),
out: matrix(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE),
Q: m(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE),
K: m(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE),
V: m(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE),
out: m(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE),
},
multilayerPerceptron: {
wUp: {
weightsMatrix: matrix(
weightsMatrix: m(
HIDDEN_DIMENSION_SIZE,
DEFAULT_MLP_DIMENSION_SIZE,
),
biasVector: vector(DEFAULT_MLP_DIMENSION_SIZE),
biasVector: m(1, DEFAULT_MLP_DIMENSION_SIZE),
},
wDown: {
weightsMatrix: matrix(
weightsMatrix: m(
DEFAULT_MLP_DIMENSION_SIZE,
HIDDEN_DIMENSION_SIZE,
),
biasVector: vector(HIDDEN_DIMENSION_SIZE),
biasVector: m(1, HIDDEN_DIMENSION_SIZE),
},
},
},
],
};

/**
 * Produces a Model for a test case: a structured clone of `validModel` with
 * any top-level properties in `overrides` replacing the cloned ones.
 */
const createModel = (overrides: Partial<Model> = {}): Model => {
  // Clone first so tests can mutate the result without touching validModel.
  const base = structuredClone(validModel);
  return { ...base, ...overrides };
};

const createModelWithValue = (value: number): Model => ({
...validModel,
embeddings: matrix(4, HIDDEN_DIMENSION_SIZE, value),
unembeddings: matrix(HIDDEN_DIMENSION_SIZE, 4, value),
embeddings: m(4, HIDDEN_DIMENSION_SIZE, value),
unembeddings: m(HIDDEN_DIMENSION_SIZE, 4, value),
transformers: [
{
attention: {
Q: matrix(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE, value),
K: matrix(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE, value),
V: matrix(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE, value),
out: matrix(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE, value),
Q: m(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE, value),
K: m(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE, value),
V: m(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE, value),
out: m(HIDDEN_DIMENSION_SIZE, HIDDEN_DIMENSION_SIZE, value),
},
multilayerPerceptron: {
wUp: {
weightsMatrix: matrix(
weightsMatrix: m(
HIDDEN_DIMENSION_SIZE,
DEFAULT_MLP_DIMENSION_SIZE,
value,
),
biasVector: vector(DEFAULT_MLP_DIMENSION_SIZE, value),
biasVector: m(1, DEFAULT_MLP_DIMENSION_SIZE, value),
},
wDown: {
weightsMatrix: matrix(
weightsMatrix: m(
DEFAULT_MLP_DIMENSION_SIZE,
HIDDEN_DIMENSION_SIZE,
value,
),
biasVector: vector(HIDDEN_DIMENSION_SIZE, value),
biasVector: m(1, HIDDEN_DIMENSION_SIZE, value),
},
},
},
Expand Down
Loading