Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
80 changes: 65 additions & 15 deletions packages/web/src/index.css
Original file line number Diff line number Diff line change
Expand Up @@ -274,53 +274,103 @@ td {
height: 48px;
}

/* Header cell of the rank ("#") column: right-aligned and visually
   de-emphasized (normal weight, tertiary text color). */
th.rank-col {
text-align: right;
font-weight: normal;
color: var(--color-text-tertiary);
}

/* Give the first column of the models table (header and body cells) a
   minimum width so it cannot shrink below 2.75rem. */
#models-table th:first-child,
#models-table td:first-child {
min-width: 2.75rem;
}

tbody {
td {
color: var(--color-text-tertiary);
}

td:nth-child(1) {
td:nth-child(2) {
font-weight: 500;
}

td:nth-child(1),
td:nth-child(2),
td:nth-child(3),
td:nth-child(4),
td:nth-child(5),
td:nth-child(6),
td:nth-child(9),
td:nth-child(10),
td:nth-child(11),
td:nth-child(12),
td:nth-child(13),
td:nth-child(14),
td:nth-child(15),
td:nth-child(16) {
td:nth-child(16),
td:nth-child(17),
td:nth-child(18),
td:nth-child(19),
td:nth-child(20) {
color: var(--color-text);
}

td:nth-child(5),
td:nth-child(6),
td:nth-child(18) {
td:nth-child(9),
td:nth-child(10),
td:nth-child(22) {
font-size: 0.8125rem;
font-family: var(--font-mono);
text-transform: uppercase;
}

td:nth-child(3),
td:nth-child(4),
td:nth-child(9),
td:nth-child(10),
td:nth-child(11),
td:nth-child(12),
td:nth-child(7),
td:nth-child(8),
td:nth-child(13),
td:nth-child(14),
td:nth-child(15),
td:nth-child(16),
td:nth-child(17) {
td:nth-child(17),
td:nth-child(18),
td:nth-child(19),
td:nth-child(20),
td:nth-child(21) {
font-size: 0.8125rem;
font-family: var(--font-mono);
}

/* Rank cell: small monospace, right-aligned, de-emphasized color.
   tabular-nums requests fixed-width digits so numbers line up per row. */
td.rank {
font-family: var(--font-mono);
font-size: 0.8125rem;
color: var(--color-text-tertiary);
text-align: right;
font-variant-numeric: tabular-nums;
}

/* Score cell wrapper. position: relative makes it the containing block for
   the absolutely positioned ::before bar drawn behind the value. */
.score {
position: relative;
display: flex;
align-items: center;
min-width: 3rem;
}

/* Background bar for a score: vertically centered, anchored to the left edge,
   with a width of --score percent of the wrapper. --score is expected to be
   set inline on the .score element as a unitless number. */
.score::before {
content: "";
position: absolute;
left: 0;
top: 50%;
/* Pull the bar up by half its own height so it is vertically centered. */
transform: translateY(-50%);
height: 0.875rem;
/* Multiplying the unitless --score by 1% turns it into a percentage. */
width: calc(var(--score) * 1%);
/* 22% brand color mixed with transparent: a faint tint behind the text. */
background: color-mix(in srgb, var(--color-brand) 22%, transparent);
border-radius: 2px;
z-index: 0;
}

/* Numeric score text. Positioned with z-index 1 so it paints above the
   .score::before bar (z-index 0). */
.score-value {
position: relative;
z-index: 1;
font-family: var(--font-mono);
font-size: 0.8125rem;
font-variant-numeric: tabular-nums;
}

.provider-cell {
display: flex;
align-items: center;
Expand Down
3 changes: 3 additions & 0 deletions packages/web/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -131,6 +131,9 @@ function prepareRow(row: TableRow): VirtualizedRow {
const sortValues: VirtualizedRow["sortValues"] = [
row.providerName,
row.modelName,
row.overallScore,
row.valueScore,
row.capabilityScore,
row.family,
row.providerId,
row.modelId,
Expand Down
39 changes: 38 additions & 1 deletion packages/web/src/render.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ import { renderToString } from "hono/jsx/dom/server";
import { existsSync } from "fs";
import path from "path";
import { type TableRow, renderRow, getLargestRow } from "./shared.js";
import { annotateScores } from "./score.js";

export const Providers = await generate(
path.join(import.meta.dir, "..", "..", "..", "providers")
Expand Down Expand Up @@ -64,7 +65,7 @@ for (const [providerId] of Object.entries(Providers)) {

export const INITIAL_ROW_COUNT = 50;

export const TableRows: TableRow[] = Object.entries(Providers)
const RawRows = Object.entries(Providers)
.sort(([, providerA], [, providerB]) =>
providerA.name.localeCompare(providerB.name)
)
Expand Down Expand Up @@ -102,6 +103,11 @@ export const TableRows: TableRow[] = Object.entries(Providers)
}))
);

// Score every raw row, then order the result so the highest overall score
// comes first; this is the table's default "best overall" ranking.
export const TableRows: TableRow[] = annotateScores(RawRows);
TableRows.sort((left, right) => right.overallScore - left.overallScore);

const largestRow = getLargestRow(TableRows);

export const Rendered = renderToString(
Expand Down Expand Up @@ -142,12 +148,43 @@ export const Rendered = renderToString(
<table id="models-table">
<thead>
<tr>
<th class="rank-col">#</th>
<th class="sortable" data-type="text">
Provider <span class="sort-indicator"></span>
</th>
<th class="sortable" data-type="text">
Model <span class="sort-indicator"></span>
</th>
<th class="sortable" data-type="number">
<div class="header-container">
<span class="header-text">
Overall
<br />
<span class="desc">score /100</span>
</span>
<span class="sort-indicator"></span>
</div>
</th>
<th class="sortable" data-type="number">
<div class="header-container">
<span class="header-text">
Value
<br />
<span class="desc">score /100</span>
</span>
<span class="sort-indicator"></span>
</div>
</th>
<th class="sortable" data-type="number">
<div class="header-container">
<span class="header-text">
Capability
<br />
<span class="desc">score /100</span>
</span>
<span class="sort-indicator"></span>
</div>
</th>
<th class="sortable" data-type="text">
Family <span class="sort-indicator"></span>
</th>
Expand Down
134 changes: 134 additions & 0 deletions packages/web/src/score.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,134 @@
import type { TableRow } from "./shared.js";

/**
* Objective model scoring.
*
* Every input is a factual field already in the catalog (cost, context window,
* output limit, capability flags, modality breadth, release date). Nothing is
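* benchmarked or hand-graded. We turn those raw fields into four transparent,
* normalized 0-100 components, which are blended into three composite scores
* (overall, value, capability) so the table can be ranked from different
* angles. The components are: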
*
* - capability : what the model can do (capability flags + modality breadth)
* - cost : price efficiency (cheaper -> higher; free -> top)
* - context : context window + output limit (log-scaled)
* - recency : how recently it was released
*
* Each composite below is just a weighted blend of those four components. The
* weights are the only opinion in the file and are intentionally kept here, in
* one place, so they're easy to audit or change.
*/
/**
 * Component weights for each composite score, keyed by composite name.
 * Every weight set sums to 1, so blending components that are each on a
 * 0-100 scale yields a composite that is also on a 0-100 scale.
 */
const WEIGHTS = {
// Well-rounded "best overall".
overall: { capability: 0.4, cost: 0.3, context: 0.2, recency: 0.1 },
// Cheap-yet-capable wins.
value: { capability: 0.35, cost: 0.5, context: 0.1, recency: 0.05 },
// What the model can do dominates; price is a minor tiebreaker.
capability: { capability: 0.6, cost: 0.15, context: 0.2, recency: 0.05 },
} as const;

/**
 * A table row before scores are attached: `TableRow` without its three
 * score fields (`overallScore`, `valueScore`, `capabilityScore`).
 */
type ScorableRow = Omit<
TableRow,
"overallScore" | "valueScore" | "capabilityScore"
>;

/** Score given to a value that cannot be placed on the 0-100 scale. */
const NEUTRAL = 50;

/**
 * Builds a min-max scaler over `values`.
 *
 * The returned function maps the smallest finite value in `values` to 0 and
 * the largest to 100, interpolating linearly in between. Non-finite entries
 * (NaN, +/-Infinity) are ignored when finding the range.
 *
 * @param values Raw dataset values; may contain non-finite entries.
 * @returns A function mapping a raw value to its 0-100 position. It returns
 *   `NEUTRAL` when its argument is non-finite, or when the dataset has no
 *   spread (empty, all non-finite, or all equal), so a missing field never
 *   silently wins or loses.
 */
function normalizer(values: number[]): (value: number) => number {
  // Scan once instead of calling Math.min(...values) / Math.max(...values):
  // spreading a large array into call arguments can exceed the engine's
  // argument limit and throw a RangeError.
  let min = Infinity;
  let max = -Infinity;
  for (const candidate of values) {
    if (!Number.isFinite(candidate)) continue;
    if (candidate < min) min = candidate;
    if (candidate > max) max = candidate;
  }
  // With no finite values, min/max keep their sentinels; treat that as flat.
  const span = max >= min ? max - min : 0;

  return (value: number) => {
    if (!Number.isFinite(value) || span === 0) return NEUTRAL;
    return ((value - min) / span) * 100;
  };
}

/**
 * Raw capability count for a row: one point per enabled capability flag
 * (tool calling, reasoning, structured output, temperature) plus one point
 * per supported input modality and per supported output modality.
 */
function capabilityRaw(row: ScorableRow): number {
  const enabledFlags = [
    row.toolCall,
    row.reasoning,
    row.structuredOutput,
    row.temperature,
  ].filter(Boolean).length;
  return enabledFlags + row.input.length + row.output.length;
}

/**
 * Raw context measure: log10 of the context window plus half of log10 of the
 * output limit. Both are log-scaled because they span orders of magnitude;
 * a missing or zero limit contributes log10(1) = 0.
 */
function contextRaw(row: ScorableRow): number {
  const windowMagnitude = Math.log10((row.contextLimit || 0) + 1);
  const outputMagnitude = Math.log10((row.outputLimit || 0) + 1);
  return windowMagnitude + 0.5 * outputMagnitude;
}

/**
 * Blended price for a row: the sum of whichever of `inputCost` and
 * `outputCost` are defined. If only one of them is defined, the result is
 * that single price.
 *
 * @returns The summed price, or NaN when neither price is published, so the
 *   model can be given a neutral cost score rather than be treated as free.
 */
function blendedCost(row: ScorableRow): number {
  const published = [row.inputCost, row.outputCost].filter(
    (price): price is number => price !== undefined,
  );
  if (published.length === 0) {
    return NaN;
  }
  let total = 0;
  for (const price of published) {
    total += price;
  }
  return total;
}

/**
 * Release date of a row as epoch milliseconds, so newer models have larger
 * values. Yields NaN when `Date.parse` cannot parse the date string.
 */
function recencyRaw(row: ScorableRow): number {
  const { releaseDate } = row;
  return Date.parse(releaseDate);
}

/** Rounds `value` to one decimal place. */
function round(value: number): number {
  const tenths = Math.round(value * 10);
  return tenths / 10;
}

/**
 * Attaches `overallScore`, `valueScore` and `capabilityScore` to every row.
 *
 * Each row's raw capability, context, recency and log-scaled price are
 * min-max normalized across the whole input set, so the resulting scores are
 * relative to the other rows rather than absolute. The three composites are
 * weighted blends of those normalized components using `WEIGHTS`, rounded to
 * one decimal place.
 *
 * @param rows Rows without score fields.
 * @returns New row objects (inputs are not mutated) with the three scores
 *   added, in the same order as `rows`.
 */
export function annotateScores(rows: ScorableRow[]): TableRow[] {
  // Measure each row once so normalizer construction and scoring share the
  // same raw values.
  const measured = rows.map((row) => {
    const price = blendedCost(row);
    return {
      row,
      price,
      capabilityValue: capabilityRaw(row),
      contextValue: contextRaw(row),
      recencyValue: recencyRaw(row),
      // Log-scaled price; the 0.01 offset keeps a price of 0 finite.
      // Unpublished pricing (NaN) stays NaN so the normalizer ignores it.
      logPrice: Number.isNaN(price) ? NaN : Math.log10(price + 0.01),
    };
  });

  const scaleCapability = normalizer(measured.map((m) => m.capabilityValue));
  const scaleContext = normalizer(measured.map((m) => m.contextValue));
  const scaleRecency = normalizer(measured.map((m) => m.recencyValue));
  const scaleLogPrice = normalizer(measured.map((m) => m.logPrice));

  return measured.map((m) => {
    const capability = scaleCapability(m.capabilityValue);
    const context = scaleContext(m.contextValue);
    const recency = scaleRecency(m.recencyValue);
    // Cost is inverted so the cheapest model in the set scores highest;
    // rows with no published pricing get the neutral score.
    const costScore = Number.isNaN(m.price)
      ? NEUTRAL
      : 100 - scaleLogPrice(m.logPrice);

    const composite = (
      weights: (typeof WEIGHTS)[keyof typeof WEIGHTS],
    ): number =>
      round(
        capability * weights.capability +
          costScore * weights.cost +
          context * weights.context +
          recency * weights.recency,
      );

    return {
      ...m.row,
      overallScore: composite(WEIGHTS.overall),
      valueScore: composite(WEIGHTS.value),
      capabilityScore: composite(WEIGHTS.capability),
    };
  });
}
19 changes: 19 additions & 0 deletions packages/web/src/shared.ts
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,9 @@ export interface TableRow {
modelId: string;
modelName: string;
family?: string;
overallScore: number;
valueScore: number;
capabilityScore: number;
toolCall: boolean;
reasoning: boolean;
input: string[];
Expand Down Expand Up @@ -78,6 +81,17 @@ export function weightsText(value: boolean) {
return value ? "Open" : "Closed";
}

/**
 * Converts a zero-based row index into its one-based rank label.
 * Returns an empty string for any index that is not >= 0 (negative or NaN).
 */
export function rankText(index: number) {
  if (index >= 0) {
    return String(index + 1);
  }
  return "";
}

/**
 * Renders a score as an HTML fragment: a `.score` wrapper whose `--score`
 * custom property gives the bar width (0-100), containing the value
 * formatted to one decimal place.
 *
 * The bar width is clamped to 0-100; the printed number is not clamped.
 * A non-finite value (NaN, +/-Infinity) renders an empty bar and "-" as the
 * label, instead of emitting an invalid `--score:NaN` declaration.
 *
 * @param value Score to display, nominally in the 0-100 range.
 * @returns HTML markup for the score cell content.
 */
export function renderScore(value: number): string {
  const isUsable = Number.isFinite(value);
  const pct = isUsable ? Math.max(0, Math.min(100, value)) : 0;
  const label = isUsable ? value.toFixed(1) : "-";
  return `<div class="score" style="--score:${pct}"><span class="score-value">${label}</span></div>`;
}

export function renderModalityIcon(modality: string) {
const label =
modality === "pdf"
Expand All @@ -101,10 +115,14 @@ export function renderCopyButton(modelId: string) {

export function renderRow(row: TableRow, index: number) {
return `<tr data-index="${index}">
<td class="rank">${rankText(index)}</td>
<td><div class="provider-cell">${row.providerLogoSvg}<span>${escapeHtml(
row.providerName
)}</span></div></td>
<td>${escapeHtml(row.modelName)}</td>
<td>${renderScore(row.overallScore)}</td>
<td>${renderScore(row.valueScore)}</td>
<td>${renderScore(row.capabilityScore)}</td>
<td>${escapeHtml(row.family ?? "-")}</td>
<td>${escapeHtml(row.providerId)}</td>
<td><div class="model-id-cell"><span class="model-id-text">${escapeHtml(
Expand Down Expand Up @@ -136,6 +154,7 @@ export function renderRow(row: TableRow, index: number) {
export function getLargestRow(rows: TableRow[]): TableRow {
const worst: TableRow = {
providerId: "", providerName: "", providerLogoSvg: "", modelId: "", modelName: "",
overallScore: 100, valueScore: 100, capabilityScore: 100,
toolCall: true, reasoning: true,
input: [], output: [],
contextLimit: 0, outputLimit: 0,
Expand Down