Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
48 changes: 35 additions & 13 deletions reproducibility/site/scripts/build-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,7 @@ interface RunDetail {
retriever_id: string;
retriever_display: string;
paradigm: string;
model_display: string;
metrics: Record<string, number>;
config: Record<string, unknown>;
timing: Record<string, number>;
Expand Down Expand Up @@ -200,6 +201,7 @@ function readRunDetails(retrievers: Record<string, { display_name: string; parad
dataset_id: payload.pipeline.dataset_id,
method_id: payload.pipeline.method_id,
model: payload.pipeline.model,
model_display: displayModel(payload.pipeline.model),
retriever_id: retrId,
retriever_display: retrievers[retrId]?.display_name ?? retrId,
paradigm: retrievers[retrId]?.paradigm ?? retr.paradigm ?? "",
Expand Down Expand Up @@ -246,6 +248,7 @@ function buildPerDatasetViews(
method_id: lm.id,
method_display: lm.display,
model: r.model,
model_display: displayModel(r.model),
retriever_id: r.retriever_id,
retriever_display: r.retriever,
run_id: r.run_id, // populated/overwritten by the best cell
Expand Down Expand Up @@ -376,33 +379,38 @@ function buildHomeMatrix(
method_id: lm.id,
method_display: lm.display,
model: r.model,
model_display: displayModel(r.model),
retriever_id: r.retriever_id,
retriever_display: r.retriever,
}),
);

// Dataset columns + primary/secondary metric per dataset.
// Primary = nDCG@10 if present, else first eval metric.
// Secondary = recall@1000 (DL) or recall@100 (BEIR), else null.
// Dataset columns: derive primary/secondary metric from what's ACTUALLY in
// the matrix data (not from the registry whitelist, which may over-specify).
// primary = ndcg_cut_10 if present, else the first metric found.
// secondary = recall_1000 if present, else recall_100, else any other metric.
const datasetCols = Object.values(datasets)
.sort((a, b) => a.id.localeCompare(b.id))
.map((d) => {
const metrics = d.eval_metrics;
const present = new Set<string>();
for (const row of matrixRows) {
for (const m of Object.keys(row.values?.[d.id] ?? {})) present.add(m);
}
const arr = Array.from(present);
const primary =
metrics.find((m) => m === "ndcg_cut_10") ??
metrics[0] ??
null;
present.has("ndcg_cut_10") ? "ndcg_cut_10" : arr[0] ?? null;
const secondary =
metrics.find((m) => m === "recall_1000") ??
metrics.find((m) => m === "recall_100") ??
metrics.find((m) => m !== primary) ??
null;
present.has("recall_1000")
? "recall_1000"
: present.has("recall_100")
? "recall_100"
: arr.find((m) => m !== primary) ?? null;
return {
id: d.id,
name: d.name,
primary_metric: primary,
secondary_metric: secondary,
all_metrics: metrics,
all_metrics: arr.sort(),
};
});

Expand Down Expand Up @@ -453,6 +461,7 @@ function buildPerMethodViews(rows: ResultRow[]) {
(r) => `${r.model}|${r.retriever_id}`,
(r) => ({
model: r.model,
model_display: displayModel(r.model),
retriever_id: r.retriever_id,
retriever_display: r.retriever,
}),
Expand All @@ -479,6 +488,7 @@ function buildPerRetrieverViews(rows: ResultRow[]) {
method_id: lm.id,
method_display: lm.display,
model: r.model,
model_display: displayModel(r.model),
}),
);
writeJSON(path.join(VIEWS_DIR, `retriever-${retriever_id}.json`), {
Expand All @@ -493,6 +503,13 @@ function encodePathSegment(s: string): string {
return s.replace(/\//g, "__");
}

// Cosmetic label for a model id: everything after the first "/" is kept, so
// "openai/gpt-4.1" becomes "gpt-4.1". Ids without a "/" are returned unchanged.
// Callers keep the canonical id alongside this value.
function displayModel(s: string): string {
  const [, ...afterProvider] = s.split("/");
  if (afterProvider.length === 0) {
    return s;
  }
  return afterProvider.join("/");
}

// ---------- main ------------------------------------------------------------

function main() {
Expand Down Expand Up @@ -556,7 +573,12 @@ function main() {
writeJSON(path.join(OUT_DIR, "methods.json"), methodList);

const modelList = Array.from(modelCounts.entries())
.map(([id, run_count]) => ({ id, run_count, slug: encodePathSegment(id) }))
.map(([id, run_count]) => ({
id,
display: displayModel(id),
run_count,
slug: encodePathSegment(id),
}))
.sort((a, b) => a.id.localeCompare(b.id));
writeJSON(path.join(OUT_DIR, "models.json"), modelList);

Expand Down
171 changes: 171 additions & 0 deletions reproducibility/site/src/components/InteractiveTable.astro
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
---
/**
* Wraps a server-rendered <table> with a global search input + click-to-sort
* column headers. Vanilla JS — no framework island.
*
* Conventions inside the wrapped table:
* - Every <th> in <thead> is sortable. Add data-sort-skip on a <th> to skip it.
* - Each <td> can have data-sort-value="<number-or-string>" to override the
* visible text for sorting (useful for cells containing links or formatted
* numbers). When absent, the cell's textContent is used.
* - Each <tr> in <tbody> is searched against the input by concatenated
* textContent (case-insensitive).
*/

/** Props accepted by the InteractiveTable wrapper component. */
interface Props {
/** Placeholder text for the search input. Defaults to "Search rows…" when omitted. */
searchPlaceholder?: string;
/**
 * Initial sort as { column, direction }. `column` is the zero-based index of
 * the header/cell to sort by. The value is serialized as "column:direction"
 * into the wrapper's data-initial-sort attribute and applied by the client script.
 */
initialSort?: { column: number; direction: "asc" | "desc" };
}

const props = Astro.props;
const { searchPlaceholder = "Search rows…" } = props;
const { initialSort } = props;

// Encode the optional initial sort as "column:direction" for the
// data-initial-sort attribute; an empty string means "no initial sort".
let initialSortAttr = "";
if (initialSort) {
  initialSortAttr = String(initialSort.column) + ":" + String(initialSort.direction);
}
---

<div class="qg-itable" data-initial-sort={initialSortAttr}>
<div class="mb-3 flex flex-wrap items-center gap-3 text-sm">
<input
type="search"
class="qg-itable-search w-64 rounded border border-qg-border bg-qg-bg-soft px-3 py-1.5 text-sm focus:border-qg-accent focus:outline-none"
placeholder={searchPlaceholder}
autocomplete="off"
/>
<span class="text-qg-fg-muted">
<span class="qg-itable-shown">0</span> / <span class="qg-itable-total">0</span> rows
</span>
</div>
<slot />
</div>

<style is:global>
.qg-itable table thead th {
cursor: pointer;
user-select: none;
}
.qg-itable table thead th[data-sort-skip] {
cursor: default;
}
.qg-itable table thead th .qg-sort-arrow {
opacity: 0.35;
margin-left: 0.25rem;
font-size: 0.7rem;
}
.qg-itable table thead th[data-sort-dir="asc"] .qg-sort-arrow,
.qg-itable table thead th[data-sort-dir="desc"] .qg-sort-arrow {
opacity: 1;
color: var(--qg-accent);
}
</style>

<script>
/** Sort direction used by the click-to-sort headers. */
type QgSortDirection = "asc" | "desc";

/** Cached sort key for a single table cell. */
interface QgSortCell {
  /** data-sort-value when present, otherwise the cell's textContent. */
  raw: string;
  /** parseFloat(raw) when that is a finite number, otherwise null. */
  num: number | null;
}

/**
 * Compares two cached cells for sorting.
 *
 * Both cells numeric -> numeric comparison; otherwise the raw strings are
 * compared with localeCompare. A missing cell (row shorter than the sorted
 * column, e.g. because of colspan or an out-of-range column index) is treated
 * as an empty, non-numeric value instead of being dereferenced.
 *
 * @returns negative / zero / positive, already flipped for `dir === "desc"`.
 */
function compareSortCells(
  a: QgSortCell | undefined,
  b: QgSortCell | undefined,
  dir: QgSortDirection,
): number {
  const aNum = a?.num ?? null;
  const bNum = b?.num ?? null;
  if (aNum !== null && bNum !== null) {
    return dir === "asc" ? aNum - bNum : bNum - aNum;
  }
  const aRaw = a?.raw ?? "";
  const bRaw = b?.raw ?? "";
  return dir === "asc" ? aRaw.localeCompare(bRaw) : bRaw.localeCompare(aRaw);
}

/**
 * Parses the "column:direction" string stored in data-initial-sort.
 *
 * @returns null when the spec is empty or the column is not a non-negative
 *   integer. Any direction other than "desc" (including a missing one) falls
 *   back to "asc" so an unexpected value is never written to data-sort-dir.
 */
function parseInitialSort(
  spec: string | undefined,
): { column: number; direction: QgSortDirection } | null {
  if (!spec) return null;
  const [colStr = "", dirStr] = spec.split(":");
  const column = parseInt(colStr, 10);
  if (Number.isNaN(column) || column < 0) return null;
  const direction: QgSortDirection = dirStr === "desc" ? "desc" : "asc";
  return { column, direction };
}

/**
 * Wires one `.qg-itable` wrapper: global search input, click-to-sort headers,
 * the shown/total counters, and the optional initial sort.
 *
 * The function marks the root with data-qg-wired="1" and returns early on a
 * second call, so it is safe to invoke more than once per root. It is a no-op
 * when the wrapper contains no <table> or the table has no <tbody>.
 *
 * @param root The `.qg-itable` wrapper element.
 */
function wireInteractiveTable(root: HTMLElement) {
  if (root.dataset.qgWired === "1") return;
  root.dataset.qgWired = "1";

  const table = root.querySelector("table");
  if (!table) return;
  const tbody = table.querySelector("tbody");
  if (!tbody) return;
  const headers = Array.from(table.querySelectorAll<HTMLTableCellElement>("thead th"));
  const allRows = Array.from(tbody.querySelectorAll<HTMLTableRowElement>("tr"));
  const shownCounter = root.querySelector(".qg-itable-shown");
  const totalCounter = root.querySelector(".qg-itable-total");
  const search = root.querySelector<HTMLInputElement>(".qg-itable-search");

  if (totalCounter) totalCounter.textContent = String(allRows.length);

  // Pre-cache sortable values + searchable text per row so neither sorting nor
  // searching has to re-read the DOM.
  const meta = allRows.map((tr) => ({
    tr,
    searchText: tr.textContent?.toLowerCase() ?? "",
    cells: Array.from(tr.cells).map((c): QgSortCell => {
      const raw = c.dataset.sortValue ?? c.textContent ?? "";
      const num = parseFloat(raw);
      return { raw, num: Number.isFinite(num) ? num : null };
    }),
  }));

  let currentSort: { column: number; direction: QgSortDirection } | null = null;

  // Append a sort indicator to every sortable header that does not have one yet.
  const addArrows = (): void => {
    headers.forEach((th) => {
      if (th.querySelector(".qg-sort-arrow")) return;
      if (th.dataset.sortSkip !== undefined) return;
      const span = document.createElement("span");
      span.className = "qg-sort-arrow";
      span.textContent = "↕";
      th.appendChild(span);
    });
  };

  // Sort the rows by one column and update the header indicators.
  const setSort = (colIdx: number, dir: QgSortDirection): void => {
    currentSort = { column: colIdx, direction: dir };
    headers.forEach((th, i) => {
      const arrow = th.querySelector<HTMLElement>(".qg-sort-arrow");
      if (i === colIdx) {
        th.dataset.sortDir = dir;
        if (arrow) arrow.textContent = dir === "asc" ? "↑" : "↓";
      } else {
        delete th.dataset.sortDir;
        if (arrow) arrow.textContent = "↕";
      }
    });
    const sorted = [...meta].sort((a, b) =>
      compareSortCells(a.cells[colIdx], b.cells[colIdx], dir),
    );
    // Re-appending existing rows moves them, so this reorders the tbody in place.
    const frag = document.createDocumentFragment();
    sorted.forEach((m) => frag.appendChild(m.tr));
    tbody.appendChild(frag);
  };

  // Show/hide rows according to the search box and refresh the shown counter.
  const applySearch = (): void => {
    const q = (search?.value ?? "").trim().toLowerCase();
    let shown = 0;
    for (const m of meta) {
      // Rows hidden by an external filter (e.g. the home page's chip
      // selection) carry .qg-chip-hidden — respect that as a hard veto.
      const chipHidden = m.tr.classList.contains("qg-chip-hidden");
      const matchesSearch = !q || m.searchText.includes(q);
      const ok = matchesSearch && !chipHidden;
      m.tr.style.display = ok ? "" : "none";
      if (ok) shown++;
    }
    if (shownCounter) shownCounter.textContent = String(shown);
  };

  addArrows();
  headers.forEach((th, i) => {
    if (th.dataset.sortSkip !== undefined) return;
    th.addEventListener("click", () => {
      // Clicking the column that is already sorted ascending flips it to
      // descending; every other click sorts ascending.
      const nextDir: QgSortDirection =
        currentSort?.column === i && currentSort.direction === "asc" ? "desc" : "asc";
      setSort(i, nextDir);
    });
  });
  search?.addEventListener("input", applySearch);
  // External code can fire this event after toggling .qg-chip-hidden on
  // rows to re-sync row visibility + the shown-count badge.
  root.addEventListener("qg-itable-reapply", () => applySearch());

  // Initial state.
  applySearch();
  const initial = parseInitialSort(root.dataset.initialSort);
  if (initial) setSort(initial.column, initial.direction);
}

document.querySelectorAll<HTMLElement>(".qg-itable").forEach(wireInteractiveTable);
</script>
75 changes: 40 additions & 35 deletions reproducibility/site/src/pages/datasets/[id].astro
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
import Default from "../../layouts/Default.astro";
import EmptyState from "../../components/EmptyState.astro";
import MetricCell from "../../components/MetricCell.astro";
import InteractiveTable from "../../components/InteractiveTable.astro";
import datasetsList from "../../data/datasets.json";

// Eagerly load all per-dataset shards. Astro statically analyzes this so it
Expand Down Expand Up @@ -47,42 +48,46 @@ const metricCols: string[] = view?.metric_columns ?? datasetMeta?.eval_metrics ?
/>
</div>
) : (
<div class="mt-6 overflow-x-auto">
<table class="w-full border-collapse text-sm">
<thead>
<tr class="border-b border-qg-border text-left">
<th class="px-3 py-2">Method</th>
<th class="px-3 py-2">Model</th>
<th class="px-3 py-2">Retriever</th>
{metricCols.map((m) => (
<th class="px-3 py-2 text-right qg-mono text-xs">{m}</th>
))}
<th class="px-3 py-2 text-right">Run</th>
</tr>
</thead>
<tbody>
{runs.map((r: any) => (
<tr class="border-b border-qg-border/60 hover:bg-qg-bg-soft">
<td class="px-3 py-2 font-medium">{r.method_display ?? r.method_id}</td>
<td class="px-3 py-2 qg-mono text-xs">{r.model}</td>
<td class="px-3 py-2 text-xs">{r.retriever_display ?? r.retriever_id}</td>
{metricCols.map((m) => (
<td class="px-3 py-2 text-right">
<MetricCell value={r.metrics?.[m]} best={r.best_for?.[m]} />
</td>
<div class="mt-6">
<InteractiveTable searchPlaceholder="Filter by method, model, retriever…">
<div class="overflow-x-auto">
<table class="w-full border-collapse text-sm">
<thead>
<tr class="border-b border-qg-border text-left">
<th class="px-3 py-2">Method</th>
<th class="px-3 py-2">Model</th>
<th class="px-3 py-2">Retriever</th>
{metricCols.map((m) => (
<th class="px-3 py-2 text-right qg-mono text-xs">{m}</th>
))}
<th class="px-3 py-2 text-right" data-sort-skip>Run</th>
</tr>
</thead>
<tbody>
{runs.map((r: any) => (
<tr class="border-b border-qg-border/60 hover:bg-qg-bg-soft">
<td class="px-3 py-2 font-medium">{r.method_display ?? r.method_id}</td>
<td class="px-3 py-2 qg-mono text-xs" data-sort-value={r.model_display ?? r.model}>{r.model_display ?? r.model}</td>
<td class="px-3 py-2 text-xs">{r.retriever_display ?? r.retriever_id}</td>
{metricCols.map((m) => (
<td class="px-3 py-2 text-right" data-sort-value={r.metrics?.[m] ?? ""}>
<MetricCell value={r.metrics?.[m]} best={r.best_for?.[m]} />
</td>
))}
<td class="px-3 py-2 text-right">
<a
class="qg-mono text-xs text-qg-accent hover:underline"
href={`/runs/${r.run_id}`}
>
{r.run_id.slice(0, 8)}…
</a>
</td>
</tr>
))}
<td class="px-3 py-2 text-right">
<a
class="qg-mono text-xs text-qg-accent hover:underline"
href={`/runs/${r.run_id}`}
>
{r.run_id.slice(0, 8)}…
</a>
</td>
</tr>
))}
</tbody>
</table>
</tbody>
</table>
</div>
</InteractiveTable>
</div>
)
}
Expand Down
Loading
Loading