Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 36 additions & 8 deletions reproducibility/site/scripts/build-data.ts
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,7 @@ interface RunDetail {
params_hash: string;
dataset_id: string;
method_id: string;
method_display: string;
model: string;
retriever_id: string;
retriever_display: string;
Expand Down Expand Up @@ -195,11 +196,16 @@ function readRunDetails(retrievers: Record<string, { display_name: string; parad
const retr = payload.config?.retrieval ?? {};
const retrId = retr.retriever_id ?? "";
const artifacts = payload.artifacts ?? {};
const lm = logicalMethod(
payload.pipeline.method_id,
JSON.stringify(payload.config?.method_params ?? {}),
);
out[payload.run_id] = {
run_id: payload.run_id,
params_hash: hash,
dataset_id: payload.pipeline.dataset_id,
method_id: payload.pipeline.method_id,
method_display: lm.display,
model: payload.pipeline.model,
model_display: displayModel(payload.pipeline.model),
retriever_id: retrId,
Expand Down Expand Up @@ -235,10 +241,9 @@ function buildPerDatasetViews(
}

for (const [datasetId, dsRows] of byDataset) {
const allowed = datasets[datasetId]?.eval_metrics ?? [];

// Pivot to one row per (logical_method, model, retriever). Variants are
// folded by max value per metric — matches the home matrix.
// folded by max value per metric — matches the home matrix. Track run_id
// per metric so the "best" cell links to the run that achieved it.
const map = new Map<string, any>();
for (const r of dsRows) {
const lm = logicalMethod(r.method_id, r.method_params_json);
Expand All @@ -251,21 +256,42 @@ function buildPerDatasetViews(
model_display: displayModel(r.model),
retriever_id: r.retriever_id,
retriever_display: r.retriever,
run_id: r.run_id, // populated/overwritten by the best cell
run_ids: {} as Record<string, string>, // metric → run_id of the winning value
metrics: {} as Record<string, number>,
best_for: {} as Record<string, boolean>,
});
}
const row = map.get(key);
if (row.metrics[r.metric] === undefined || r.value > row.metrics[r.metric]) {
row.metrics[r.metric] = r.value;
row.run_id = r.run_id;
row.run_ids[r.metric] = r.run_id;
}
}

// Discover which metrics actually exist in the data — the registry's
// eval_metrics is aspirational and may over-specify (e.g. MAP on DL,
// recall_1000 on BEIR). Render only what we have.
const present = new Set<string>();
for (const row of map.values()) {
for (const m of Object.keys(row.metrics)) present.add(m);
}
const allMetrics = Array.from(present);
const primary = present.has("ndcg_cut_10") ? "ndcg_cut_10" : allMetrics[0] ?? null;
const secondary = present.has("recall_1000")
? "recall_1000"
: present.has("recall_100")
? "recall_100"
: allMetrics.find((m) => m !== primary) ?? null;
// Order: primary first, secondary second, then anything else.
const orderedMetrics = [
...(primary ? [primary] : []),
...(secondary && secondary !== primary ? [secondary] : []),
...allMetrics.filter((m) => m !== primary && m !== secondary),
];

// best_for flags relative to the rows above.
const list = Array.from(map.values());
for (const m of allowed) {
for (const m of orderedMetrics) {
let best = -Infinity;
let bestRow: any = null;
for (const row of list) {
Expand All @@ -277,8 +303,10 @@ function buildPerDatasetViews(

writeJSON(path.join(VIEWS_DIR, `dataset-${datasetId}.json`), {
dataset_id: datasetId,
dataset: datasets[datasetId] ?? { id: datasetId, name: datasetId, eval_metrics: allowed },
metric_columns: allowed,
dataset: datasets[datasetId] ?? { id: datasetId, name: datasetId, eval_metrics: orderedMetrics },
metric_columns: orderedMetrics,
primary_metric: primary,
secondary_metric: secondary,
runs: list,
});
}
Expand Down
133 changes: 133 additions & 0 deletions reproducibility/site/src/components/FilterChips.astro
Original file line number Diff line number Diff line change
@@ -0,0 +1,133 @@
---
/**
* Chip-style filter bar for any leaderboard table.
*
* Each group corresponds to a column on the table's <tr data-*> attributes
* (e.g. data-method, data-model). Clicking a chip hides rows whose attribute
* doesn't match, by toggling the .qg-chip-hidden class and dispatching
* "qg-itable-reapply" on the nearest .qg-itable wrapper so InteractiveTable
* re-syncs its row-visibility + shown-count.
*
* The optional `metric` group is special: it never filters rows. Instead it
* swaps .qg-cell-primary / .qg-cell-secondary visibility and the matching
* column-label spans inside the table named by `tableId` (other tables on
* the page are left alone), re-keys cells' data-sort-value to the
* now-visible metric, and dispatches "qg-itable-resort" on the .qg-itable
* wrapper so sort follows what's on screen.
*/
/** One selectable chip inside a group. */
interface ChipValue {
/** Emitted as the button's data-value. For row-filter groups a row stays
* visible only when its data-{key} attribute equals this string; an empty
* string disables filtering for the group. */
value: string;
/** Text rendered inside the chip button. */
label: string;
}
/** A labelled set of mutually exclusive chips. */
interface ChipGroup {
/** "method" | "model" | "retriever" | "metric"; matches <tr data-{key}> */
key: string;
/** Visible header text. */
label: string;
/** First item is shown as the active default. For `metric`, use
* [{value:"primary", label:"nDCG@10"}, {value:"secondary", label:"Recall"}]. */
values: ChipValue[];
}
interface Props {
/** id of the table to filter (used to scope row queries to this table). */
tableId: string;
/** Chip groups, rendered in the order given. */
groups: ChipGroup[];
}
const { tableId, groups } = Astro.props;
---

<section class="flex flex-wrap gap-x-6 gap-y-3 text-sm" data-qg-filters data-table={tableId}>
{groups.map((g) => (
<div class="flex flex-wrap items-center gap-2">
<span class="text-qg-fg-muted">{g.label}:</span>
<div data-group={g.key} class="flex flex-wrap gap-1.5">
{g.values.map((v, i) => (
<button
type="button"
data-value={v.value}
class:list={["qg-chip", i === 0 && "qg-chip-active"]}
>
{v.label}
</button>
))}
</div>
</div>
))}
</section>

<style>
.qg-chip {
@apply rounded-full border border-qg-border bg-qg-bg-soft px-3 py-1 text-xs font-medium text-qg-fg-muted hover:border-qg-accent;
}
.qg-chip-active {
@apply border-qg-accent bg-qg-accent text-white hover:border-qg-accent;
}
</style>

<script>
// Wire every chip filter bar on the page to the table named by its
// data-table attribute. Each bar keeps one selected value per group:
//   - ordinary groups hide rows whose data-{key} differs from the selection;
//   - the special "metric" group swaps primary/secondary cell visibility.
// The selected chip is exposed both visually (.qg-chip-active) and to
// assistive technology (aria-pressed).
document.querySelectorAll<HTMLElement>("[data-qg-filters]").forEach((bar) => {
  // This script can execute more than once per page; wire each bar only once.
  if (bar.dataset.qgWired === "1") return;
  bar.dataset.qgWired = "1";

  const tableId = bar.dataset.table;
  if (!tableId) return;
  const table = document.getElementById(tableId);
  if (!table) return;
  const tbody = table.querySelector("tbody");
  if (!tbody) return;
  // Wrapper that listens for the re-apply / re-sort events, if the table has one.
  const itableRoot = table.closest<HTMLElement>(".qg-itable");

  const groupEls = Array.from(bar.querySelectorAll<HTMLElement>("[data-group]"));

  // Make `active` the only selected chip in `group`, keeping the CSS class
  // and aria-pressed in agreement.
  const markActive = (group: HTMLElement, active: HTMLButtonElement | null): void => {
    group.querySelectorAll<HTMLButtonElement>("button").forEach((chip) => {
      const selected = chip === active;
      chip.classList.toggle("qg-chip-active", selected);
      chip.setAttribute("aria-pressed", String(selected));
    });
  };

  // group key -> currently selected chip value ("" means "no filter").
  const state: Record<string, string> = {};
  for (const group of groupEls) {
    const key = group.dataset.group ?? "";
    const active = group.querySelector<HTMLButtonElement>(".qg-chip-active");
    state[key] = active?.dataset.value ?? "";
    markActive(group, active);
  }

  // Hide every body row that fails at least one active (non-metric,
  // non-empty) filter, then ask the table wrapper to re-sync itself.
  const applyRowFilters = (): void => {
    const activeFilters = Object.entries(state).filter(
      ([key, val]) => key !== "metric" && val !== "",
    );
    tbody.querySelectorAll<HTMLTableRowElement>("tr").forEach((tr) => {
      const hide = activeFilters.some(([key, val]) => tr.dataset[key] !== val);
      tr.classList.toggle("qg-chip-hidden", hide);
    });
    itableRoot?.dispatchEvent(new CustomEvent("qg-itable-reapply"));
  };

  // Show either the primary or the secondary metric. All queries are scoped
  // to *this* table, so when several tables coexist on a page each bar
  // controls only its own.
  const applyMetricMode = (): void => {
    const primaryShown = state.metric !== "secondary";
    const setHidden = (selector: string, hidden: boolean): void => {
      table.querySelectorAll(selector).forEach((el) => el.classList.toggle("hidden", hidden));
    };
    setHidden(".qg-col-label-primary", !primaryShown);
    setHidden(".qg-col-label-secondary", primaryShown);
    setHidden(".qg-cell-primary", !primaryShown);
    setHidden(".qg-cell-secondary", primaryShown);
    // Re-key sort value so a subsequent header click sorts by the now-visible metric.
    table.querySelectorAll<HTMLTableCellElement>("td[data-primary-value]").forEach((td) => {
      const visibleValue = primaryShown ? td.dataset.primaryValue : td.dataset.secondaryValue;
      td.dataset.sortValue = visibleValue ?? "";
    });
    // If a sort is active, re-fire it on the now-visible metric values.
    itableRoot?.dispatchEvent(new CustomEvent("qg-itable-resort"));
  };

  for (const group of groupEls) {
    const key = group.dataset.group ?? "";
    group.querySelectorAll<HTMLButtonElement>("button").forEach((btn) => {
      btn.addEventListener("click", () => {
        markActive(group, btn);
        state[key] = btn.dataset.value ?? "";
        if (key === "metric") applyMetricMode();
        else applyRowFilters();
      });
    });
  }

  // Initial apply so any non-default starting chips take effect.
  applyMetricMode();
  applyRowFilters();
});
</script>
54 changes: 36 additions & 18 deletions reproducibility/site/src/components/InteractiveTable.astro
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,28 @@ const initialSortAttr = initialSort

<div class="qg-itable" data-initial-sort={initialSortAttr}>
<div class="mb-3 flex flex-wrap items-center gap-3 text-sm">
<input
type="search"
class="qg-itable-search w-64 rounded border border-qg-border bg-qg-bg-soft px-3 py-1.5 text-sm focus:border-qg-accent focus:outline-none"
placeholder={searchPlaceholder}
autocomplete="off"
/>
<span class="text-qg-fg-muted">
<div class="relative">
<svg
aria-hidden="true"
class="pointer-events-none absolute left-3 top-1/2 h-4 w-4 -translate-y-1/2 text-qg-fg-muted"
viewBox="0 0 24 24"
fill="none"
stroke="currentColor"
stroke-width="2"
stroke-linecap="round"
stroke-linejoin="round"
>
<circle cx="11" cy="11" r="8"></circle>
<path d="m21 21-4.3-4.3"></path>
</svg>
<input
type="search"
class="qg-itable-search w-72 rounded-lg border border-qg-border bg-qg-bg pl-9 pr-3 py-2 text-sm focus:border-qg-accent focus:outline-none focus:ring-1 focus:ring-qg-accent"
placeholder={searchPlaceholder}
autocomplete="off"
/>
</div>
<span class="text-xs text-qg-fg-muted">
<span class="qg-itable-shown">0</span> / <span class="qg-itable-total">0</span> rows
</span>
</div>
Expand All @@ -48,16 +63,6 @@ const initialSortAttr = initialSort
.qg-itable table thead th[data-sort-skip] {
cursor: default;
}
.qg-itable table thead th .qg-sort-arrow {
opacity: 0.35;
margin-left: 0.25rem;
font-size: 0.7rem;
}
.qg-itable table thead th[data-sort-dir="asc"] .qg-sort-arrow,
.qg-itable table thead th[data-sort-dir="desc"] .qg-sort-arrow {
opacity: 1;
color: var(--qg-accent);
}
</style>

<script>
Expand Down Expand Up @@ -102,7 +107,15 @@ const initialSortAttr = initialSort
const span = document.createElement("span");
span.className = "qg-sort-arrow";
span.textContent = "↕";
th.appendChild(span);
// Multi-line headers (e.g. dataset name on top, metric label below)
// can declare a .qg-sort-arrow-slot inline with the title so the
// arrow stays on the same line instead of dropping below the stack.
const slot = th.querySelector(".qg-sort-arrow-slot");
if (slot) {
slot.appendChild(span);
} else {
th.appendChild(span);
}
});
}

Expand Down Expand Up @@ -165,6 +178,11 @@ const initialSortAttr = initialSort
// External code can fire this event after toggling .qg-chip-hidden on
// rows to re-sync row visibility + the shown-count badge.
root.addEventListener("qg-itable-reapply", () => applySearch());
// After an external metric-toggle rekeys data-sort-value, re-fire the
// current sort so row order matches what the user is now looking at.
root.addEventListener("qg-itable-resort", () => {
if (currentSort) setSort(currentSort.column, currentSort.direction);
});

// Initial state.
applySearch();
Expand Down
51 changes: 51 additions & 0 deletions reproducibility/site/src/components/MatrixCell.astro
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
---
/**
 * One cell in any of the leaderboard tables.
 *
 * Always renders two metric spans (primary + secondary) side by side, with
 * the secondary span starting out `hidden`; FilterChips' metric-toggle flips
 * visibility via the .qg-cell-primary / .qg-cell-secondary classes. The cell
 * exposes data-primary-value and data-secondary-value, and data-sort-value
 * starts out equal to the primary value. A missing (undefined or null)
 * metric renders as an em-dash so layout stays stable when one metric is
 * absent.
 */
interface Cell {
  /** Metric value; shown with `digits` decimal places. */
  value: number;
  /** When true the rendered span also gets the qg-cell-best class. */
  best: boolean;
}
interface Props {
  /** Primary metric; omitted or null when the run has no value for it. */
  primary?: Cell | null;
  /** Secondary metric; omitted or null when the run has no value for it. */
  secondary?: Cell | null;
  /** When set and at least one metric is present, the cell links to /runs/{runId}. */
  runId?: string | null;
  /** Decimal places passed to toFixed (default 4). */
  digits?: number;
}
const { primary, secondary, runId, digits = 4 } = Astro.props;
// Treat null and undefined alike for BOTH metrics: previously only
// `secondary` tolerated null, so a null `primary` crashed on `.value`.
const hasPrimary = primary != null;
const hasSecondary = secondary != null;
// Raw values for the data-* attributes; empty string when the metric is missing.
const primaryValue = primary?.value ?? "";
const secondaryValue = secondary?.value ?? "";
const linkable = Boolean(runId) && (hasPrimary || hasSecondary);
const primaryText = primary != null ? primary.value.toFixed(digits) : "—";
const secondaryText = secondary != null ? secondary.value.toFixed(digits) : "—";
const primaryClass = `qg-cell-primary${primary?.best ? " qg-cell-best" : ""}`;
const secondaryClass = `qg-cell-secondary hidden${secondary?.best ? " qg-cell-best" : ""}`;
---

<td
class="qg-mono px-3 py-2 text-right tabular-nums"
data-sort-value={primaryValue}
data-primary-value={primaryValue}
data-secondary-value={secondaryValue}
>
{linkable ? (
<a class="hover:text-qg-accent hover:underline" href={`/runs/${runId}`} title="View run + reproduce">
<span class={primaryClass}>{primaryText}</span>
<span class={secondaryClass}>{secondaryText}</span>
</a>
) : (
<>
<span class={primaryClass}>{primaryText}</span>
<span class={secondaryClass}>{secondaryText}</span>
</>
)}
</td>
20 changes: 0 additions & 20 deletions reproducibility/site/src/components/MetricCell.astro

This file was deleted.

2 changes: 1 addition & 1 deletion reproducibility/site/src/components/Stat.astro
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ interface Props {
const { label, value } = Astro.props;
---

<div class="rounded-lg border border-qg-border bg-qg-bg-soft p-4">
<div class="rounded-lg border border-qg-border bg-qg-bg-soft p-4 transition-colors hover:border-qg-accent">
<div class="text-2xl font-bold tabular-nums">{value}</div>
<div class="mt-1 text-xs uppercase tracking-wide text-qg-fg-muted">{label}</div>
</div>
Loading
Loading