Skip to content

Commit 0fac618

Browse files
committed
Improve official results browser filter clarity
1 parent f02fb71 commit 0fac618

File tree

2 files changed

+114
-18
lines changed

2 files changed

+114
-18
lines changed

docs/official_results/index.html

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,8 @@
99
body { margin:0; font-family: ui-sans-serif, system-ui, sans-serif; background:var(--bg); color:var(--text); }
1010
.wrap { max-width: 1180px; margin: 0 auto; padding: 20px; }
1111
.controls { display:flex; gap:12px; flex-wrap:wrap; margin-bottom: 14px; }
12+
.meta { color: var(--muted); font-size: 12px; margin: 4px 0 10px; }
13+
button { background: transparent; color: var(--text); border:1px solid #2e3d4a; border-radius:8px; padding:8px 10px; cursor:pointer; }
1214
select,input { background:var(--panel); color:var(--text); border:1px solid #2e3d4a; border-radius:8px; padding:8px; }
1315
table { width:100%; border-collapse: collapse; background:var(--panel); border-radius:10px; overflow:hidden; }
1416
th,td { border-bottom: 1px solid #2d3a45; padding: 8px 10px; text-align:left; font-size: 13px; }
@@ -33,7 +35,9 @@ <h1>Official Results Browser</h1>
3335
<select id="configFilter"><option value="">All configs</option></select>
3436
<select id="statusFilter"><option value="">All statuses</option><option>passed</option><option>failed</option></select>
3537
<input id="taskSearch" placeholder="Search task" />
38+
<button id="clearFilters" type="button">Clear filters</button>
3639
</div>
40+
<div id="stats" class="meta"></div>
3741
<table>
3842
<thead><tr><th>Suite</th><th>Run</th><th>Config</th><th>Task</th><th>Status</th><th>Reward</th><th>MCP ratio</th><th>Tools</th><th>Trace</th></tr></thead>
3943
<tbody id="rows"></tbody>
@@ -47,33 +51,64 @@ <h1>Official Results Browser</h1>
4751
const configFilter = document.getElementById('configFilter');
4852
const statusFilter = document.getElementById('statusFilter');
4953
const taskSearch = document.getElementById('taskSearch');
54+
const clearFilters = document.getElementById('clearFilters');
55+
const statsEl = document.getElementById('stats');
5056

5157
/**
 * Format a numeric cell for display.
 *
 * @param {*} v - Value to format; null and undefined mean there is nothing to show.
 * @param {number} [d=3] - Number of digits after the decimal point.
 * @returns {string} A dash for a missing value, otherwise the fixed-point text.
 */
function fmt(v, d = 3) {
  // Loose equality against null matches exactly null and undefined.
  if (v == null) {
    return '-';
  }
  const numeric = Number(v);
  return numeric.toFixed(d);
}
58+
/**
 * Collapse duplicates and return the remaining values in default sort order.
 *
 * @param {Array} values - Input values; the array itself is not modified.
 * @returns {Array} A new array of distinct values, ordered by the default (string) comparison.
 */
function uniqueSorted(values) {
  const distinct = Array.from(new Set(values));
  distinct.sort();
  return distinct;
}
59+
/**
 * Rebuild the option list of a select element, keeping the current choice
 * when it is still among the offered values.
 *
 * @param {HTMLSelectElement} selectEl - Dropdown to repopulate.
 * @param {string} allLabel - Text of the leading catch-all option, whose value is ''.
 * @param {Array} values - Option values; each one is also used as the option label.
 */
function resetOptions(selectEl, allLabel, values) {
  const previous = selectEl.value;
  // Skip null/undefined entries: they would turn into blank or 'null' options
  // that no row filter can ever match.
  const usable = values.filter(v => v !== null && v !== undefined);
  selectEl.innerHTML = '';
  selectEl.add(new Option(allLabel, ''));
  usable.forEach(v => selectEl.add(new Option(v, v)));
  // A select's value is always a string, so compare on the string form;
  // a raw includes() would never match a non-string value and the
  // selection would be dropped on every rebuild.
  const stillOffered = Boolean(previous) && usable.some(v => String(v) === previous);
  selectEl.value = stillOffered ? previous : '';
}
5270

5371
fetch('data/official_results.json').then(r => r.json()).then(data => {
5472
const allTasks = data.all_tasks || data.tasks || [];
5573
const dedupedTasks = data.tasks || [];
56-
const suites = [...new Set(allTasks.map(t => t.suite || 'unknown'))].sort();
57-
const runs = [...new Set(allTasks.map(t => t.run_dir))].sort();
58-
const configs = [...new Set(allTasks.map(t => t.config))].sort();
74+
const suites = uniqueSorted(allTasks.map(t => t.suite || 'unknown'));
5975
suites.forEach(s => suiteFilter.add(new Option(s, s)));
60-
runs.forEach(r => runFilter.add(new Option(r, r)));
61-
configs.forEach(c => configFilter.add(new Option(c, c)));
76+
resetOptions(runFilter, 'All runs', uniqueSorted(allTasks.map(t => t.run_dir)));
77+
resetOptions(configFilter, 'All configs', uniqueSorted(allTasks.map(t => t.config)));
6278

6379
const render = () => {
6480
const dataset = datasetFilter.value || 'all';
6581
const tasks = dataset === 'latest' ? dedupedTasks : allTasks;
82+
const suiteScoped = suiteFilter.value
83+
? tasks.filter(t => (t.suite || 'unknown') === suiteFilter.value)
84+
: tasks;
85+
resetOptions(runFilter, 'All runs', uniqueSorted(suiteScoped.map(t => t.run_dir)));
86+
const runScoped = runFilter.value
87+
? suiteScoped.filter(t => t.run_dir === runFilter.value)
88+
: suiteScoped;
89+
resetOptions(configFilter, 'All configs', uniqueSorted(runScoped.map(t => t.config)));
6690
const sfu = suiteFilter.value;
6791
const rf = runFilter.value;
6892
const cf = configFilter.value;
6993
const sf = statusFilter.value;
7094
const q = taskSearch.value.trim().toLowerCase();
95+
const filtered = tasks.filter(t =>
96+
(!sfu || (t.suite || 'unknown') === sfu) &&
97+
(!rf || t.run_dir === rf) &&
98+
(!cf || t.config === cf) &&
99+
(!sf || t.status === sf) &&
100+
(!q || t.task_name.toLowerCase().includes(q))
101+
);
102+
const limited = filtered.slice(0, 1200);
103+
const active = [];
104+
if (sfu) active.push(`suite=${sfu}`);
105+
if (rf) active.push(`run=${rf}`);
106+
if (cf) active.push(`config=${cf}`);
107+
if (sf) active.push(`status=${sf}`);
108+
if (q) active.push(`search=${q}`);
71109

72110
rowsEl.innerHTML = '';
73-
tasks
74-
.filter(t => (!sfu || (t.suite || 'unknown') === sfu) && (!rf || t.run_dir === rf) && (!cf || t.config === cf) && (!sf || t.status === sf) && (!q || t.task_name.toLowerCase().includes(q)))
75-
.slice(0, 1200)
76-
.forEach(t => {
111+
limited.forEach(t => {
77112
const tr = document.createElement('tr');
78113
const trace = `${t.trace_available.trajectory ? 'traj ' : ''}${t.trace_available.transcript ? 'tx' : ''}`.trim() || '-';
79114
const repoLink = t.task_page_github ? `<a href="${t.task_page_github}" target="_blank" rel="noopener">repo</a>` : '';
@@ -94,6 +129,10 @@ <h1>Official Results Browser</h1>
94129
`;
95130
rowsEl.appendChild(tr);
96131
});
132+
const modeLabel = dataset === 'latest' ? 'Latest per suite+config+task' : 'All task runs';
133+
const capNote = filtered.length > limited.length ? ` (capped to ${limited.length})` : '';
134+
const activeLabel = active.length ? ` | Active: ${active.join(', ')}` : '';
135+
statsEl.textContent = `Dataset: ${modeLabel} | Showing ${filtered.length} of ${tasks.length}${capNote}${activeLabel}`;
97136
};
98137

99138
suiteFilter.addEventListener('change', render);
@@ -102,6 +141,15 @@ <h1>Official Results Browser</h1>
102141
configFilter.addEventListener('change', render);
103142
statusFilter.addEventListener('change', render);
104143
taskSearch.addEventListener('input', render);
144+
clearFilters.addEventListener('click', () => {
145+
suiteFilter.value = '';
146+
datasetFilter.value = 'all';
147+
runFilter.value = '';
148+
configFilter.value = '';
149+
statusFilter.value = '';
150+
taskSearch.value = '';
151+
render();
152+
});
105153
render();
106154
});
107155
</script>

scripts/export_official_results.py

Lines changed: 57 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1108,6 +1108,8 @@ def _build_index_html() -> str:
11081108
body { margin:0; font-family: ui-sans-serif, system-ui, sans-serif; background:var(--bg); color:var(--text); }
11091109
.wrap { max-width: 1180px; margin: 0 auto; padding: 20px; }
11101110
.controls { display:flex; gap:12px; flex-wrap:wrap; margin-bottom: 14px; }
1111+
.meta { color: var(--muted); font-size: 12px; margin: 4px 0 10px; }
1112+
button { background: transparent; color: var(--text); border:1px solid #2e3d4a; border-radius:8px; padding:8px 10px; cursor:pointer; }
11111113
select,input { background:var(--panel); color:var(--text); border:1px solid #2e3d4a; border-radius:8px; padding:8px; }
11121114
table { width:100%; border-collapse: collapse; background:var(--panel); border-radius:10px; overflow:hidden; }
11131115
th,td { border-bottom: 1px solid #2d3a45; padding: 8px 10px; text-align:left; font-size: 13px; }
@@ -1132,7 +1134,9 @@ def _build_index_html() -> str:
11321134
<select id=\"configFilter\"><option value=\"\">All configs</option></select>
11331135
<select id=\"statusFilter\"><option value=\"\">All statuses</option><option>passed</option><option>failed</option></select>
11341136
<input id=\"taskSearch\" placeholder=\"Search task\" />
1137+
<button id=\"clearFilters\" type=\"button\">Clear filters</button>
11351138
</div>
1139+
<div id=\"stats\" class=\"meta\"></div>
11361140
<table>
11371141
<thead><tr><th>Suite</th><th>Run</th><th>Config</th><th>Task</th><th>Status</th><th>Reward</th><th>MCP ratio</th><th>Tools</th><th>Trace</th></tr></thead>
11381142
<tbody id=\"rows\"></tbody>
@@ -1146,33 +1150,64 @@ def _build_index_html() -> str:
11461150
const configFilter = document.getElementById('configFilter');
11471151
const statusFilter = document.getElementById('statusFilter');
11481152
const taskSearch = document.getElementById('taskSearch');
1153+
const clearFilters = document.getElementById('clearFilters');
1154+
const statsEl = document.getElementById('stats');
11491155
11501156
/**
 * Format a numeric cell for display.
 *
 * @param {*} v - Value to format; null and undefined mean there is nothing to show.
 * @param {number} [d=3] - Number of digits after the decimal point.
 * @returns {string} A dash for a missing value, otherwise the fixed-point text.
 */
function fmt(v, d = 3) {
  // Loose equality against null matches exactly null and undefined.
  if (v == null) {
    return '-';
  }
  const numeric = Number(v);
  return numeric.toFixed(d);
}
1157+
/**
 * Collapse duplicates and return the remaining values in default sort order.
 *
 * @param {Array} values - Input values; the array itself is not modified.
 * @returns {Array} A new array of distinct values, ordered by the default (string) comparison.
 */
function uniqueSorted(values) {
  const distinct = Array.from(new Set(values));
  distinct.sort();
  return distinct;
}
1158+
/**
 * Rebuild the option list of a select element, keeping the current choice
 * when it is still among the offered values.
 *
 * @param {HTMLSelectElement} selectEl - Dropdown to repopulate.
 * @param {string} allLabel - Text of the leading catch-all option, whose value is ''.
 * @param {Array} values - Option values; each one is also used as the option label.
 */
function resetOptions(selectEl, allLabel, values) {
  const previous = selectEl.value;
  // Skip null/undefined entries: they would turn into blank or 'null' options
  // that no row filter can ever match.
  const usable = values.filter(v => v !== null && v !== undefined);
  selectEl.innerHTML = '';
  selectEl.add(new Option(allLabel, ''));
  usable.forEach(v => selectEl.add(new Option(v, v)));
  // A select's value is always a string, so compare on the string form;
  // a raw includes() would never match a non-string value and the
  // selection would be dropped on every rebuild.
  const stillOffered = Boolean(previous) && usable.some(v => String(v) === previous);
  selectEl.value = stillOffered ? previous : '';
}
11511169
11521170
fetch('data/official_results.json').then(r => r.json()).then(data => {
11531171
const allTasks = data.all_tasks || data.tasks || [];
11541172
const dedupedTasks = data.tasks || [];
1155-
const suites = [...new Set(allTasks.map(t => t.suite || 'unknown'))].sort();
1156-
const runs = [...new Set(allTasks.map(t => t.run_dir))].sort();
1157-
const configs = [...new Set(allTasks.map(t => t.config))].sort();
1173+
const suites = uniqueSorted(allTasks.map(t => t.suite || 'unknown'));
11581174
suites.forEach(s => suiteFilter.add(new Option(s, s)));
1159-
runs.forEach(r => runFilter.add(new Option(r, r)));
1160-
configs.forEach(c => configFilter.add(new Option(c, c)));
1175+
resetOptions(runFilter, 'All runs', uniqueSorted(allTasks.map(t => t.run_dir)));
1176+
resetOptions(configFilter, 'All configs', uniqueSorted(allTasks.map(t => t.config)));
11611177
11621178
const render = () => {
11631179
const dataset = datasetFilter.value || 'all';
11641180
const tasks = dataset === 'latest' ? dedupedTasks : allTasks;
1181+
const suiteScoped = suiteFilter.value
1182+
? tasks.filter(t => (t.suite || 'unknown') === suiteFilter.value)
1183+
: tasks;
1184+
resetOptions(runFilter, 'All runs', uniqueSorted(suiteScoped.map(t => t.run_dir)));
1185+
const runScoped = runFilter.value
1186+
? suiteScoped.filter(t => t.run_dir === runFilter.value)
1187+
: suiteScoped;
1188+
resetOptions(configFilter, 'All configs', uniqueSorted(runScoped.map(t => t.config)));
11651189
const sfu = suiteFilter.value;
11661190
const rf = runFilter.value;
11671191
const cf = configFilter.value;
11681192
const sf = statusFilter.value;
11691193
const q = taskSearch.value.trim().toLowerCase();
1194+
const filtered = tasks.filter(t =>
1195+
(!sfu || (t.suite || 'unknown') === sfu) &&
1196+
(!rf || t.run_dir === rf) &&
1197+
(!cf || t.config === cf) &&
1198+
(!sf || t.status === sf) &&
1199+
(!q || t.task_name.toLowerCase().includes(q))
1200+
);
1201+
const limited = filtered.slice(0, 1200);
1202+
const active = [];
1203+
if (sfu) active.push(`suite=${sfu}`);
1204+
if (rf) active.push(`run=${rf}`);
1205+
if (cf) active.push(`config=${cf}`);
1206+
if (sf) active.push(`status=${sf}`);
1207+
if (q) active.push(`search=${q}`);
11701208
11711209
rowsEl.innerHTML = '';
1172-
tasks
1173-
.filter(t => (!sfu || (t.suite || 'unknown') === sfu) && (!rf || t.run_dir === rf) && (!cf || t.config === cf) && (!sf || t.status === sf) && (!q || t.task_name.toLowerCase().includes(q)))
1174-
.slice(0, 1200)
1175-
.forEach(t => {
1210+
limited.forEach(t => {
11761211
const tr = document.createElement('tr');
11771212
const trace = `${t.trace_available.trajectory ? 'traj ' : ''}${t.trace_available.transcript ? 'tx' : ''}`.trim() || '-';
11781213
const repoLink = t.task_page_github ? `<a href=\"${t.task_page_github}\" target=\"_blank\" rel=\"noopener\">repo</a>` : '';
@@ -1193,6 +1228,10 @@ def _build_index_html() -> str:
11931228
`;
11941229
rowsEl.appendChild(tr);
11951230
});
1231+
const modeLabel = dataset === 'latest' ? 'Latest per suite+config+task' : 'All task runs';
1232+
const capNote = filtered.length > limited.length ? ` (capped to ${limited.length})` : '';
1233+
const activeLabel = active.length ? ` | Active: ${active.join(', ')}` : '';
1234+
statsEl.textContent = `Dataset: ${modeLabel} | Showing ${filtered.length} of ${tasks.length}${capNote}${activeLabel}`;
11961235
};
11971236
11981237
suiteFilter.addEventListener('change', render);
@@ -1201,6 +1240,15 @@ def _build_index_html() -> str:
12011240
configFilter.addEventListener('change', render);
12021241
statusFilter.addEventListener('change', render);
12031242
taskSearch.addEventListener('input', render);
1243+
clearFilters.addEventListener('click', () => {
1244+
suiteFilter.value = '';
1245+
datasetFilter.value = 'all';
1246+
runFilter.value = '';
1247+
configFilter.value = '';
1248+
statusFilter.value = '';
1249+
taskSearch.value = '';
1250+
render();
1251+
});
12041252
render();
12051253
});
12061254
</script>

0 commit comments

Comments
 (0)