Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion docs/benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -329,7 +329,18 @@ Downloads and extracts the output zip archive for each completed run. Files are
├── output files...
```

Already-downloaded runs (where the output directory exists) are automatically skipped unless the `-f` / `--force` flag is used, in which case they are overwritten.
Progress is rendered as a table with one row per run:

```
Model File Size Progress
──────────────────── ──────────────────────────────────────── ────────── ──────────
gemini-2.5-pro gemini-2.5-pro/12345/ 1.24MB Done
claude-sonnet-4 claude-sonnet-4/12346/ 2.10MB Cached
```

The `Size` column reports the extracted on-disk size of the run's output directory. The `Progress` column is one of `Done` (freshly downloaded and extracted), `Cached` (the output directory was already on disk from a previous download), or `Bad zip` (the downloaded archive was corrupt; the raw `.zip` file is kept on disk).

Already-downloaded runs (where the output directory exists) are automatically skipped — they appear as `Cached` rows — unless the `-f` / `--force` flag is used, in which case they are overwritten.

When `--include-source` is used, the downloaded zip also contains the kernel session's source files (e.g., `__notebook__.ipynb` and `__notebook_source__.ipynb`).

Expand Down
5 changes: 3 additions & 2 deletions skills/references/benchmarks.md
Original file line number Diff line number Diff line change
Expand Up @@ -254,8 +254,9 @@ kaggle b t download my-task --include-source
**Behavior details:**
- Downloads outputs for all runs in a **terminal state** — this includes both `COMPLETED` and `ERRORED` runs (errored runs may still have partial output)
- Downloads zip archives and extracts them automatically
- Already-downloaded runs are skipped (use `--force` to re-download): `Skipping gemini-2.5-pro (run 123) — already downloaded to ./my-task/1/gemini-2.5-pro/123`
- Corrupt zips: Warning printed, raw `.zip` file kept, continues with other models
- Progress is rendered as a table with `Model | File | Size | Progress` columns. `Size` is the extracted on-disk size of the run's output directory.
- Already-downloaded runs are skipped (use `--force` to re-download) and shown as a `Cached` row in the table
- Corrupt zips are shown as a `Bad zip` row; the raw `.zip` file is kept on disk and processing continues with the next run
- No downloadable runs (all still in progress): `No downloadable runs yet — N run(s) still in progress. Use 'kaggle b t status my-task' to check progress.`
- No runs at all: `No runs found for task 'my-task'. Use 'kaggle b t run my-task' to start one.`

Expand Down
16 changes: 7 additions & 9 deletions src/kaggle/api/kaggle_api_extended.py
Original file line number Diff line number Diff line change
Expand Up @@ -7562,9 +7562,7 @@ def benchmarks_tasks_download_cli(self, task, model=None, output=None, include_s
print(f"Downloading output runs for {task}")
print(f"Target directory: {target_dir}/\n")

display_files = [
f"{self._normalize_model_slug(r.model_version_slug)}/{r.id}/{r.id}.zip" for r in downloadable
]
display_files = [f"{self._normalize_model_slug(r.model_version_slug)}/{r.id}/" for r in downloadable]
model_col = max((len(self._normalize_model_slug(r.model_version_slug)) for r in downloadable), default=20)
model_col = max(model_col, 20)
file_col = max((len(f) for f in display_files), default=40)
Expand All @@ -7575,7 +7573,7 @@ def benchmarks_tasks_download_cli(self, task, model=None, output=None, include_s
print(f"{'Model':<{model_col}} {'File':<{file_col}} {'Size':<{size_col}} {'Progress':<{prog_col}}")
print(f"{'─' * model_col} {'─' * file_col} {'─' * size_col} {'─' * prog_col}")

downloaded, skipped = 0, 0
downloaded, cached = 0, 0
for r, display_file in zip(downloadable, display_files):
slug = self._normalize_model_slug(r.model_version_slug)
# Hierarchical layout: {output}/{task}/{version}/{model}/{run_id}/
Expand All @@ -7584,8 +7582,8 @@ def benchmarks_tasks_download_cli(self, task, model=None, output=None, include_s

if os.path.isdir(outdir) and not force:
size_str = self._format_size(self._dir_size(outdir))
print(f"{row_prefix} {size_str:<{size_col}} {'Skipped':<{prog_col}}")
skipped += 1
print(f"{row_prefix} {size_str:<{size_col}} {'Cached':<{prog_col}}")
cached += 1
continue

dl_request = ApiDownloadBenchmarkTaskRunOutputRequest()
Expand All @@ -7605,7 +7603,6 @@ def benchmarks_tasks_download_cli(self, task, model=None, output=None, include_s
try:
# quiet=True: intermediate zip, extracted and removed below
self.download_file(response, zipfile_path, kaggle.http_client(), quiet=True)
size_str = self._format_size(os.path.getsize(zipfile_path)) if os.path.exists(zipfile_path) else ""
# Note: extractall() is safe here because the zip originates from
# the trusted Kaggle server, not user-supplied input (zip-slip).
with zipfile.ZipFile(zipfile_path, "r") as zf:
Expand All @@ -7627,11 +7624,12 @@ def benchmarks_tasks_download_cli(self, task, model=None, output=None, include_s
if os.path.isdir(outdir):
shutil.rmtree(outdir)
os.rename(tmp_outdir, outdir)
# Report extracted on-disk size, matching the cached branch above.
size_str = self._format_size(self._dir_size(outdir))
downloaded += 1
print(f"{row_prefix} {size_str:<{size_col}} {'Done':<{prog_col}}")

# Summary
parts = [f"{n} run(s) {label}" for n, label in ((downloaded, "downloaded"), (skipped, "skipped")) if n]
parts = [f"{n} run(s) {label}" for n, label in ((downloaded, "downloaded"), (cached, "cached")) if n]
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

`skipped` -> `cached`: leaving this to @simsryan-google for a decision.

print(f"\nDone: {', '.join(parts) or '0 runs downloaded'}.")

@staticmethod
Expand Down
13 changes: 8 additions & 5 deletions src/kaggle/test/test_benchmarks_cli.py
Comment thread
nl917 marked this conversation as resolved.
Original file line number Diff line number Diff line change
Expand Up @@ -1180,7 +1180,7 @@ def test_download_all_pending_shows_message(self, api, capsys):
assert "kaggle b t status my-task" in output

def test_download_skips_existing_output(self, api, capsys, tmp_path):
"""Already-downloaded runs are skipped without making API calls."""
"""Already-downloaded runs render as Cached without making API calls."""
_setup_runs_response(api, [_make_run(run_id=42)])
self._mock_download(api)
outdir = str(tmp_path / "out")
Expand All @@ -1192,13 +1192,16 @@ def test_download_skips_existing_output(self, api, capsys, tmp_path):

output = capsys.readouterr().out
assert "gemini-pro" in output
assert "Skipped" in output
assert "1 run(s) skipped" in output
assert "Cached" in output
assert "1 run(s) cached" in output
# The File column shows the output directory, not a .zip path
assert "gemini-pro/42/" in output
assert "/42/42.zip" not in output
# No download API call should have been made
api._mock_benchmarks.download_benchmark_task_run_output.assert_not_called()

def test_download_summary_counts(self, api, capsys, tmp_path):
"""Download summary shows correct downloaded and skipped counts."""
"""Download summary shows correct downloaded and cached counts."""
_setup_runs_response(
api,
[_make_run(model="new-model", run_id=1), _make_run(model="old-model", run_id=2)],
Expand All @@ -1214,7 +1217,7 @@ def test_download_summary_counts(self, api, capsys, tmp_path):

output = capsys.readouterr().out
assert "1 run(s) downloaded" in output
assert "1 run(s) skipped" in output
assert "1 run(s) cached" in output

def test_download_force_overwrites_existing_output(self, api, capsys, tmp_path):
"""Using force=True re-downloads and overwrites existing output."""
Expand Down
Loading