Kaggle · nl917 · May 29, 2026 · May 29, 2026 · dolaameng · May 29, 2026
diff --git a/src/kaggle/api/kaggle_api_extended.py b/src/kaggle/api/kaggle_api_extended.py
@@ -7562,9 +7562,9 @@ def benchmarks_tasks_download_cli(self, task, model=None, output=None, include_s
             print(f"Downloading output runs for {task}")
             print(f"Target directory:  {target_dir}/\n")
 
-            display_files = [
-                f"{self._normalize_model_slug(r.model_version_slug)}/{r.id}/{r.id}.zip" for r in downloadable
-            ]
+            # Show the extracted output directory (what is actually left on disk after
+            # extraction), not the intermediate .zip that gets removed.
+            display_files = [f"{self._normalize_model_slug(r.model_version_slug)}/{r.id}/" for r in downloadable]
             model_col = max((len(self._normalize_model_slug(r.model_version_slug)) for r in downloadable), default=20)
             model_col = max(model_col, 20)
             file_col = max((len(f) for f in display_files), default=40)
@@ -7575,7 +7575,7 @@ def benchmarks_tasks_download_cli(self, task, model=None, output=None, include_s
             print(f"{'Model':<{model_col}} {'File':<{file_col}} {'Size':<{size_col}} {'Progress':<{prog_col}}")
             print(f"{'─' * model_col} {'─' * file_col} {'─' * size_col} {'─' * prog_col}")
 
-            downloaded, skipped = 0, 0
+            downloaded, cached, cached_without_source = 0, 0, 0
             for r, display_file in zip(downloadable, display_files):
                 slug = self._normalize_model_slug(r.model_version_slug)
                 # Hierarchical layout: {output}/{task}/{version}/{model}/{run_id}/
@@ -7584,8 +7584,16 @@ def benchmarks_tasks_download_cli(self, task, model=None, output=None, include_s
 
                 if os.path.isdir(outdir) and not force:
                     size_str = self._format_size(self._dir_size(outdir))
-                    print(f"{row_prefix} {size_str:<{size_col}} {'Skipped':<{prog_col}}")
-                    skipped += 1
+                    print(f"{row_prefix} {size_str:<{size_col}} {'Cached':<{prog_col}}")
+                    cached += 1
+                    # If the caller asked for source notebooks with -s but the cached dir
+                    # was built without them, count it so we can emit a tip at the end.
+                    # Skip detection requires --force to re-download.
+                    if include_source and not any(
+                        os.path.exists(os.path.join(outdir, n))
+                        for n in ("__notebook__.ipynb", "__notebook_source__.ipynb")
+                    ):
+                        cached_without_source += 1
                     continue
 
                 dl_request = ApiDownloadBenchmarkTaskRunOutputRequest()
@@ -7630,10 +7638,20 @@ def benchmarks_tasks_download_cli(self, task, model=None, output=None, include_s
                 downloaded += 1
                 print(f"{row_prefix} {size_str:<{size_col}} {'Done':<{prog_col}}")
 
-            # Summary
-            parts = [f"{n} run(s) {label}" for n, label in ((downloaded, "downloaded"), (skipped, "skipped")) if n]
+            # Summary — counts are runs (one run may include multiple files, e.g. with --include-source).
+            # "Cached" = already on disk from a previous download; the data is available without a fetch.
+            parts = [f"{n} run(s) {label}" for n, label in ((downloaded, "downloaded"), (cached, "cached")) if n]
             print(f"\nDone: {', '.join(parts) or '0 runs downloaded'}.")
 
+            # Tip: -s alone won't backfill source notebooks into already-cached dirs.
+            # The check that gates re-download (os.path.isdir(outdir)) doesn't peek inside,
+            # so the cached row stays untouched even though it lacks the requested files.
+            if cached_without_source:
+                print(
+                    f"\nTip: {cached_without_source} cached run(s) lack source notebooks. "
+                    f"Re-run with -f -s to fetch them."
+                )
+
     @staticmethod
     def _format_size(n) -> str:
         """Render a byte count as ``1.06KB`` / ``2.34MB`` / etc."""

diff --git a/src/kaggle/test/test_benchmarks_cli.py b/src/kaggle/test/test_benchmarks_cli.py
@@ -1180,7 +1180,7 @@ def test_download_all_pending_shows_message(self, api, capsys):
         assert "kaggle b t status my-task" in output
 
     def test_download_skips_existing_output(self, api, capsys, tmp_path):
-        """Already-downloaded runs are skipped without making API calls."""
+        """Already-downloaded runs render as Cached without making API calls."""
         _setup_runs_response(api, [_make_run(run_id=42)])
         self._mock_download(api)
         outdir = str(tmp_path / "out")
@@ -1192,13 +1192,54 @@ def test_download_skips_existing_output(self, api, capsys, tmp_path):
 
         output = capsys.readouterr().out
         assert "gemini-pro" in output
-        assert "Skipped" in output
-        assert "1 run(s) skipped" in output
+        assert "Cached" in output
+        assert "1 run(s) cached" in output
+        # The File column shows the output directory, not a .zip path
+        assert "gemini-pro/42/" in output
+        assert "/42/42.zip" not in output
         # No download API call should have been made
         api._mock_benchmarks.download_benchmark_task_run_output.assert_not_called()
 
+    def test_download_cached_dir_without_source_prints_tip_when_s_passed(self, api, capsys, tmp_path):
+        """When -s is passed but the cached dir lacks source notebooks, hint that -f -s is needed."""
+        _setup_runs_response(api, [_make_run(run_id=42)])
+        self._mock_download(api)
+        outdir = str(tmp_path / "out")
+        # Cached dir exists but has no __notebook__.ipynb / __notebook_source__.ipynb
+        existing = os.path.join(outdir, "my-task", "1", "gemini-pro", "42")
+        os.makedirs(existing)
+        # Drop a placeholder file so the dir isn't empty, but not a source notebook
+        with open(os.path.join(existing, "result.run.json"), "w") as f:
+            f.write("{}")
+
+        api.benchmarks_tasks_download_cli("my-task", output=outdir, include_source=True)
+        output = capsys.readouterr().out
+
+        assert "Cached" in output
+        assert "1 cached run(s) lack source notebooks" in output or "lack source notebooks" in output
+        assert "-f -s" in output
+        # Without -f, the cached dir was not touched: no download API call
+        api._mock_benchmarks.download_benchmark_task_run_output.assert_not_called()
+
+    def test_download_cached_dir_with_source_does_not_print_tip(self, api, capsys, tmp_path):
+        """When the cached dir already contains source notebooks, no tip is shown."""
+        _setup_runs_response(api, [_make_run(run_id=42)])
+        self._mock_download(api)
+        outdir = str(tmp_path / "out")
+        existing = os.path.join(outdir, "my-task", "1", "gemini-pro", "42")
+        os.makedirs(existing)
+        # Simulate a previous -s download by dropping the source notebook
+        with open(os.path.join(existing, "__notebook__.ipynb"), "w") as f:
+            f.write("{}")
+
+        api.benchmarks_tasks_download_cli("my-task", output=outdir, include_source=True)
+        output = capsys.readouterr().out
+
+        assert "Cached" in output
+        assert "lack source notebooks" not in output
+
     def test_download_summary_counts(self, api, capsys, tmp_path):
-        """Download summary shows correct downloaded and skipped counts."""
+        """Download summary shows correct downloaded and cached counts."""
         _setup_runs_response(
             api,
             [_make_run(model="new-model", run_id=1), _make_run(model="old-model", run_id=2)],
@@ -1214,7 +1255,7 @@ def test_download_summary_counts(self, api, capsys, tmp_path):
 
         output = capsys.readouterr().out
         assert "1 run(s) downloaded" in output
-        assert "1 run(s) skipped" in output
+        assert "1 run(s) cached" in output
 
     def test_download_force_overwrites_existing_output(self, api, capsys, tmp_path):
         """Using force=True re-downloads and overwrites existing output."""