11 changes: 10 additions & 1 deletion docs/src/content/docs/mcp/kit-dev-mcp.mdx
@@ -82,18 +82,27 @@ The server provides many tools including:

- **open_repository** - Open local or remote Git repositories
- **search_code** - Pattern-based code search
- - **grep_code** - Fast literal string search
+ - **grep_code** - Fast literal string search (120s default timeout, configurable via `KIT_GREP_TIMEOUT`)
- **get_file_tree** - Repository file structure with pagination support (`limit`/`offset` params)
- **get_file_content** - Read file contents
- **extract_symbols** - Extract functions, classes, and symbols
- **find_symbol_usages** - Find where symbols are used
- **get_code_summary** - AI-powered code summaries
- **warm_cache** - Pre-warm caches for faster operations on large codebases (100K+ files)
- **review_diff** - AI-powered diff reviews
- **deep_research_package** - Comprehensive package documentation
- **semantic_search** - Vector-based code search
- **package_search_grep** - Search package source code with regex patterns
- **package_search_hybrid** - Semantic search in package source code
- **package_search_read_file** - Read specific files from packages

<Aside type="tip" title="Large Codebases">
For repositories with 50K+ files (Linux kernel, Kubernetes, etc.):
- Use `warm_cache` first to pre-load the file tree (~3-5s for 100K files)
- Use `get_file_tree` with `limit`/`offset` for pagination
- `grep_code` now defaults to 120s timeout (set `KIT_GREP_TIMEOUT` env var to adjust)
</Aside>
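
For example, a wrapper script that launches the MCP server could raise the timeout for every search in the session (a sketch; the 300-second value and the wrapper approach are illustrative, not prescribed):

```python
import os

# Must be set in the server process's environment before searches run;
# an explicit `timeout` argument to a single grep call still takes precedence.
os.environ["KIT_GREP_TIMEOUT"] = "300"  # seconds
```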

## Learn More

<LinkCard
91 changes: 88 additions & 3 deletions src/kit/mcp/dev_server.py
@@ -129,6 +129,22 @@ class GetFileTreeParams(BaseModel):
default=False,
description="Include directory entries (only relevant when compact=true).",
)
limit: int = Field(
default=10000,
description="Maximum number of files to return. Use with offset for pagination on very large repos.",
)
offset: int = Field(
default=0,
description="Number of files to skip. Use with limit for pagination.",
)


class WarmCacheParams(BaseModel):
"""Pre-warm caches for faster subsequent operations on large codebases."""

repo_id: str
warm_file_tree: bool = Field(default=True, description="Pre-cache file tree (fast, recommended)")
warm_symbols: bool = Field(default=False, description="Pre-cache symbol extraction (slower, scans all files)")


class GetSymbolCodeParams(BaseModel):
@@ -284,6 +300,44 @@ def get_file_tree(self, repo_id: str) -> List[Dict[str, Any]]:
repo = self.get_repo(repo_id)
return repo.get_file_tree()

def warm_cache(self, repo_id: str, warm_file_tree: bool = True, warm_symbols: bool = False) -> Dict[str, Any]:
"""Pre-warm caches for faster subsequent operations on large codebases.

This is useful for very large repos where the first file_tree or symbol
extraction can take 30+ seconds. Warming caches upfront avoids timeouts.

Args:
repo_id: Repository ID to warm caches for
warm_file_tree: Pre-cache file tree (fast, ~1-5s for 100K files)
warm_symbols: Pre-cache symbols (slower, ~30-60s for 100K files)

Returns:
Dict with timing stats for each warmed cache
"""
import time

repo = self.get_repo(repo_id)
stats: Dict[str, Any] = {"repo_id": repo_id}

if warm_file_tree:
start = time.time()
tree = repo.get_file_tree()
stats["file_tree"] = {
"elapsed_seconds": round(time.time() - start, 2),
"file_count": len(tree),
}

if warm_symbols:
start = time.time()
# Trigger full repo scan by calling extract_symbols with no file
symbols = repo.extract_symbols()
stats["symbols"] = {
"elapsed_seconds": round(time.time() - start, 2),
"symbol_count": len(symbols),
}

return stats
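
# Illustrative sketch (not part of the diff): with the defaults above,
#   logic.warm_cache("linux", warm_file_tree=True, warm_symbols=False)
# returns timing stats shaped like
#   {"repo_id": "linux", "file_tree": {"elapsed_seconds": 3.2, "file_count": 80000}}
# where the repo id and the numbers are hypothetical placeholders.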

def extract_symbols(self, repo_id: str, file_path: str, symbol_type: Optional[str] = None) -> List[Dict[str, Any]]:
"""Extract symbols from a file."""
repo = self.get_repo(repo_id)
@@ -545,6 +599,11 @@ def list_tools(self) -> List[Tool]:
description="Get source code of a specific symbol (lazy loading for context efficiency)",
inputSchema=GetSymbolCodeParams.model_json_schema(),
),
Tool(
name="warm_cache",
description="Pre-warm caches for faster operations on large codebases (call before get_file_tree on huge repos)",
inputSchema=WarmCacheParams.model_json_schema(),
),
]


@@ -1072,15 +1131,33 @@ async def call_tool(name: str, arguments: dict) -> List[TextContent]:
elif name == "get_file_tree":
tree_params = GetFileTreeParams(**arguments)
result = logic.get_file_tree(tree_params.repo_id)

# Apply pagination for large codebases
total_count = len(result)
start = tree_params.offset
end = start + tree_params.limit
paginated = result[start:end]
has_more = end < total_count

# Compact mode: newline-separated paths (saves ~75% context)
if tree_params.compact:
paths = []
- for item in result:
+ for item in paginated:
is_dir = item.get("is_dir", False)
if tree_params.include_dirs or not is_dir:
paths.append(item.get("path", ""))
- return [TextContent(type="text", text="\n".join(paths))]
- return [TextContent(type="text", text=json.dumps(result, indent=2))]
+ # Include pagination metadata as header for compact mode
+ header = f"# total={total_count} offset={start} limit={tree_params.limit} has_more={has_more}\n"
+ return [TextContent(type="text", text=header + "\n".join(paths))]
+ # JSON mode: include pagination in response
+ response = {
+     "files": paginated,
+     "total_count": total_count,
+     "offset": start,
+     "limit": tree_params.limit,
+     "has_more": has_more,
+ }
+ return [TextContent(type="text", text=json.dumps(response, indent=2))]
elif name == "get_code_summary":
summary_params = GetCodeSummaryParams(**arguments)
result = logic.get_code_summary(
@@ -1119,6 +1196,14 @@ async def call_tool(name: str, arguments: dict) -> List[TextContent]:
symbol_code_params.symbol_name,
)
return [TextContent(type="text", text=json.dumps(result, indent=2))]
elif name == "warm_cache":
cache_params = WarmCacheParams(**arguments)
result = logic.warm_cache(
cache_params.repo_id,
cache_params.warm_file_tree,
cache_params.warm_symbols,
)
return [TextContent(type="text", text=json.dumps(result, indent=2))]
else:
# Should not happen since we checked the name is in the list
return [TextContent(type="text", text=f"Tool {name} is recognized but not implemented")]
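
A client-side pagination loop over the new `get_file_tree` response might look like this (a sketch, not part of the diff; `call_tool` is the dispatcher above and the repo id "linux" is a hypothetical placeholder):

```python
import json

async def fetch_all_files(call_tool) -> list:
    """Page through a huge repo's file tree using the JSON response shape."""
    files, offset, limit = [], 0, 10_000
    while True:
        result = await call_tool("get_file_tree", {
            "repo_id": "linux",
            "compact": False,  # JSON mode, so pagination metadata is structured
            "limit": limit,
            "offset": offset,
        })
        page = json.loads(result[0].text)
        files.extend(page["files"])
        if not page["has_more"]:
            return files
        offset += limit
```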
27 changes: 25 additions & 2 deletions src/kit/repository.py
@@ -379,6 +379,7 @@ def grep(
max_results: int = 1000,
directory: Optional[str] = None,
include_hidden: bool = False,
timeout: Optional[int] = None,
) -> List[Dict[str, Any]]:
"""
Performs literal grep search on repository files using system grep.
@@ -389,8 +390,11 @@
include_pattern: Glob pattern for files to include (e.g. '*.py').
exclude_pattern: Glob pattern for files to exclude.
max_results: Maximum number of results to return. Defaults to 1000.
Uses grep's -m flag for early termination on large codebases.
directory: Limit search to specific directory within repository (e.g. 'src', 'lib/utils').
include_hidden: Whether to search hidden directories (starting with '.'). Defaults to False.
timeout: Search timeout in seconds. Defaults to 120s (or KIT_GREP_TIMEOUT env var).
For very large codebases (10M+ files), consider increasing this.

Returns:
List[Dict[str, Any]]: List of matches with file, line_number, line_content.
@@ -403,6 +407,17 @@

self._ensure_git_state_valid()

# Resolve timeout: parameter > env var > default (120s)
if timeout is None:
env_timeout = os.environ.get("KIT_GREP_TIMEOUT")
if env_timeout:
try:
timeout = int(env_timeout)
except ValueError:
timeout = 120
else:
timeout = 120

# Build grep command
cmd = ["grep", "-r", "-n", "-H"] # -r for recursive, -n for line numbers, -H for filenames

@@ -477,6 +492,11 @@ def grep(
raise ValueError(f"Directory not found in repository: {directory}")
search_path = directory

# Early termination: the -m flag stops grep's scan of each file once it has
# max_results matches, which can dramatically speed up searches on
# match-heavy files in large codebases
if max_results > 0:
cmd.extend(["-m", str(max_results)])

# Search recursively in specified directory
cmd.append(search_path)

@@ -487,10 +507,13 @@
capture_output=True,
text=True,
encoding="utf-8",
- timeout=30, # 30 second timeout
+ timeout=timeout,
)
except subprocess.TimeoutExpired:
- raise RuntimeError("Grep search timed out after 30 seconds")
+ raise RuntimeError(
+     f"Grep search timed out after {timeout} seconds. "
+     "Set KIT_GREP_TIMEOUT env var or timeout parameter for longer searches."
+ )
except FileNotFoundError:
raise RuntimeError("grep command not found. Please ensure grep is installed and in PATH.")
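
With this change the timeout resolves as: explicit `timeout` parameter, then `KIT_GREP_TIMEOUT`, then the 120s default. A usage sketch, assuming `Repository` is importable from the package root and that the search pattern is the first positional argument (both assumptions, not confirmed by this diff):

```python
from kit import Repository  # assumed import path

repo = Repository("/path/to/linux")  # hypothetical local checkout

# An explicit per-call timeout wins over KIT_GREP_TIMEOUT and the 120s default.
matches = repo.grep(
    "EXPORT_SYMBOL",
    include_pattern="*.c",
    max_results=500,   # also emitted as grep's `-m 500` for early termination
    timeout=600,
)
for m in matches[:5]:
    print(m["file"], m["line_number"], m["line_content"])
```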
