|
| 1 | +import path from 'path'; |
| 2 | +import fs from 'fs-extra'; |
| 3 | +import { inferWorkspaceRoot, resolveGitRoot } from '../../core/git'; |
| 4 | +import { defaultDbDir, openTablesByLang, type IndexLang } from '../../core/lancedb'; |
| 5 | +import { queryManifestWorkspace } from '../../core/workspace'; |
| 6 | +import { inferSymbolSearchMode, type SymbolSearchMode } from '../../core/symbolSearch'; |
| 7 | +import { createLogger } from '../../core/log'; |
| 8 | +import { resolveLangs } from '../../core/indexCheck'; |
| 9 | +import { generateRepoMap, type FileRank } from '../../core/repoMap'; |
| 10 | +import type { CLIResult, CLIError } from '../types'; |
| 11 | +import { success, error } from '../types'; |
| 12 | +import { resolveRepoContext, validateIndex, resolveLanguages, type RepoContext } from '../helpers'; |
| 13 | +import type { SearchFilesInput } from '../schemas/queryFilesSchemas'; |
| 14 | + |
/**
 * Type guard for the CLI error envelope.
 *
 * A value is treated as a `CLIError` when it is a non-null object that has an
 * `ok` property strictly equal to `false`.
 */
function isCLIError(value: unknown): value is CLIError {
  if (typeof value !== 'object' || value === null) return false;
  if (!('ok' in value)) return false;
  return (value as { ok: unknown }).ok === false;
}
| 18 | + |
/**
 * Produces the optional `repo_map` attachment for a query result.
 *
 * Resolves the wiki directory, then asks `generateRepoMap` for ranked files.
 * Any failure along the way is converted into a "disabled" attachment that
 * carries the error text instead of propagating the exception.
 *
 * @param repoRoot - Repository root passed through to `generateRepoMap`.
 * @param options - Wiki location plus the file / per-file symbol caps.
 * @returns `{ enabled: true, wikiDir, files }` on success, otherwise
 *          `{ enabled: false, skippedReason }`.
 */
async function buildRepoMapAttachment(
  repoRoot: string,
  options: { wiki: string; repoMapFiles: number; repoMapSymbols: number }
): Promise<{ enabled: boolean; wikiDir: string; files: FileRank[] } | { enabled: boolean; skippedReason: string }> {
  try {
    const wikiDir = resolveWikiDir(repoRoot, options.wiki);
    const request = {
      repoRoot,
      maxFiles: options.repoMapFiles,
      maxSymbolsPerFile: options.repoMapSymbols,
      wikiDir,
    };
    const files = await generateRepoMap(request);
    return { enabled: true, wikiDir, files };
  } catch (e: any) {
    // Best effort: the caller still gets its query rows, with the reason attached.
    const skippedReason = String(e?.message ?? e);
    return { enabled: false, skippedReason };
  }
}
| 36 | + |
/**
 * Picks the wiki directory for a repository.
 *
 * An explicit (non-blank) `wikiOpt` wins and is resolved against `repoRoot`.
 * Otherwise the first existing directory among `<repoRoot>/docs/wiki` and
 * `<repoRoot>/wiki` is used.
 *
 * @returns The chosen directory path, or `''` when nothing was requested and
 *          neither default location exists.
 */
function resolveWikiDir(repoRoot: string, wikiOpt: string): string {
  const requested = String(wikiOpt ?? '').trim();
  if (requested.length > 0) {
    return path.resolve(repoRoot, requested);
  }

  const fallbacks = [path.join(repoRoot, 'docs', 'wiki'), path.join(repoRoot, 'wiki')];
  const existing = fallbacks.find(dir => fs.existsSync(dir));
  return existing ?? '';
}
| 46 | + |
/**
 * Maps a file path to an index language by its (case-sensitive) suffix.
 *
 * Anything that matches none of the known suffixes falls back to `'ts'`.
 */
function inferLangFromFile(file: string): IndexLang {
  const name = String(file);

  // Checked top to bottom; the first entry with a matching suffix wins.
  const suffixTable: ReadonlyArray<readonly [IndexLang, readonly string[]]> = [
    ['markdown', ['.md', '.mdx']],
    ['yaml', ['.yml', '.yaml']],
    ['java', ['.java']],
    ['c', ['.c', '.h']],
    ['go', ['.go']],
    ['python', ['.py']],
    ['rust', ['.rs']],
  ];

  for (const [lang, suffixes] of suffixTable) {
    if (suffixes.some(suffix => name.endsWith(suffix))) {
      return lang;
    }
  }
  return 'ts';
}
| 58 | + |
/**
 * Narrows workspace rows to a single language.
 *
 * The selections `'auto'` and `'all'` (and a missing selection, which defaults
 * to `'auto'`) return `rows` untouched. Any other selection keeps only rows
 * whose `file` field maps to that language via `inferLangFromFile`.
 */
function filterWorkspaceRowsByLang(rows: any[], langSel: string): any[] {
  const selection = String(langSel ?? 'auto');
  const keepEverything = selection === 'auto' || selection === 'all';
  if (keepEverything) return rows;

  const kept: any[] = [];
  for (const row of rows) {
    const file = String((row as any).file ?? '');
    if (inferLangFromFile(file) === selection) kept.push(row);
  }
  return kept;
}
| 65 | + |
/**
 * Escapes a value for embedding inside a single-quoted SQL string literal by
 * doubling every single quote.
 */
function escapeQuotes(s: string): string {
  return s.replace(/'/g, "''");
}

/**
 * Builds the SQL predicate used to prefetch candidate rows by `file`.
 *
 * - `prefix`    → `file LIKE '<pattern>%'`
 * - `substring` → `file LIKE '%<pattern>%'`
 * - `wildcard`  → the glob is translated to LIKE syntax (`*` → `%`, `?` → `_`)
 *                 and left anchored at both ends, mirroring the anchored
 *                 `^…$` regex the in-memory wildcard filter applies.
 * - any other mode (regex, fuzzy) → `null`; those are filtered in memory.
 *
 * `ILIKE` replaces `LIKE` when `caseInsensitive` is set.
 *
 * NOTE: literal `%` / `_` characters in the pattern are not escaped, so the
 * predicate may over-match; callers re-filter the fetched rows in memory, which
 * removes such false positives.
 *
 * @returns The predicate string, or `null` when the pattern is empty or the
 *          mode cannot be expressed as a LIKE filter.
 */
function buildFileWhere(pattern: string, mode: SymbolSearchMode, caseInsensitive: boolean): string | null {
  const safe = escapeQuotes(pattern);
  if (!safe) return null;
  const likeOp = caseInsensitive ? 'ILIKE' : 'LIKE';

  if (mode === 'prefix') {
    return `file ${likeOp} '${safe}%'`;
  }

  if (mode === 'substring') {
    return `file ${likeOp} '%${safe}%'`;
  }

  if (mode === 'wildcard') {
    // Previously the glob was wrapped as '%<pattern>%' with `*`/`?` left literal,
    // so only paths containing an actual '*' or '?' were ever fetched.
    const likePattern = safe.replace(/\*/g, '%').replace(/\?/g, '_');
    return `file ${likeOp} '${likePattern}'`;
  }

  // For regex and fuzzy, we'll handle them in memory after fetching
  return null;
}
| 86 | + |
/**
 * Compiles a user-supplied regular expression.
 *
 * @param pattern - Regex source text.
 * @param caseInsensitive - When true the `i` flag is applied.
 * @returns The compiled expression, or `null` if the source is not valid.
 */
function buildRegex(pattern: string, caseInsensitive: boolean): RegExp | null {
  let compiled: RegExp | null;
  try {
    compiled = caseInsensitive ? new RegExp(pattern, 'i') : new RegExp(pattern, '');
  } catch {
    // Invalid pattern: signal "no matcher" rather than throwing.
    compiled = null;
  }
  return compiled;
}
| 95 | + |
/**
 * Converts a simple glob into an anchored regular expression.
 *
 * `*` becomes `.*`, `?` becomes `.`, and every other character is escaped via
 * `escapeRegex` so it matches literally. The result must match the whole
 * input (`^…$`).
 *
 * @returns The compiled expression, or `null` if construction throws.
 */
function globToRegex(pattern: string, caseInsensitive: boolean): RegExp | null {
  try {
    let body = '';
    for (const ch of pattern.split('')) {
      if (ch === '*') {
        body += '.*';
      } else if (ch === '?') {
        body += '.';
      } else {
        body += escapeRegex(ch);
      }
    }
    return new RegExp(`^${body}$`, caseInsensitive ? 'i' : '');
  } catch {
    return null;
  }
}
| 112 | + |
/**
 * Backslash-escapes regular-expression metacharacters so the input can be
 * embedded in a pattern and matched literally.
 */
function escapeRegex(s: string): string {
  const metacharacters = /[.*+?^${}()|[\]\\]/g;
  // `$&` re-inserts the matched character after the added backslash.
  return s.replace(metacharacters, '\\$&');
}
| 116 | + |
/**
 * Applies the search pattern to each row's `file` field and caps the result.
 *
 * - `substring` / `prefix`: plain string containment / starts-with, optionally
 *   lower-casing both sides first.
 * - `wildcard`: whole-path match against the regex from `globToRegex`.
 * - `regex`: match against the regex from `buildRegex`.
 * - any other mode: fuzzy scoring via `fuzzyFileScore`; non-matching rows
 *   (negative score) are dropped and the rest are ordered by descending score.
 *
 * Non-fuzzy modes keep the incoming row order. At most `max(1, limit)` rows
 * are returned; an uncompilable wildcard/regex pattern yields an empty array.
 */
function filterAndRankFileRows<T extends Record<string, any>>(
  rows: T[],
  pattern: string,
  mode: SymbolSearchMode,
  caseInsensitive: boolean,
  limit: number
): T[] {
  const fileOf = (row: any): string => String(row?.file ?? '');
  const cap = Math.max(1, limit);
  const takeMatching = (matches: (file: string) => boolean): T[] =>
    rows.filter(row => matches(fileOf(row))).slice(0, cap);

  if (mode === 'substring' || mode === 'prefix') {
    const fold = (text: string): string => (caseInsensitive ? text.toLowerCase() : text);
    const wanted = fold(pattern);
    return mode === 'prefix'
      ? takeMatching(file => fold(file).startsWith(wanted))
      : takeMatching(file => fold(file).includes(wanted));
  }

  if (mode === 'wildcard' || mode === 'regex') {
    const matcher =
      mode === 'wildcard' ? globToRegex(pattern, caseInsensitive) : buildRegex(pattern, caseInsensitive);
    if (!matcher) return [];
    return takeMatching(file => matcher.test(file));
  }

  // Fuzzy matching for files
  const ranked: Array<{ row: T; score: number }> = [];
  for (const row of rows) {
    const score = fuzzyFileScore(pattern, fileOf(row), caseInsensitive);
    if (score >= 0) ranked.push({ row, score });
  }
  ranked.sort((a, b) => b.score - a.score);
  return ranked.slice(0, cap).map(entry => entry.row);
}
| 164 | + |
/**
 * Scores how well `needle` appears as an in-order subsequence of `haystack`.
 *
 * Each matched character is worth 1 point, or 2 when it directly follows the
 * previously matched character. The haystack is scanned left to right and each
 * needle character takes the first available match.
 *
 * @returns `0` for an empty needle, `-1` when the needle is not a subsequence
 *          of the haystack, otherwise the accumulated score.
 */
function fuzzyFileScore(needle: string, haystack: string, caseInsensitive: boolean): number {
  if (!needle) return 0;

  const wanted = caseInsensitive ? needle.toLowerCase() : needle;
  const text = caseInsensitive ? haystack.toLowerCase() : haystack;

  let matched = 0; // how many needle characters have been consumed
  let total = 0;
  let prevHit = -2; // sentinel: position 0 can never count as "adjacent"
  let pos = 0;

  while (pos < text.length && matched < wanted.length) {
    if (text[pos] === wanted[matched]) {
      const adjacent = pos - prevHit === 1;
      total += adjacent ? 2 : 1;
      prevHit = pos;
      matched += 1;
    }
    pos += 1;
  }

  return matched === wanted.length ? total : -1;
}
| 185 | + |
/**
 * CLI handler for `query-files`: finds indexed rows whose `file` matches a pattern.
 *
 * Two paths exist:
 * - Workspace mode (when `inferWorkspaceRoot` is truthy for the git root): rows
 *   come from `queryManifestWorkspace`, are narrowed by language, then filtered
 *   and capped in memory. A requested repo map is reported as skipped with
 *   `workspace_mode_not_supported`.
 * - Single-repo mode: the repo context and index are validated, the per-language
 *   tables are opened, up to `maxCandidates` rows per language are fetched
 *   (pre-filtered by a LIKE predicate when one can be built), and the combined
 *   rows are filtered and capped in memory. A repo map is attached on request.
 *
 * @returns A success envelope with the matching rows, or an error envelope
 *          (`lang_not_available`, `query_files_failed`, or whatever
 *          `resolveRepoContext` / `validateIndex` report).
 */
export async function handleSearchFiles(input: SearchFilesInput): Promise<CLIResult | CLIError> {
  const log = createLogger({ component: 'cli', cmd: 'query-files' });
  const startedAt = Date.now();
  const elapsedMs = (): number => Date.now() - startedAt;

  const repoRoot = await resolveGitRoot(path.resolve(input.path));
  const mode = inferSymbolSearchMode(input.pattern, input.mode);

  if (inferWorkspaceRoot(repoRoot)) {
    const res = await queryManifestWorkspace({
      manifestRepoRoot: repoRoot,
      keyword: input.pattern,
      limit: input.maxCandidates,
    });
    const langScoped = filterWorkspaceRowsByLang(res.rows, input.lang);
    const rows = filterAndRankFileRows(langScoped, input.pattern, mode, input.caseInsensitive, input.limit);

    log.info('query_files', {
      ok: true,
      repoRoot,
      workspace: true,
      mode,
      case_insensitive: input.caseInsensitive,
      limit: input.limit,
      max_candidates: input.maxCandidates,
      candidates: res.rows.length,
      rows: rows.length,
      duration_ms: elapsedMs(),
    });

    if (input.withRepoMap) {
      // Repo maps are not generated in workspace mode; say so explicitly.
      return success({
        ...res,
        rows,
        repo_map: { enabled: false, skippedReason: 'workspace_mode_not_supported' },
      });
    }
    return success({ ...res, rows });
  }

  const ctxOrError = await resolveRepoContext(input.path);
  if (isCLIError(ctxOrError)) {
    return ctxOrError;
  }
  const ctx = ctxOrError as RepoContext;

  const validationError = validateIndex(ctx);
  if (validationError) {
    return validationError;
  }

  const langs = resolveLanguages(ctx.meta, input.lang);
  if (langs.length === 0) {
    return error('lang_not_available', {
      lang: input.lang,
      available: ctx.meta?.languages ?? [],
    });
  }

  try {
    const dbDir = defaultDbDir(ctx.repoRoot);
    const dim = typeof ctx.meta?.dim === 'number' ? ctx.meta.dim : 256;
    const { byLang } = await openTablesByLang({ dbDir, dim, mode: 'open_only', languages: langs as IndexLang[] });

    // `where` is null for regex/fuzzy (and for an empty pattern); those modes
    // fetch unfiltered rows and rely entirely on the in-memory pass below.
    const where = buildFileWhere(input.pattern, mode, input.caseInsensitive);

    const candidates: any[] = [];
    for (const lang of langs) {
      const table = byLang[lang as IndexLang];
      if (!table) continue;

      const base = table.refs.query();
      const scoped = where ? base.where(where) : base;
      const fetched = await scoped.limit(input.maxCandidates).toArray();

      for (const row of fetched as any[]) candidates.push({ ...row, lang });
    }

    // Filter and rank by file name
    const rows = filterAndRankFileRows(candidates, input.pattern, mode, input.caseInsensitive, input.limit);

    log.info('query_files', {
      ok: true,
      repoRoot: ctx.repoRoot,
      workspace: false,
      lang: input.lang,
      langs,
      mode,
      case_insensitive: input.caseInsensitive,
      limit: input.limit,
      max_candidates: input.maxCandidates,
      candidates: candidates.length,
      rows: rows.length,
      duration_ms: elapsedMs(),
    });

    const repoMap = input.withRepoMap ? await buildRepoMapAttachment(ctx.repoRoot, input) : undefined;
    const attachment = repoMap ? { repo_map: repoMap } : {};

    return success({
      repoRoot: ctx.repoRoot,
      count: rows.length,
      lang: input.lang,
      rows,
      ...attachment,
    });
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    log.error('query_files', { ok: false, duration_ms: elapsedMs(), err: message });
    return error('query_files_failed', { message });
  }
}
0 commit comments