Skip to content

Commit cb0878d

Browse files
authored
Merge pull request #22 from mars167/feat/query-files-cli
feat(cli): add query-files command for file name search
2 parents 91a995b + d1ce360 commit cb0878d

6 files changed

Lines changed: 623 additions & 0 deletions

File tree

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
import { Command } from 'commander';
2+
import { executeHandler } from '../types.js';
3+
4+
/**
 * `query-files` subcommand: searches indexed refs by file name.
 *
 * The command only declares its CLI surface; the positional pattern and the
 * parsed options are forwarded unchanged to `executeHandler` under the
 * `'query-files'` key. Numeric options are declared with string defaults and
 * are passed through as-is.
 */
export const queryFilesCommand = new Command('query-files');

// Command metadata and the single positional argument.
queryFilesCommand
  .description('Query refs table by file name match (substring/prefix/wildcard/regex/fuzzy)')
  .argument('<pattern>', 'File name pattern to search');

// Search-related options.
queryFilesCommand
  .option('-p, --path <path>', 'Path inside the repository', '.')
  .option('--limit <n>', 'Limit results', '50')
  .option('--mode <mode>', 'Mode: substring|prefix|wildcard|regex|fuzzy (default: auto)')
  .option('--case-insensitive', 'Case-insensitive matching', false)
  .option('--max-candidates <n>', 'Max candidates to fetch before filtering', '1000')
  .option('--lang <lang>', 'Language: auto|all|java|ts|python|go|rust|c|markdown|yaml', 'auto');

// Optional repo-map attachment options.
queryFilesCommand
  .option('--with-repo-map', 'Attach a lightweight repo map (ranked files + top symbols + wiki links)', false)
  .option('--repo-map-files <n>', 'Max repo map files', '20')
  .option('--repo-map-symbols <n>', 'Max repo map symbols per file', '5')
  .option('--wiki <dir>', 'Wiki directory (default: docs/wiki or wiki)', '');

queryFilesCommand.action(async (pattern, options) => {
  const handlerInput = { pattern, ...options };
  await executeHandler('query-files', handlerInput);
});
Lines changed: 303 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,303 @@
1+
import path from 'path';
2+
import fs from 'fs-extra';
3+
import { inferWorkspaceRoot, resolveGitRoot } from '../../core/git';
4+
import { defaultDbDir, openTablesByLang, type IndexLang } from '../../core/lancedb';
5+
import { queryManifestWorkspace } from '../../core/workspace';
6+
import { inferSymbolSearchMode, type SymbolSearchMode } from '../../core/symbolSearch';
7+
import { createLogger } from '../../core/log';
8+
import { resolveLangs } from '../../core/indexCheck';
9+
import { generateRepoMap, type FileRank } from '../../core/repoMap';
10+
import type { CLIResult, CLIError } from '../types';
11+
import { success, error } from '../types';
12+
import { resolveRepoContext, validateIndex, resolveLanguages, type RepoContext } from '../helpers';
13+
import type { SearchFilesInput } from '../schemas/queryFilesSchemas';
14+
15+
/**
 * Type guard for error results.
 *
 * @param value - Any value returned by a helper.
 * @returns `true` when `value` is a non-null object that has an `ok`
 *   property strictly equal to `false`.
 */
function isCLIError(value: unknown): value is CLIError {
  if (value === null || typeof value !== 'object') return false;
  if (!('ok' in value)) return false;
  return (value as { ok?: unknown }).ok === false;
}
18+
19+
/**
 * Builds the optional repo-map attachment for a query result.
 *
 * Resolves the wiki directory, then asks `generateRepoMap` for ranked files.
 * Any failure is reported in-band instead of being thrown, so a broken repo
 * map never fails the surrounding query.
 *
 * @param repoRoot - Repository root passed to `generateRepoMap`.
 * @param options - Wiki directory option and repo-map size limits.
 * @returns `{ enabled: true, wikiDir, files }` on success, otherwise
 *   `{ enabled: false, skippedReason }` carrying the error message.
 */
async function buildRepoMapAttachment(
  repoRoot: string,
  options: { wiki: string; repoMapFiles: number; repoMapSymbols: number }
): Promise<{ enabled: boolean; wikiDir: string; files: FileRank[] } | { enabled: boolean; skippedReason: string }> {
  try {
    const wikiDir = resolveWikiDir(repoRoot, options.wiki);
    const maxFiles = options.repoMapFiles;
    const maxSymbolsPerFile = options.repoMapSymbols;
    const files = await generateRepoMap({ repoRoot, maxFiles, maxSymbolsPerFile, wikiDir });
    return { enabled: true, wikiDir, files };
  } catch (e: any) {
    // Prefer the error's message; fall back to stringifying whatever was thrown.
    const skippedReason = String(e?.message ?? e);
    return { enabled: false, skippedReason };
  }
}
36+
37+
/**
 * Picks the wiki directory to use for repo-map links.
 *
 * @param repoRoot - Repository root used to resolve relative paths.
 * @param wikiOpt - User-supplied wiki directory; blank means "auto-detect".
 * @returns The explicit directory resolved against `repoRoot` when one is
 *   given; otherwise the first existing directory among `docs/wiki` and
 *   `wiki` under `repoRoot`; otherwise the empty string.
 */
function resolveWikiDir(repoRoot: string, wikiOpt: string): string {
  const requested = String(wikiOpt ?? '').trim();
  if (requested.length > 0) {
    return path.resolve(repoRoot, requested);
  }

  // Conventional locations, checked in priority order.
  const detected = [['docs', 'wiki'], ['wiki']]
    .map(segments => path.join(repoRoot, ...segments))
    .find(dir => fs.existsSync(dir));

  return detected ?? '';
}
46+
47+
/**
 * Maps a file path to an index language using its (case-sensitive) suffix.
 *
 * @param file - File path or name.
 * @returns The language of the first matching suffix group; `'ts'` when no
 *   suffix matches.
 */
function inferLangFromFile(file: string): IndexLang {
  // Checked top to bottom; the first group with a matching suffix wins.
  const suffixTable: ReadonlyArray<readonly [readonly string[], IndexLang]> = [
    [['.md', '.mdx'], 'markdown'],
    [['.yml', '.yaml'], 'yaml'],
    [['.java'], 'java'],
    [['.c', '.h'], 'c'],
    [['.go'], 'go'],
    [['.py'], 'python'],
    [['.rs'], 'rust'],
  ];

  const name = String(file);
  for (const [suffixes, lang] of suffixTable) {
    if (suffixes.some(suffix => name.endsWith(suffix))) {
      return lang;
    }
  }
  return 'ts';
}
58+
59+
/**
 * Restricts workspace rows to one language.
 *
 * @param rows - Rows carrying a `file` property.
 * @param langSel - Language selector; `'auto'` and `'all'` (and a missing
 *   value, which is treated as `'auto'`) disable filtering.
 * @returns The same array instance when filtering is disabled; otherwise a
 *   new array of rows whose file suffix maps to the selected language.
 */
function filterWorkspaceRowsByLang(rows: any[], langSel: string): any[] {
  const selection = String(langSel ?? 'auto');
  const keepEverything = selection === 'auto' || selection === 'all';
  if (keepEverything) {
    return rows;
  }

  const wanted = selection as IndexLang;
  return rows.filter(row => {
    const file = String((row as any).file ?? '');
    return inferLangFromFile(file) === wanted;
  });
}
65+
66+
/**
 * Escapes a value for use inside a single-quoted SQL string literal by
 * doubling every single quote.
 */
function escapeQuotes(s: string): string {
  return s.replace(/'/g, "''");
}

/**
 * Builds the SQL predicate used to pre-select candidate rows by `file`.
 *
 * The predicate only needs to select a superset of the final matches: the
 * caller re-checks every fetched row in memory. For that reason literal `%`
 * and `_` characters in the user pattern are left unescaped (they can only
 * widen the candidate set, never drop a real match).
 *
 * @param pattern - User-supplied file pattern.
 * @param mode - Search mode.
 * @param caseInsensitive - Use `ILIKE` instead of `LIKE`.
 * @returns A `file LIKE/ILIKE '…'` predicate for `prefix`, `substring` and
 *   `wildcard` modes; `null` for an empty pattern and for modes that are
 *   evaluated purely in memory (regex, fuzzy).
 */
function buildFileWhere(pattern: string, mode: SymbolSearchMode, caseInsensitive: boolean): string | null {
  const safe = escapeQuotes(pattern);
  if (!safe) return null;
  const likeOp = caseInsensitive ? 'ILIKE' : 'LIKE';

  if (mode === 'prefix') {
    return `file ${likeOp} '${safe}%'`;
  }

  if (mode === 'substring') {
    return `file ${likeOp} '%${safe}%'`;
  }

  if (mode === 'wildcard') {
    // Translate glob wildcards to their LIKE equivalents. Previously the glob
    // was embedded verbatim between `%…%`, so `*.ts` asked the database for
    // paths containing a literal `*` and wildcard searches fetched no
    // candidates. The pattern is not wrapped in `%` because the in-memory glob
    // match is anchored to the whole path as well.
    const likePattern = safe.replace(/\*/g, '%').replace(/\?/g, '_');
    return `file ${likeOp} '${likePattern}'`;
  }

  // Regex and fuzzy matching are handled in memory after fetching.
  return null;
}
86+
87+
/**
 * Compiles a user-supplied regular expression.
 *
 * @param pattern - Regular expression source.
 * @param caseInsensitive - Adds the `i` flag when true.
 * @returns The compiled expression, or `null` when the source is not a
 *   valid regular expression.
 */
function buildRegex(pattern: string, caseInsensitive: boolean): RegExp | null {
  let compiled: RegExp | null = null;
  try {
    compiled = new RegExp(pattern, caseInsensitive ? 'i' : '');
  } catch {
    // Invalid source: report "no regex" instead of throwing.
    compiled = null;
  }
  return compiled;
}
95+
96+
/**
 * Converts a simple glob into an anchored regular expression.
 *
 * `*` becomes `.*`, `?` becomes `.`, and every other character is passed
 * through `escapeRegex` so it matches literally. The result is wrapped in
 * `^…$`, so it must match the whole input string.
 *
 * @param pattern - Glob pattern.
 * @param caseInsensitive - Adds the `i` flag when true.
 * @returns The compiled expression, or `null` if construction throws.
 */
function globToRegex(pattern: string, caseInsensitive: boolean): RegExp | null {
  try {
    let source = '';
    for (let idx = 0; idx < pattern.length; idx += 1) {
      const ch = pattern[idx];
      if (ch === '*') {
        source += '.*';
      } else if (ch === '?') {
        source += '.';
      } else {
        source += escapeRegex(ch);
      }
    }
    return new RegExp(`^${source}$`, caseInsensitive ? 'i' : '');
  } catch {
    return null;
  }
}
112+
113+
/**
 * Backslash-escapes regular-expression metacharacters so the string can be
 * embedded in a pattern and match literally.
 *
 * @param s - Text to escape.
 * @returns `s` with each of `. * + ? ^ $ { } ( ) | [ ] \` prefixed by `\`.
 */
function escapeRegex(s: string): string {
  const metaChars = /[.*+?^${}()|[\]\\]/g;
  return s.replace(metaChars, matched => '\\' + matched);
}
116+
117+
/**
 * Applies the final, in-memory file-name match to candidate rows.
 *
 * - `substring` / `prefix`: plain string test on `row.file`, optionally
 *   lower-casing both sides; input order is kept.
 * - `wildcard` / `regex`: test against the compiled expression; an
 *   expression that fails to compile yields an empty result.
 * - any other mode: fuzzy scoring via `fuzzyFileScore`, keeping rows with a
 *   non-negative score, sorted by descending score.
 *
 * @param rows - Candidate rows; a missing `file` is treated as `''`.
 * @param pattern - User pattern.
 * @param mode - Search mode.
 * @param caseInsensitive - Whether matching ignores case.
 * @param limit - Maximum number of rows returned (values below 1 are raised to 1).
 * @returns At most `limit` matching rows.
 */
function filterAndRankFileRows<T extends Record<string, any>>(
  rows: T[],
  pattern: string,
  mode: SymbolSearchMode,
  caseInsensitive: boolean,
  limit: number
): T[] {
  const cap = Math.max(1, limit);
  const fileOf = (row: any): string => String(row?.file ?? '');
  const takeMatching = (matches: (file: string) => boolean): T[] =>
    rows.filter(row => matches(fileOf(row))).slice(0, cap);

  switch (mode) {
    case 'substring':
    case 'prefix': {
      const fold = (text: string): string => (caseInsensitive ? text.toLowerCase() : text);
      const wanted = fold(pattern);
      return mode === 'prefix'
        ? takeMatching(file => fold(file).startsWith(wanted))
        : takeMatching(file => fold(file).includes(wanted));
    }
    case 'wildcard': {
      const matcher = globToRegex(pattern, caseInsensitive);
      return matcher ? takeMatching(file => matcher.test(file)) : [];
    }
    case 'regex': {
      const matcher = buildRegex(pattern, caseInsensitive);
      return matcher ? takeMatching(file => matcher.test(file)) : [];
    }
  }

  // Fuzzy matching: score every row, drop non-matches, best score first.
  const ranked: Array<{ row: T; score: number }> = [];
  for (const row of rows) {
    const score = fuzzyFileScore(pattern, fileOf(row), caseInsensitive);
    if (score >= 0) {
      ranked.push({ row, score });
    }
  }
  ranked.sort((left, right) => right.score - left.score);
  return ranked.slice(0, cap).map(entry => entry.row);
}
164+
165+
/**
 * Scores how well `needle` matches `haystack` as an in-order subsequence.
 *
 * The haystack is scanned left to right, greedily consuming needle
 * characters. Each matched character adds 1 point, or 2 points when it
 * immediately follows the previously matched position.
 *
 * @param needle - Characters to find, in order.
 * @param haystack - Text to search.
 * @param caseInsensitive - Lower-cases both inputs before comparing.
 * @returns `0` for an empty needle, `-1` when not every needle character
 *   could be matched, otherwise the accumulated score.
 */
function fuzzyFileScore(needle: string, haystack: string, caseInsensitive: boolean): number {
  if (needle.length === 0) return 0;

  const query = caseInsensitive ? needle.toLowerCase() : needle;
  const text = caseInsensitive ? haystack.toLowerCase() : haystack;

  let matched = 0;
  let total = 0;
  // -2 guarantees the first match (even at index 0) is never counted as adjacent.
  let previousHit = -2;
  let pos = 0;

  while (pos < text.length && matched < query.length) {
    if (text[pos] === query[matched]) {
      total += pos - previousHit === 1 ? 2 : 1;
      previousHit = pos;
      matched += 1;
    }
    pos += 1;
  }

  return matched === query.length ? total : -1;
}
185+
186+
/**
 * Handler for `query-files`: finds indexed rows whose `file` matches a pattern.
 *
 * Flow:
 * 1. Resolve the git root for `input.path` and infer the search mode.
 * 2. If the root is a workspace (per `inferWorkspaceRoot`), query the
 *    workspace manifest, filter by language and pattern in memory, and
 *    return; a requested repo map is reported as skipped in this mode.
 * 3. Otherwise resolve the repo context, validate the index and the
 *    requested languages, fetch up to `maxCandidates` rows per language
 *    table (with a SQL pre-filter when one is available for the mode), then
 *    filter/rank in memory and optionally attach a repo map.
 *
 * @param input - Validated command input.
 * @returns A success result with the matching rows, or an error result from
 *   context resolution, index validation, language resolution, or a failure
 *   while opening/querying the tables (`query_files_failed`).
 */
export async function handleSearchFiles(input: SearchFilesInput): Promise<CLIResult | CLIError> {
  const log = createLogger({ component: 'cli', cmd: 'query-files' });
  const startedAt = Date.now();

  const repoRoot = await resolveGitRoot(path.resolve(input.path));
  const mode = inferSymbolSearchMode(input.pattern, input.mode);

  // Workspace mode: candidates come from the workspace manifest query.
  if (inferWorkspaceRoot(repoRoot)) {
    const res = await queryManifestWorkspace({
      manifestRepoRoot: repoRoot,
      keyword: input.pattern,
      limit: input.maxCandidates,
    });
    const rows = filterAndRankFileRows(
      filterWorkspaceRowsByLang(res.rows, input.lang),
      input.pattern,
      mode,
      input.caseInsensitive,
      input.limit
    );

    log.info('query_files', {
      ok: true,
      repoRoot,
      workspace: true,
      mode,
      case_insensitive: input.caseInsensitive,
      limit: input.limit,
      max_candidates: input.maxCandidates,
      candidates: res.rows.length,
      rows: rows.length,
      duration_ms: Date.now() - startedAt,
    });

    if (!input.withRepoMap) {
      return success({ ...res, rows });
    }
    // Repo maps are not produced in workspace mode; say so explicitly.
    return success({
      ...res,
      rows,
      repo_map: { enabled: false, skippedReason: 'workspace_mode_not_supported' },
    });
  }

  // Single-repository mode.
  const ctxOrError = await resolveRepoContext(input.path);
  if (isCLIError(ctxOrError)) {
    return ctxOrError;
  }
  const ctx = ctxOrError as RepoContext;

  const validationError = validateIndex(ctx);
  if (validationError) {
    return validationError;
  }

  const langs = resolveLanguages(ctx.meta, input.lang);
  if (langs.length === 0) {
    return error('lang_not_available', {
      lang: input.lang,
      available: ctx.meta?.languages ?? [],
    });
  }

  try {
    const dbDir = defaultDbDir(ctx.repoRoot);
    const dim = typeof ctx.meta?.dim === 'number' ? ctx.meta.dim : 256;
    const { byLang } = await openTablesByLang({ dbDir, dim, mode: 'open_only', languages: langs as IndexLang[] });

    // SQL pre-filter for the mode, if any. Regex and fuzzy modes always scan
    // unfiltered and are matched in memory.
    const where = buildFileWhere(input.pattern, mode, input.caseInsensitive);
    const sqlFilter = mode === 'regex' || mode === 'fuzzy' ? null : where;

    const candidates: any[] = [];
    for (const lang of langs) {
      const tables = byLang[lang as IndexLang];
      if (!tables) continue;

      const query = sqlFilter ? tables.refs.query().where(sqlFilter) : tables.refs.query();
      const fetched = await query.limit(input.maxCandidates).toArray();

      // Tag every row with the language table it came from.
      for (const r of fetched as any[]) candidates.push({ ...r, lang });
    }

    // Final match and ranking on the file name.
    const rows = filterAndRankFileRows(candidates, input.pattern, mode, input.caseInsensitive, input.limit);

    log.info('query_files', {
      ok: true,
      repoRoot: ctx.repoRoot,
      workspace: false,
      lang: input.lang,
      langs,
      mode,
      case_insensitive: input.caseInsensitive,
      limit: input.limit,
      max_candidates: input.maxCandidates,
      candidates: candidates.length,
      rows: rows.length,
      duration_ms: Date.now() - startedAt,
    });

    const repoMap = input.withRepoMap ? await buildRepoMapAttachment(ctx.repoRoot, input) : undefined;

    return success({
      repoRoot: ctx.repoRoot,
      count: rows.length,
      lang: input.lang,
      rows,
      ...(repoMap ? { repo_map: repoMap } : {}),
    });
  } catch (e) {
    const message = e instanceof Error ? e.message : String(e);
    log.error('query_files', { ok: false, duration_ms: Date.now() - startedAt, err: message });
    return error('query_files_failed', { message });
  }
}

src/cli/registry.ts

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,9 +20,11 @@ import {
2020
import { SemanticSearchSchema } from './schemas/semanticSchemas';
2121
import { IndexRepoSchema } from './schemas/indexSchemas';
2222
import { SearchSymbolsSchema } from './schemas/querySchemas';
23+
import { SearchFilesSchema } from './schemas/queryFilesSchemas';
2324
import { handleSemanticSearch } from './handlers/semanticHandlers';
2425
import { handleIndexRepo } from './handlers/indexHandlers';
2526
import { handleSearchSymbols } from './handlers/queryHandlers';
27+
import { handleSearchFiles } from './handlers/queryFilesHandlers';
2628
import { CheckIndexSchema, StatusSchema } from './schemas/statusSchemas';
2729
import { handleCheckIndex, handleStatus } from './handlers/statusHandlers';
2830
import { PackIndexSchema, UnpackIndexSchema } from './schemas/archiveSchemas';
@@ -59,6 +61,10 @@ export const cliHandlers: Record<string, HandlerRegistration<any>> = {
5961
schema: SearchSymbolsSchema,
6062
handler: handleSearchSymbols,
6163
},
64+
'query-files': {
65+
schema: SearchFilesSchema,
66+
handler: handleSearchFiles,
67+
},
6268
'status': {
6369
schema: StatusSchema,
6470
handler: handleStatus,
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
import { z } from 'zod';
2+
3+
/** Accepted values for the `lang` field. */
const languageEnum = z.enum(['auto', 'all', 'java', 'ts', 'python', 'go', 'rust', 'c', 'markdown', 'yaml']);

/** Accepted values for the optional `mode` field. */
const searchModeEnum = z.enum(['substring', 'prefix', 'wildcard', 'regex', 'fuzzy']);

/** Positive integer field that coerces its input to a number and falls back to `fallback`. */
const positiveIntWithDefault = (fallback: number) => z.coerce.number().int().positive().default(fallback);

/**
 * Input schema for file-name search.
 *
 * `pattern` is required and non-empty; every other field is optional and
 * has a default, except `mode`, which stays undefined when omitted.
 */
export const SearchFilesSchema = z.object({
  pattern: z.string().min(1, 'Pattern is required'),
  path: z.string().default('.'),
  limit: positiveIntWithDefault(50),
  mode: searchModeEnum.optional(),
  caseInsensitive: z.boolean().default(false),
  maxCandidates: positiveIntWithDefault(1000),
  lang: languageEnum.default('auto'),
  withRepoMap: z.boolean().default(false),
  repoMapFiles: positiveIntWithDefault(20),
  repoMapSymbols: positiveIntWithDefault(5),
  wiki: z.string().default(''),
});

/** Parsed (output) shape of {@link SearchFilesSchema}. */
export type SearchFilesInput = z.infer<typeof SearchFilesSchema>;

src/commands/ai.ts

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
import { Command } from 'commander';
22
import { indexCommand } from '../cli/commands/indexCommand.js';
33
import { queryCommand } from '../cli/commands/queryCommand.js';
4+
import { queryFilesCommand } from '../cli/commands/queryFilesCommand.js';
45
import { semanticCommand } from '../cli/commands/semanticCommand.js';
56
import { serveCommand, agentCommand } from '../cli/commands/serveCommands.js';
67
import { packCommand, unpackCommand } from '../cli/commands/archiveCommands.js';
@@ -16,6 +17,7 @@ export const aiCommand = new Command('ai')
1617
.addCommand(statusCommand)
1718
.addCommand(repoMapCommand)
1819
.addCommand(queryCommand)
20+
.addCommand(queryFilesCommand)
1921
.addCommand(semanticCommand)
2022
.addCommand(graphCommand)
2123
.addCommand(packCommand)

0 commit comments

Comments
 (0)