diff --git a/src/qmd.ts b/src/qmd.ts index d57b7e8..69dd534 100755 --- a/src/qmd.ts +++ b/src/qmd.ts @@ -1298,7 +1298,7 @@ function collectionList(): void { for (const coll of collections) { const updatedAt = coll.last_modified ? new Date(coll.last_modified) : new Date(); const timeAgo = formatTimeAgo(updatedAt); - + // Get YAML config to check includeByDefault const yamlColl = getCollectionFromYaml(coll.name); const excluded = yamlColl?.includeByDefault === false; @@ -1998,14 +1998,11 @@ function search(query: string, opts: OutputOptions): void { // Validate collection filter (supports multiple -c flags) // Use default collections if none specified const collectionNames = resolveCollectionFilter(opts.collection, true); - const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined; + const effectiveCollections = collectionNames.length > 0 ? collectionNames : undefined; // Use large limit for --all, otherwise fetch more than needed and let outputResults filter const fetchLimit = opts.all ? 100000 : Math.max(50, opts.limit * 2); - const results = filterByCollections( - searchFTS(db, query, fetchLimit, singleCollection), - collectionNames - ); + const results = searchFTS(db, query, fetchLimit, effectiveCollections); // Add context to results const resultsWithContext = results.map(r => ({ @@ -2053,13 +2050,13 @@ async function vectorSearch(query: string, opts: OutputOptions, _model: string = // Validate collection filter (supports multiple -c flags) // Use default collections if none specified const collectionNames = resolveCollectionFilter(opts.collection, true); - const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined; + const effectiveCollections = collectionNames.length > 0 ? collectionNames : undefined; checkIndexHealth(store.db); await withLLMSession(async () => { let results = await vectorSearchQuery(store, query, { - collection: singleCollection, + collection: effectiveCollections, limit: opts.all ? 500 : (opts.limit || 10), minScore: opts.minScore || 0.3, hooks: { @@ -2070,14 +2067,6 @@ async function vectorSearch(query: string, opts: OutputOptions, _model: string = }, }); - // Post-filter for multi-collection - if (collectionNames.length > 1) { - results = results.filter(r => { - const prefixes = collectionNames.map(n => `qmd://${n}/`); - return prefixes.some(p => r.file.startsWith(p)); - }); - } - closeDb(); if (results.length === 0) { @@ -2107,7 +2096,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri // Validate collection filter (supports multiple -c flags) // Use default collections if none specified const collectionNames = resolveCollectionFilter(opts.collection, true); - const singleCollection = collectionNames.length === 1 ? collectionNames[0] : undefined; + const effectiveCollections = collectionNames.length > 0 ? collectionNames : undefined; checkIndexHealth(store.db); @@ -2121,7 +2110,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri // Structured search — user provided their own query expansions const typeLabels = structuredQueries.map(s => s.type).join('+'); process.stderr.write(`${c.dim}Structured search: ${structuredQueries.length} queries (${typeLabels})${c.reset}\n`); - + // Log each sub-query for (const s of structuredQueries) { let preview = s.query.replace(/\n/g, ' '); @@ -2131,7 +2120,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri process.stderr.write(`${c.dim}└─ Searching...${c.reset}\n`); results = await structuredSearch(store, structuredQueries, { - collections: singleCollection ? [singleCollection] : undefined, + collection: effectiveCollections, limit: opts.all ? 500 : (opts.limit || 10), minScore: opts.minScore || 0, hooks: { @@ -2154,7 +2143,7 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri } else { // Standard hybrid query with automatic expansion results = await hybridQuery(store, query, { - collection: singleCollection, + collection: effectiveCollections, limit: opts.all ? 500 : (opts.limit || 10), minScore: opts.minScore || 0, hooks: { @@ -2187,14 +2176,6 @@ async function querySearch(query: string, opts: OutputOptions, _embedModel: stri }); } - // Post-filter for multi-collection - if (collectionNames.length > 1) { - results = results.filter(r => { - const prefixes = collectionNames.map(n => `qmd://${n}/`); - return prefixes.some(p => r.file.startsWith(p)); - }); - } - closeDb(); if (results.length === 0) { diff --git a/src/store.ts b/src/store.ts index ff08c2a..fe3bc35 100644 --- a/src/store.ts +++ b/src/store.ts @@ -259,7 +259,7 @@ export function homedir(): string { */ export function isAbsolutePath(path: string): boolean { if (!path) return false; - + // Unix absolute path if (path.startsWith('/')) { // Check if it's a Git Bash style path like /c/ or /c/Users (C-Z only, not A or B) @@ -273,12 +273,12 @@ export function isAbsolutePath(path: string): boolean { // Any other path starting with / is Unix absolute return true; } - + // Windows native path: C:\ or C:/ (any letter A-Z) if (path.length >= 2 && /[a-zA-Z]/.test(path[0]!) && path[1] === ':') { return true; } - + return false; } @@ -300,25 +300,25 @@ export function getRelativePathFromPrefix(path: string, prefix: string): string if (!prefix) { return null; } - + const normalizedPath = normalizePathSeparators(path); const normalizedPrefix = normalizePathSeparators(prefix); - + // Ensure prefix ends with / for proper matching - const prefixWithSlash = !normalizedPrefix.endsWith('/') - ? normalizedPrefix + '/' + const prefixWithSlash = !normalizedPrefix.endsWith('/') + ? normalizedPrefix + '/' : normalizedPrefix; - + // Exact match if (normalizedPath === normalizedPrefix) { return ''; } - + // Check if path starts with prefix if (normalizedPath.startsWith(prefixWithSlash)) { return normalizedPath.slice(prefixWithSlash.length); } - + return null; } @@ -326,18 +326,18 @@ export function resolve(...paths: string[]): string { if (paths.length === 0) { throw new Error("resolve: at least one path segment is required"); } - + // Normalize all paths to use forward slashes const normalizedPaths = paths.map(normalizePathSeparators); - + let result = ''; let windowsDrive = ''; - + // Check if first path is absolute const firstPath = normalizedPaths[0]!; if (isAbsolutePath(firstPath)) { result = firstPath; - + // Extract Windows drive letter if present if (firstPath.length >= 2 && /[a-zA-Z]/.test(firstPath[0]!) && firstPath[1] === ':') { windowsDrive = firstPath.slice(0, 2); @@ -353,7 +353,7 @@ export function resolve(...paths: string[]): string { } else { // Start with PWD or cwd, then append the first relative path const pwd = normalizePathSeparators(process.env.PWD || process.cwd()); - + // Extract Windows drive from PWD if present if (pwd.length >= 2 && /[a-zA-Z]/.test(pwd[0]!) && pwd[1] === ':') { windowsDrive = pwd.slice(0, 2); @@ -362,14 +362,14 @@ export function resolve(...paths: string[]): string { result = pwd + '/' + firstPath; } } - + // Process remaining paths for (let i = 1; i < normalizedPaths.length; i++) { const p = normalizedPaths[i]!; if (isAbsolutePath(p)) { // Absolute path replaces everything result = p; - + // Update Windows drive if present if (p.length >= 2 && /[a-zA-Z]/.test(p[0]!) && p[1] === ':') { windowsDrive = p.slice(0, 2); @@ -391,7 +391,7 @@ export function resolve(...paths: string[]): string { result = result + '/' + p; } } - + // Normalize . and .. components const parts = result.split('/').filter(Boolean); const normalized: string[] = []; @@ -402,15 +402,15 @@ export function resolve(...paths: string[]): string { normalized.push(part); } } - + // Build final path const finalPath = '/' + normalized.join('/'); - + // Prepend Windows drive if present if (windowsDrive) { return windowsDrive + finalPath; } - + return finalPath; } @@ -805,8 +805,8 @@ export type Store = { toVirtualPath: (absolutePath: string) => string | null; // Search - searchFTS: (query: string, limit?: number, collectionName?: string) => SearchResult[]; - searchVec: (query: string, model: string, limit?: number, collectionName?: string, session?: ILLMSession, precomputedEmbedding?: number[]) => Promise; + searchFTS: (query: string, limit?: number, collectionName?: string | string[]) => SearchResult[]; + searchVec: (query: string, model: string, limit?: number, collectionName?: string | string[], session?: ILLMSession, precomputedEmbedding?: number[]) => Promise; // Query expansion & reranking expandQuery: (query: string, model?: string) => Promise; @@ -888,8 +888,8 @@ export function createStore(dbPath?: string): Store { toVirtualPath: (absolutePath: string) => toVirtualPath(db, absolutePath), // Search - searchFTS: (query: string, limit?: number, collectionName?: string) => searchFTS(db, query, limit, collectionName), - searchVec: (query: string, model: string, limit?: number, collectionName?: string, session?: ILLMSession, precomputedEmbedding?: number[]) => searchVec(db, query, model, limit, collectionName, session, precomputedEmbedding), + searchFTS: (query: string, limit?: number, collectionName?: string | string[]) => searchFTS(db, query, limit, collectionName), + searchVec: (query: string, model: string, limit?: number, collectionName?: string | string[], session?: ILLMSession, precomputedEmbedding?: number[]) => searchVec(db, query, model, limit, collectionName, session, precomputedEmbedding), // Query expansion & reranking expandQuery: (query: string, model?: string) => expandQuery(query, model, db), @@ -1503,7 +1503,7 @@ export function normalizeDocid(docid: string): string { // Strip surrounding quotes (single or double) if ((normalized.startsWith('"') && normalized.endsWith('"')) || - (normalized.startsWith("'") && normalized.endsWith("'"))) { + (normalized.startsWith("'") && normalized.endsWith("'"))) { normalized = normalized.slice(1, -1); } @@ -2082,7 +2082,7 @@ export function validateSemanticQuery(query: string): string | null { return null; } -export function searchFTS(db: Database, query: string, limit: number = 20, collectionName?: string): SearchResult[] { +export function searchFTS(db: Database, query: string, limit: number = 20, collectionName?: string | string[]): SearchResult[] { const ftsQuery = buildFTS5Query(query); if (!ftsQuery) return []; @@ -2102,8 +2102,19 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle const params: (string | number)[] = [ftsQuery]; if (collectionName) { - sql += ` AND d.collection = ?`; - params.push(String(collectionName)); + if (Array.isArray(collectionName)) { + if (collectionName.length > 0) { + const placeholders = collectionName.map(() => '?').join(','); + sql += ` AND d.collection IN (${placeholders})`; + params.push(...collectionName); + } else { + // Empty array means no collections matched (false-safe) + return []; + } + } else { + sql += ` AND d.collection = ?`; + params.push(String(collectionName)); + } } // bm25 lower is better; sort ascending. @@ -2139,7 +2150,7 @@ export function searchFTS(db: Database, query: string, limit: number = 20, colle // Vector Search // ============================================================================= -export async function searchVec(db: Database, query: string, model: string, limit: number = 20, collectionName?: string, session?: ILLMSession, precomputedEmbedding?: number[]): Promise { +export async function searchVec(db: Database, query: string, model: string, limit: number = 20, collectionName?: string | string[], session?: ILLMSession, precomputedEmbedding?: number[]): Promise { const tableExists = db.prepare(`SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'`).get(); if (!tableExists) return []; @@ -2183,8 +2194,18 @@ export async function searchVec(db: Database, query: string, model: string, limi const params: string[] = [...hashSeqs]; if (collectionName) { - docSql += ` AND d.collection = ?`; - params.push(collectionName); + if (Array.isArray(collectionName)) { + if (collectionName.length > 0) { + const collPlaceholders = collectionName.map(() => '?').join(','); + docSql += ` AND d.collection IN (${collPlaceholders})`; + params.push(...collectionName); + } else { + return []; + } + } else { + docSql += ` AND d.collection = ?`; + params.push(collectionName); + } } const docRows = db.prepare(docSql).all(...params) as { @@ -2863,7 +2884,7 @@ export interface SearchHooks { } export interface HybridQueryOptions { - collection?: string; + collection?: string | string[]; limit?: number; // default 10 minScore?: number; // default 0 candidateLimit?: number; // default RERANK_CANDIDATE_LIMIT @@ -3171,7 +3192,7 @@ export interface StructuredSubSearch { } export interface StructuredSearchOptions { - collections?: string[]; // Filter to specific collections (OR match) + collection?: string | string[]; // Filter to specific collections (OR match) limit?: number; // default 10 minScore?: number; // default 0 candidateLimit?: number; // default RERANK_CANDIDATE_LIMIT @@ -3208,7 +3229,11 @@ export async function structuredSearch( const candidateLimit = options?.candidateLimit ?? RERANK_CANDIDATE_LIMIT; const hooks = options?.hooks; - const collections = options?.collections; + const collections = Array.isArray(options?.collection) + ? options?.collection + : options?.collection + ? [options?.collection] + : undefined; if (searches.length === 0) return []; @@ -3248,21 +3273,16 @@ export async function structuredSearch( `SELECT name FROM sqlite_master WHERE type='table' AND name='vectors_vec'` ).get(); - // Helper to run search across collections (or all if undefined) - const collectionList = collections ?? [undefined]; // undefined = all collections - // Step 1: Run FTS for all lex searches (sync, instant) for (const search of searches) { if (search.type === 'lex') { - for (const coll of collectionList) { - const ftsResults = store.searchFTS(search.query, 20, coll); - if (ftsResults.length > 0) { - for (const r of ftsResults) docidMap.set(r.filepath, r.docid); - rankedLists.push(ftsResults.map(r => ({ - file: r.filepath, displayPath: r.displayPath, - title: r.title, body: r.body || "", score: r.score, - }))); - } + const ftsResults = store.searchFTS(search.query, 20, collections); + if (ftsResults.length > 0) { + for (const r of ftsResults) docidMap.set(r.filepath, r.docid); + rankedLists.push(ftsResults.map(r => ({ + file: r.filepath, displayPath: r.displayPath, + title: r.title, body: r.body || "", score: r.score, + }))); } } } @@ -3282,18 +3302,16 @@ export async function structuredSearch( const embedding = embeddings[i]?.embedding; if (!embedding) continue; - for (const coll of collectionList) { - const vecResults = await store.searchVec( - vecSearches[i]!.query, DEFAULT_EMBED_MODEL, 20, coll, - undefined, embedding - ); - if (vecResults.length > 0) { - for (const r of vecResults) docidMap.set(r.filepath, r.docid); - rankedLists.push(vecResults.map(r => ({ - file: r.filepath, displayPath: r.displayPath, - title: r.title, body: r.body || "", score: r.score, - }))); - } + const vecResults = await store.searchVec( + vecSearches[i]!.query, DEFAULT_EMBED_MODEL, 20, collections, + undefined, embedding + ); + if (vecResults.length > 0) { + for (const r of vecResults) docidMap.set(r.filepath, r.docid); + rankedLists.push(vecResults.map(r => ({ + file: r.filepath, displayPath: r.displayPath, + title: r.title, body: r.body || "", score: r.score, + }))); } } } diff --git a/test/cli.test.ts b/test/cli.test.ts index b723e7d..411d896 100644 --- a/test/cli.test.ts +++ b/test/cli.test.ts @@ -522,6 +522,41 @@ describe("CLI Search with Collection Filter", () => { console.log("stderr:", stderr); } expect(exitCode).toBe(0); + // Should contain meeting from notes + expect(stdout.toLowerCase()).toContain("meeting"); + // Should not contain unrelated files + expect(stdout.toLowerCase()).not.toContain("api docs"); + }); + + test("filters search by multiple collection names", async () => { + // Add a file in the second collection structure that matches the same query + await runQmd(["collection", "add", ".", "--name", "mixed", "--mask", "notes/ideas.md"], { dbPath: localDbPath }); + + // Both 'notes' and 'mixed' exist but 'docs' doesn't have the word 'meeting' + const { stdout, stderr, exitCode } = await runQmd([ + "search", + "-c", + "notes", + "-c", + "docs", // Include one where it won't be found + "meeting", + ], { dbPath: localDbPath }); + + expect(exitCode).toBe(0); + // The query should STILL return the match from notes despite 'docs' having no match + expect(stdout.toLowerCase()).toContain("meeting"); + + const { stdout: stdoutEmptyRef, exitCode: exitCodeEmptyRef } = await runQmd([ + "search", + "-c", + "notes", + "-c", + "nonexistent_collection", + "meeting", + ], { dbPath: localDbPath }); + + // When a nonexistent collection is requested, qmd exits with error code 1. + expect(exitCodeEmptyRef).toBe(1); }); }); @@ -1072,10 +1107,10 @@ describe("mcp http daemon", () => { const pf = pidPath(); if (existsSync(pf)) { const pid = parseInt(readFileSync(pf, "utf-8").trim()); - try { process.kill(pid, "SIGTERM"); } catch {} + try { process.kill(pid, "SIGTERM"); } catch { } unlinkSync(pf); } - } catch {} + } catch { } await rm(daemonTestDir, { recursive: true, force: true }); }); @@ -1128,7 +1163,7 @@ describe("mcp http daemon", () => { // Clean up process.kill(pid, "SIGTERM"); await sleep(500); - try { require("fs").unlinkSync(pidPath()); } catch {} + try { require("fs").unlinkSync(pidPath()); } catch { } }); test("stop kills daemon and removes PID file", async () => { @@ -1195,7 +1230,7 @@ describe("mcp http daemon", () => { // Clean up first daemon process.kill(pid, "SIGTERM"); await sleep(500); - try { require("fs").unlinkSync(pidPath()); } catch {} + try { require("fs").unlinkSync(pidPath()); } catch { } }); test("--daemon cleans stale PID file and starts fresh", async () => { @@ -1219,6 +1254,6 @@ describe("mcp http daemon", () => { expect(ready).toBe(true); process.kill(pid, "SIGTERM"); await sleep(500); - try { require("fs").unlinkSync(pidPath()); } catch {} + try { require("fs").unlinkSync(pidPath()); } catch { } }); });