diff --git a/CHANGELOG.md b/CHANGELOG.md index bc94d43..7e95c9a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,11 @@ # @butttons/dora +## 1.5.0 + +### Minor Changes + +- Fix TypeScript strict errors. + ## 1.4.6 ### Patch Changes diff --git a/package.json b/package.json index 0085c4b..3754e43 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@butttons/dora", - "version": "1.4.6", + "version": "1.5.0", "module": "src/index.ts", "type": "module", "private": true, @@ -14,6 +14,7 @@ "license": "MIT", "scripts": { "test": "bun test ./test/", + "tsc": "tsc", "build": "bun build src/index.ts --compile --outfile dist/dora", "generate-proto": "protoc --plugin=./node_modules/.bin/protoc-gen-es --es_out=src/converter --es_opt=target=ts --proto_path=src/proto src/proto/scip.proto", "biome:format": "biome format --write ./src", diff --git a/src/commands/adventure.ts b/src/commands/adventure.ts index 6107f12..f208b3c 100644 --- a/src/commands/adventure.ts +++ b/src/commands/adventure.ts @@ -3,45 +3,45 @@ import { getDependencies, getReverseDependencies } from "../db/queries.ts"; import type { PathResult } from "../types.ts"; import { CtxError } from "../utils/errors.ts"; import { - DEFAULTS, - outputJson, - resolveAndValidatePath, - setupCommand, + DEFAULTS, + outputJson, + resolveAndValidatePath, + setupCommand, } from "./shared.ts"; export async function adventure(from: string, to: string): Promise { - const ctx = await setupCommand(); - - const fromPath = resolveAndValidatePath({ ctx, inputPath: from }); - const toPath = resolveAndValidatePath({ ctx, inputPath: to }); - - // If same file, return direct path - if (fromPath === toPath) { - const result: PathResult = { - from: fromPath, - to: toPath, - path: [fromPath], - distance: 0, - }; - outputJson(result); - return; - } - - // Use BFS to find shortest path - const foundPath = findShortestPath(ctx.db, fromPath, toPath); - - if (!foundPath) { - throw new CtxError(`No path found from ${fromPath} 
to ${toPath}`); - } - - const result: PathResult = { - from: fromPath, - to: toPath, - path: foundPath, - distance: foundPath.length - 1, - }; - - outputJson(result); + const ctx = await setupCommand(); + + const fromPath = resolveAndValidatePath({ ctx, inputPath: from }); + const toPath = resolveAndValidatePath({ ctx, inputPath: to }); + + // If same file, return direct path + if (fromPath === toPath) { + const result: PathResult = { + from: fromPath, + to: toPath, + path: [fromPath], + distance: 0, + }; + outputJson(result); + return; + } + + // Use BFS to find shortest path + const foundPath = findShortestPath(ctx.db, fromPath, toPath); + + if (!foundPath) { + throw new CtxError(`No path found from ${fromPath} to ${toPath}`); + } + + const result: PathResult = { + from: fromPath, + to: toPath, + path: foundPath, + distance: foundPath.length - 1, + }; + + outputJson(result); } /** @@ -54,106 +54,106 @@ export async function adventure(from: string, to: string): Promise { * This is not a blocker for release as path finding is infrequently used. 
*/ function findShortestPath( - db: Database, - from: string, - to: string, + db: Database, + from: string, + to: string ): string[] | null { - // Try increasing depths until we find a path or reach max depth - const maxDepth = DEFAULTS.MAX_PATH_DEPTH; - - for (let depth = 1; depth <= maxDepth; depth++) { - // Get dependencies from 'from' file - const forwardDeps = getDependencies(db, from, depth); - const forwardSet = new Set(forwardDeps.map((d) => d.path)); - - // Check if 'to' is in forward dependencies - if (forwardSet.has(to)) { - // Reconstruct path using BFS - return reconstructPath(db, from, to, depth, true); - } - - // Get reverse dependencies from 'to' file - const reverseDeps = getReverseDependencies({ db, relativePath: to, depth }); - const reverseSet = new Set(reverseDeps.map((d) => d.path)); - - // Check if 'from' is in reverse dependencies - if (reverseSet.has(from)) { - // Path exists in reverse direction - return reconstructPath(db, from, to, depth, true); - } - - // Check for intersection between forward and reverse - for (const forwardFile of forwardSet) { - if (reverseSet.has(forwardFile)) { - // Found a connecting file - const pathToMiddle = reconstructPath( - db, - from, - forwardFile, - depth, - true, - ); - const pathFromMiddle = reconstructPath( - db, - forwardFile, - to, - depth, - true, - ); - - if (pathToMiddle && pathFromMiddle) { - // Combine paths (remove duplicate middle file) - return [...pathToMiddle, ...pathFromMiddle.slice(1)]; - } - } - } - } - - return null; + // Try increasing depths until we find a path or reach max depth + const maxDepth = DEFAULTS.MAX_PATH_DEPTH; + + for (let depth = 1; depth <= maxDepth; depth++) { + // Get dependencies from 'from' file + const forwardDeps = getDependencies(db, from, depth); + const forwardSet = new Set(forwardDeps.map((d) => d.path)); + + // Check if 'to' is in forward dependencies + if (forwardSet.has(to)) { + // Reconstruct path using BFS + return reconstructPath(db, from, to, depth, 
true); + } + + // Get reverse dependencies from 'to' file + const reverseDeps = getReverseDependencies(db, to, depth); + const reverseSet = new Set(reverseDeps.map((d) => d.path)); + + // Check if 'from' is in reverse dependencies + if (reverseSet.has(from)) { + // Path exists in reverse direction + return reconstructPath(db, from, to, depth, true); + } + + // Check for intersection between forward and reverse + for (const forwardFile of forwardSet) { + if (reverseSet.has(forwardFile)) { + // Found a connecting file + const pathToMiddle = reconstructPath( + db, + from, + forwardFile, + depth, + true + ); + const pathFromMiddle = reconstructPath( + db, + forwardFile, + to, + depth, + true + ); + + if (pathToMiddle && pathFromMiddle) { + // Combine paths (remove duplicate middle file) + return [...pathToMiddle, ...pathFromMiddle.slice(1)]; + } + } + } + } + + return null; } /** * Reconstruct path using BFS */ function reconstructPath( - db: Database, - from: string, - to: string, - maxDepth: number, - forward: boolean, + db: Database, + from: string, + to: string, + maxDepth: number, + forward: boolean ): string[] | null { - // Simple BFS implementation - const queue: Array<{ file: string; path: string[] }> = [ - { file: from, path: [from] }, - ]; - const visited = new Set([from]); - - while (queue.length > 0) { - const current = queue.shift()!; - - if (current.file === to) { - return current.path; - } - - if (current.path.length > maxDepth) { - continue; - } - - // Get neighbors - const neighbors = forward - ? 
getDependencies(db, current.file, 1) - : getReverseDependencies({ db, relativePath: current.file, depth: 1 }); - - for (const neighbor of neighbors) { - if (!visited.has(neighbor.path)) { - visited.add(neighbor.path); - queue.push({ - file: neighbor.path, - path: [...current.path, neighbor.path], - }); - } - } - } - - return null; + // Simple BFS implementation + const queue: Array<{ file: string; path: string[] }> = [ + { file: from, path: [from] }, + ]; + const visited = new Set([from]); + + while (queue.length > 0) { + const current = queue.shift()!; + + if (current.file === to) { + return current.path; + } + + if (current.path.length > maxDepth) { + continue; + } + + // Get neighbors + const neighbors = forward + ? getDependencies(db, current.file, 1) + : getReverseDependencies(db, current.file, 1); + + for (const neighbor of neighbors) { + if (!visited.has(neighbor.path)) { + visited.add(neighbor.path); + queue.push({ + file: neighbor.path, + path: [...current.path, neighbor.path], + }); + } + } + } + + return null; } diff --git a/src/commands/changes.ts b/src/commands/changes.ts index 70809f6..6a77886 100644 --- a/src/commands/changes.ts +++ b/src/commands/changes.ts @@ -5,36 +5,34 @@ import { getChangedFiles, isGitRepo } from "../utils/git.ts"; import { DEFAULTS, outputJson, setupCommand } from "./shared.ts"; export async function changes( - ref: string, - _flags: Record = {}, + ref: string, + _flags: Record = {} ): Promise { - if (!(await isGitRepo())) { - throw new CtxError("Not a git repository"); - } + if (!(await isGitRepo())) { + throw new CtxError("Not a git repository"); + } - const ctx = await setupCommand(); - const changedFiles = await getChangedFiles(ref); + const ctx = await setupCommand(); + const changedFiles = await getChangedFiles(ref); - // For each changed file, get its reverse dependencies (depth 1) - const impacted = new Set(); + // For each changed file, get its reverse dependencies (depth 1) + const impacted = new Set(); - for (const 
file of changedFiles) { - try { - const rdeps = getReverseDependencies({ - db: ctx.db, - relativePath: file, - depth: DEFAULTS.DEPTH, - }); - rdeps.forEach((dep) => impacted.add(dep.path)); - } catch {} - } + for (const file of changedFiles) { + try { + const rdeps = getReverseDependencies(ctx.db, file, DEFAULTS.DEPTH); + rdeps.forEach((dep) => { + impacted.add(dep.path); + }); + } catch {} + } - const result: ChangesResult = { - ref, - changed: changedFiles, - impacted: Array.from(impacted), - total_impacted: impacted.size, - }; + const result: ChangesResult = { + ref, + changed: changedFiles, + impacted: Array.from(impacted), + total_impacted: impacted.size, + }; - outputJson(result); + outputJson(result); } diff --git a/src/commands/exports.ts b/src/commands/exports.ts index c011341..de89bf9 100644 --- a/src/commands/exports.ts +++ b/src/commands/exports.ts @@ -1,43 +1,43 @@ import { - fileExists, - getFileExports, - getPackageExports, + fileExists, + getFileExports, + getPackageExports, } from "../db/queries.ts"; import type { ExportsResult } from "../types.ts"; import { CtxError } from "../utils/errors.ts"; import { outputJson, resolvePath, setupCommand } from "./shared.ts"; export async function exports( - target: string, - _flags: Record = {}, + target: string, + _flags: Record = {} ): Promise { - const ctx = await setupCommand(); + const ctx = await setupCommand(); - // Try as file path first - const relativePath = resolvePath({ ctx: { ctx, inputPath: target } }); + // Try as file path first + const relativePath = resolvePath({ ctx, inputPath: target }); - if (fileExists({ db: ctx.db, relativePath })) { - const exportedSymbols = getFileExports(ctx.db, relativePath); - if (exportedSymbols.length > 0) { - const result: ExportsResult = { - target: relativePath, - exports: exportedSymbols, - }; - outputJson(result); - return; - } - } + if (fileExists(ctx.db, relativePath)) { + const exportedSymbols = getFileExports(ctx.db, relativePath); + if 
(exportedSymbols.length > 0) { + const result: ExportsResult = { + target: relativePath, + exports: exportedSymbols, + }; + outputJson(result); + return; + } + } - // Try as package name - const packageExports = getPackageExports(ctx.db, target); - if (packageExports.length > 0) { - const result: ExportsResult = { - target, - exports: packageExports, - }; - outputJson(result); - return; - } + // Try as package name + const packageExports = getPackageExports(ctx.db, target); + if (packageExports.length > 0) { + const result: ExportsResult = { + target, + exports: packageExports, + }; + outputJson(result); + return; + } - throw new CtxError(`No exports found for '${target}'`); + throw new CtxError(`No exports found for '${target}'`); } diff --git a/src/commands/graph.ts b/src/commands/graph.ts index 9e56535..6b23273 100644 --- a/src/commands/graph.ts +++ b/src/commands/graph.ts @@ -2,71 +2,71 @@ import { getDependencies, getReverseDependencies } from "../db/queries.ts"; import type { GraphEdge, GraphResult } from "../types.ts"; import { CtxError } from "../utils/errors.ts"; import { - DEFAULTS, - outputJson, - parseIntFlag, - parseStringFlag, - resolveAndValidatePath, - setupCommand, + DEFAULTS, + outputJson, + parseIntFlag, + parseStringFlag, + resolveAndValidatePath, + setupCommand, } from "./shared.ts"; const VALID_DIRECTIONS = ["deps", "rdeps", "both"] as const; export async function graph( - path: string, - flags: Record = {}, + path: string, + flags: Record = {} ): Promise { - const ctx = await setupCommand(); - const depth = parseIntFlag({ - flags, - key: "depth", - defaultValue: DEFAULTS.DEPTH, - }); - const direction = parseStringFlag({ - flags, - key: "direction", - defaultValue: "both", - }); + const ctx = await setupCommand(); + const depth = parseIntFlag({ + flags, + key: "depth", + defaultValue: DEFAULTS.DEPTH, + }); + const direction = parseStringFlag({ + flags, + key: "direction", + defaultValue: "both", + }); - if ( - 
!VALID_DIRECTIONS.includes(direction as (typeof VALID_DIRECTIONS)[number]) - ) { - throw new CtxError( - `Invalid direction: ${direction}. Must be one of: deps, rdeps, both`, - ); - } + if ( + !VALID_DIRECTIONS.includes(direction as (typeof VALID_DIRECTIONS)[number]) + ) { + throw new CtxError( + `Invalid direction: ${direction}. Must be one of: deps, rdeps, both` + ); + } - const relativePath = resolveAndValidatePath({ ctx, inputPath: path }); + const relativePath = resolveAndValidatePath({ ctx, inputPath: path }); - // Build graph - const nodes = new Set(); - const edges: GraphEdge[] = []; + // Build graph + const nodes = new Set(); + const edges: GraphEdge[] = []; - nodes.add(relativePath); + nodes.add(relativePath); - if (direction === "deps" || direction === "both") { - const deps = getDependencies(ctx.db, relativePath, depth); - deps.forEach((dep) => { - nodes.add(dep.path); - edges.push({ from: relativePath, to: dep.path }); - }); - } + if (direction === "deps" || direction === "both") { + const deps = getDependencies(ctx.db, relativePath, depth); + deps.forEach((dep) => { + nodes.add(dep.path); + edges.push({ from: relativePath, to: dep.path }); + }); + } - if (direction === "rdeps" || direction === "both") { - const rdeps = getReverseDependencies({ db: ctx.db, relativePath, depth }); - rdeps.forEach((rdep) => { - nodes.add(rdep.path); - edges.push({ from: rdep.path, to: relativePath }); - }); - } + if (direction === "rdeps" || direction === "both") { + const rdeps = getReverseDependencies(ctx.db, relativePath, depth); + rdeps.forEach((rdep) => { + nodes.add(rdep.path); + edges.push({ from: rdep.path, to: relativePath }); + }); + } - const result: GraphResult = { - root: relativePath, - direction, - depth, - nodes: Array.from(nodes), - edges, - }; + const result: GraphResult = { + root: relativePath, + direction, + depth, + nodes: Array.from(nodes), + edges, + }; - outputJson(result); + outputJson(result); } diff --git a/src/commands/ls.ts 
b/src/commands/ls.ts index 5e45b83..044dea8 100644 --- a/src/commands/ls.ts +++ b/src/commands/ls.ts @@ -94,7 +94,10 @@ export async function ls( ); } - const result = getDirectoryFiles(ctx.db, directory, { limit, sort }); + const result = getDirectoryFiles(ctx.db, directory, { + limit, + sort: sort as "path" | "symbols" | "deps" | "rdeps", + }); outputJson(result); } diff --git a/src/commands/query.ts b/src/commands/query.ts index 4944438..002b68e 100644 --- a/src/commands/query.ts +++ b/src/commands/query.ts @@ -19,7 +19,7 @@ export async function query(sql: string) { const rows = stmt.all() as Array>; // Extract column names from first row - const columns = rows.length > 0 ? Object.keys(rows[0]) : []; + const columns = rows.length > 0 ? Object.keys(rows[0]!) : []; const result: QueryResult = { query: sql, diff --git a/src/commands/symbol.ts b/src/commands/symbol.ts index 94d3d53..95a4e2d 100644 --- a/src/commands/symbol.ts +++ b/src/commands/symbol.ts @@ -33,7 +33,7 @@ export async function symbol( const symbolRow = ctx.db .query(symbolIdQuery) - .get(result.name, result.path, result.lines?.[0]) as { + .get(result.name, result.path, result.lines?.[0] ?? 
0) as { id: number; } | null; diff --git a/src/converter/convert.ts b/src/converter/convert.ts index f9fbd0d..d59bd0d 100644 --- a/src/converter/convert.ts +++ b/src/converter/convert.ts @@ -4,19 +4,19 @@ import path from "path"; import { debugConverter } from "../utils/logger.ts"; import { processDocuments } from "./documents"; import { - extractKindFromDocumentation, - extractNameFromScip, - extractPackageFromScip, - symbolKindToString, + extractKindFromDocumentation, + extractNameFromScip, + extractPackageFromScip, + symbolKindToString, } from "./helpers"; import { - extractDefinitions, - extractReferences, - getFileDependencies, - type ParsedDocument, - type ParsedSymbol, - parseScipFile, - type ScipData, + extractDefinitions, + extractReferences, + getFileDependencies, + type ParsedDocument, + type ParsedSymbol, + parseScipFile, + type ScipData, } from "./scip-parser"; // Batch size for processing documents to avoid memory exhaustion @@ -170,35 +170,35 @@ CREATE INDEX IF NOT EXISTS idx_document_document_refs_referenced ON document_doc CREATE INDEX IF NOT EXISTS idx_document_document_refs_line ON document_document_refs(line);`; export interface ConversionOptions { - force?: boolean; - ignore?: string[]; + force?: boolean; + ignore?: string[]; } export interface ConversionStats { - mode: "full" | "incremental"; - total_files: number; - total_symbols: number; - changed_files: number; - deleted_files: number; - time_ms: number; - total_documents?: number; - processed_documents?: number; + mode: "full" | "incremental"; + total_files: number; + total_symbols: number; + changed_files: number; + deleted_files: number; + time_ms: number; + total_documents?: number; + processed_documents?: number; } interface ChangedFile { - path: string; - mtime: number; + path: string; + mtime: number; } /** * Helper function to chunk an array into smaller batches */ function chunkArray(array: T[], chunkSize: number): T[][] { - const chunks: T[][] = []; - for (let i = 0; i < 
array.length; i += chunkSize) { - chunks.push(array.slice(i, i + chunkSize)); - } - return chunks; + const chunks: T[][] = []; + for (let i = 0; i < array.length; i += chunkSize) { + chunks.push(array.slice(i, i + chunkSize)); + } + return chunks; } /** @@ -215,548 +215,547 @@ function chunkArray(array: T[], chunkSize: number): T[][] { * @throws {Error} If SCIP file cannot be parsed or database cannot be created */ export async function convertToDatabase({ - scipPath, - databasePath, - repoRoot, - options = {}, + scipPath, + databasePath, + repoRoot, + options = {}, }: { - scipPath: string; - databasePath: string; - repoRoot: string; - options?: ConversionOptions; + scipPath: string; + databasePath: string; + repoRoot: string; + options?: ConversionOptions; }): Promise { - const startTime = Date.now(); - - // Parse SCIP protobuf file - debugConverter(`Parsing SCIP file at ${scipPath}...`); - let scipData: ScipData; - try { - scipData = await parseScipFile(scipPath); - debugConverter(`Parsed SCIP file: ${scipData.documents.length} documents`); - } catch (error) { - throw new Error(`Failed to parse SCIP file at ${scipPath}: ${error}`); - } - - // Open database - debugConverter(`Opening database at ${databasePath}...`); - let db: Database; - try { - db = new Database(databasePath, { create: true }); - debugConverter("Database opened successfully"); - } catch (error) { - throw new Error( - `Failed to open/create database at ${databasePath}: ${error}`, - ); - } - - // Initialize schema - debugConverter("Initializing database schema..."); - initializeSchema(db); - debugConverter("Schema initialized"); - - // Optimize database for bulk writes - optimizeDatabaseForWrites(db); - - // Determine if this is a full or incremental build - const isFirstRun = !hasExistingData(db); - const isForceFull = options.force === true; - const mode = isFirstRun || isForceFull ? 
"full" : "incremental"; - debugConverter( - `Build mode: ${mode} (firstRun=${isFirstRun}, force=${isForceFull})`, - ); - - // Create ignore matcher if patterns are provided - const ig = ignore(); - if (options.ignore && options.ignore.length > 0) { - ig.add(options.ignore); - debugConverter(`Filtering with ${options.ignore.length} ignore patterns`); - } - - // Build a quick document lookup (lightweight - just paths) - const documentsByPath = new Map( - scipData.documents.map((doc) => [doc.relativePath, doc]), - ); - - let changedFiles: ChangedFile[]; - let deletedFiles: string[]; - - if (mode === "full") { - // Full rebuild: get all files from SCIP data - debugConverter("Getting all files for full rebuild..."); - changedFiles = await getAllFiles({ documentsByPath, repoRoot, ig }); - deletedFiles = []; - debugConverter(`Full rebuild: processing ${changedFiles.length} files`); - - // Clear existing data - debugConverter("Clearing existing database data..."); - clearAllData(db); - debugConverter("Existing data cleared"); - } else { - // Incremental: detect changes via filesystem scan - debugConverter("Detecting changed and deleted files..."); - const changes = await detectChangedFiles({ - documentsByPath, - db, - repoRoot, - ig, - }); - changedFiles = changes.changed; - deletedFiles = changes.deleted; - debugConverter( - `Incremental build: ${changedFiles.length} changed, ${deletedFiles.length} deleted`, - ); - } - - // Delete old data - if (deletedFiles.length > 0) { - debugConverter( - `Deleting ${deletedFiles.length} old files from database...`, - ); - deleteOldData(db, deletedFiles, []); - debugConverter(`Deleted ${deletedFiles.length} files from database`); - } - - if (changedFiles.length > 0) { - // Delete old versions of changed files - debugConverter( - `Removing old versions of ${changedFiles.length} changed files...`, - ); - deleteOldData(db, [], changedFiles); - - // Process files in batches to avoid memory exhaustion - await processBatches({ db, scipData, 
changedFiles, repoRoot }); - } - - // Restore database settings - restoreDatabaseSettings(db); - - // Update packages (skip if no files changed) - debugConverter("Updating packages table..."); - updatePackages({ - db, - skipIfNoChanges: changedFiles.length === 0 && deletedFiles.length === 0, - }); - debugConverter("Packages table updated"); - - // Update denormalized fields - debugConverter("Updating denormalized fields..."); - updateDenormalizedFields(db); - debugConverter("Denormalized fields updated"); - - // Process documentation files - debugConverter("Processing documentation files..."); - const docStats = await processDocuments({ - db, - repoRoot, - mode, - ignorePatterns: options.ignore || [], - }); - debugConverter( - `Documentation processing complete: ${docStats.processed} processed, ${docStats.skipped} skipped`, - ); - - // Update metadata and get stats - debugConverter("Updating metadata..."); - const stats = updateMetadata({ - db, - mode, - changedFiles: changedFiles.length, - deletedFiles: deletedFiles.length, - }); - debugConverter( - `Metadata updated: ${stats.total_files} total files, ${stats.total_symbols} total symbols`, - ); - - // Close database - debugConverter("Closing database..."); - db.close(); - - const timeMs = Date.now() - startTime; - - return { - ...stats, - time_ms: timeMs, - total_documents: docStats.total, - processed_documents: docStats.processed, - }; + const startTime = Date.now(); + + // Parse SCIP protobuf file + debugConverter(`Parsing SCIP file at ${scipPath}...`); + let scipData: ScipData; + try { + scipData = await parseScipFile(scipPath); + debugConverter(`Parsed SCIP file: ${scipData.documents.length} documents`); + } catch (error) { + throw new Error(`Failed to parse SCIP file at ${scipPath}: ${error}`); + } + + // Open database + debugConverter(`Opening database at ${databasePath}...`); + let db: Database; + try { + db = new Database(databasePath, { create: true }); + debugConverter("Database opened successfully"); + 
} catch (error) { + throw new Error( + `Failed to open/create database at ${databasePath}: ${error}` + ); + } + + // Initialize schema + debugConverter("Initializing database schema..."); + initializeSchema(db); + debugConverter("Schema initialized"); + + // Optimize database for bulk writes + optimizeDatabaseForWrites(db); + + // Determine if this is a full or incremental build + const isFirstRun = !hasExistingData(db); + const isForceFull = options.force === true; + const mode = isFirstRun || isForceFull ? "full" : "incremental"; + debugConverter( + `Build mode: ${mode} (firstRun=${isFirstRun}, force=${isForceFull})` + ); + + // Create ignore matcher if patterns are provided + const ig = ignore(); + if (options.ignore && options.ignore.length > 0) { + ig.add(options.ignore); + debugConverter(`Filtering with ${options.ignore.length} ignore patterns`); + } + + // Build a quick document lookup (lightweight - just paths) + const documentsByPath = new Map( + scipData.documents.map((doc) => [doc.relativePath, doc]) + ); + + let changedFiles: ChangedFile[]; + let deletedFiles: string[]; + + if (mode === "full") { + // Full rebuild: get all files from SCIP data + debugConverter("Getting all files for full rebuild..."); + changedFiles = await getAllFiles({ documentsByPath, repoRoot, ig }); + deletedFiles = []; + debugConverter(`Full rebuild: processing ${changedFiles.length} files`); + + // Clear existing data + debugConverter("Clearing existing database data..."); + clearAllData(db); + debugConverter("Existing data cleared"); + } else { + // Incremental: detect changes via filesystem scan + debugConverter("Detecting changed and deleted files..."); + const changes = await detectChangedFiles({ + documentsByPath, + db, + repoRoot, + ig, + }); + changedFiles = changes.changed; + deletedFiles = changes.deleted; + debugConverter( + `Incremental build: ${changedFiles.length} changed, ${deletedFiles.length} deleted` + ); + } + + // Delete old data + if (deletedFiles.length > 0) 
{ + debugConverter( + `Deleting ${deletedFiles.length} old files from database...` + ); + deleteOldData(db, deletedFiles, []); + debugConverter(`Deleted ${deletedFiles.length} files from database`); + } + + if (changedFiles.length > 0) { + // Delete old versions of changed files + debugConverter( + `Removing old versions of ${changedFiles.length} changed files...` + ); + deleteOldData(db, [], changedFiles); + + // Process files in batches to avoid memory exhaustion + await processBatches({ db, scipData, changedFiles, repoRoot }); + } + + // Restore database settings + restoreDatabaseSettings(db); + + // Update packages (skip if no files changed) + debugConverter("Updating packages table..."); + updatePackages({ + db, + skipIfNoChanges: changedFiles.length === 0 && deletedFiles.length === 0, + }); + debugConverter("Packages table updated"); + + // Update denormalized fields + debugConverter("Updating denormalized fields..."); + updateDenormalizedFields(db); + debugConverter("Denormalized fields updated"); + + // Process documentation files + debugConverter("Processing documentation files..."); + const docStats = await processDocuments({ + db, + repoRoot, + mode, + ignorePatterns: options.ignore || [], + }); + debugConverter( + `Documentation processing complete: ${docStats.processed} processed, ${docStats.skipped} skipped` + ); + + // Update metadata and get stats + debugConverter("Updating metadata..."); + const stats = updateMetadata({ + db, + mode, + changedFiles: changedFiles.length, + deletedFiles: deletedFiles.length, + }); + debugConverter( + `Metadata updated: ${stats.total_files} total files, ${stats.total_symbols} total symbols` + ); + + // Close database + debugConverter("Closing database..."); + db.close(); + + const timeMs = Date.now() - startTime; + + return { + ...stats, + time_ms: timeMs, + total_documents: docStats.total, + processed_documents: docStats.processed, + }; } /** * Process files in batches to avoid memory exhaustion */ async function 
processBatches({ - db, - scipData, - changedFiles, - repoRoot, + db, + scipData, + changedFiles, + repoRoot, }: { - db: Database; - scipData: ScipData; - changedFiles: ChangedFile[]; - repoRoot: string; + db: Database; + scipData: ScipData; + changedFiles: ChangedFile[]; + repoRoot: string; }): Promise { - const timestamp = Math.floor(Date.now() / 1000); - - const insertedFiles = new Set(); - - // Create a set of changed paths for quick lookup - const changedPathsSet = new Set(changedFiles.map((f) => f.path)); - - // Filter scipData documents to only include changed files - const docsToProcess = scipData.documents.filter((doc) => - changedPathsSet.has(doc.relativePath), - ); - - debugConverter( - `Processing ${docsToProcess.length} documents in batches of ${BATCH_SIZE}...`, - ); - - // Build LIGHTWEIGHT global definition map (only symbol -> file path) - debugConverter("Building lightweight global definition map..."); - const globalDefinitionsBySymbol = new Map< - string, - { file: string; definition: any } - >(); - const externalSymbols = scipData.externalSymbols; - - // Process documents in chunks to build definition map without keeping all in memory - for (const doc of scipData.documents) { - // Extract only the symbol IDs and file path (very lightweight) - for (const occ of doc.occurrences) { - if (occ.symbolRoles & 0x1) { - // Definition bit - // Store minimal info - we'll get full details from documentsByPath later - if (!globalDefinitionsBySymbol.has(occ.symbol)) { - globalDefinitionsBySymbol.set(occ.symbol, { - file: doc.relativePath, - definition: { symbol: occ.symbol, range: occ.range }, - }); - } - } - } - } - debugConverter( - `Global definition map built: ${globalDefinitionsBySymbol.size} definitions`, - ); - - // Build LIGHTWEIGHT global symbols map (only external symbols + doc symbols, no duplication) - debugConverter("Building global symbols map..."); - const globalSymbolsById = new Map(); - - // Add external symbols first (these are small, usually < 
10K) - for (const sym of externalSymbols) { - globalSymbolsById.set(sym.symbol, sym); - } - - // Add document symbols efficiently (no deep copies) - for (const doc of scipData.documents) { - for (const sym of doc.symbols) { - if (!globalSymbolsById.has(sym.symbol)) { - globalSymbolsById.set(sym.symbol, sym); - } - } - } - debugConverter(`Global symbols map built: ${globalSymbolsById.size} symbols`); - - // Chunk documents into batches - const batches = chunkArray(docsToProcess, BATCH_SIZE); - - // Clear scipData external symbols reference (we copied it) - scipData.externalSymbols = []; - debugConverter("Cleared scipData external symbols"); - - const totalBatches = batches.length; - let processedFiles = 0; - const totalFiles = docsToProcess.length; - const progressStartTime = Date.now(); - - for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) { - const batch = batches[batchIndex]; - const batchNum = batchIndex + 1; - - // Calculate progress - const percent = Math.floor((processedFiles / totalFiles) * 100); - const elapsed = (Date.now() - progressStartTime) / 1000; - const rate = processedFiles / elapsed || 0; - const remaining = totalFiles - processedFiles; - const eta = rate > 0 ? 
Math.ceil(remaining / rate) : 0; - - debugConverter( - `\rIndexing: ${percent}% (${processedFiles}/${totalFiles} files, batch ${batchNum}/${totalBatches}, ETA: ${eta}s) `, - ); - - // Build lightweight document map for this batch - const documentsByPath = new Map( - batch.map((doc) => [doc.relativePath, doc]), - ); - - // Get ChangedFile objects for this batch - const batchChangedFiles = changedFiles.filter((f) => - batch.some((doc) => doc.relativePath === f.path), - ); - - // Convert files in this batch - await convertFiles( - documentsByPath, - globalSymbolsById, - db, - batchChangedFiles, - timestamp, - insertedFiles, - ); - - // Update dependencies for this batch (uses global maps for cross-batch deps) - await updateDependencies( - documentsByPath, - globalSymbolsById, - globalDefinitionsBySymbol, - db, - batchChangedFiles, - ); - - // Update symbol references for this batch - await updateSymbolReferences({ - documentsByPath, - symbolsById: globalSymbolsById, - db, - changedFiles: batchChangedFiles, - }); - - processedFiles += batch.length; - } - - process.stderr.write("\n"); - debugConverter( - `Batch processing complete: ${processedFiles} files processed`, - ); + const timestamp = Math.floor(Date.now() / 1000); + + const insertedFiles = new Set(); + + // Create a set of changed paths for quick lookup + const changedPathsSet = new Set(changedFiles.map((f) => f.path)); + + // Filter scipData documents to only include changed files + const docsToProcess = scipData.documents.filter((doc) => + changedPathsSet.has(doc.relativePath) + ); + + debugConverter( + `Processing ${docsToProcess.length} documents in batches of ${BATCH_SIZE}...` + ); + + // Build LIGHTWEIGHT global definition map (only symbol -> file path) + debugConverter("Building lightweight global definition map..."); + const globalDefinitionsBySymbol = new Map< + string, + { file: string; definition: any } + >(); + const externalSymbols = scipData.externalSymbols; + + // Process documents in chunks to 
build definition map without keeping all in memory + for (const doc of scipData.documents) { + // Extract only the symbol IDs and file path (very lightweight) + for (const occ of doc.occurrences) { + if (occ.symbolRoles & 0x1) { + // Definition bit + // Store minimal info - we'll get full details from documentsByPath later + if (!globalDefinitionsBySymbol.has(occ.symbol)) { + globalDefinitionsBySymbol.set(occ.symbol, { + file: doc.relativePath, + definition: { symbol: occ.symbol, range: occ.range }, + }); + } + } + } + } + debugConverter( + `Global definition map built: ${globalDefinitionsBySymbol.size} definitions` + ); + + // Build LIGHTWEIGHT global symbols map (only external symbols + doc symbols, no duplication) + debugConverter("Building global symbols map..."); + const globalSymbolsById = new Map(); + + // Add external symbols first (these are small, usually < 10K) + for (const sym of externalSymbols) { + globalSymbolsById.set(sym.symbol, sym); + } + + // Add document symbols efficiently (no deep copies) + for (const doc of scipData.documents) { + for (const sym of doc.symbols) { + if (!globalSymbolsById.has(sym.symbol)) { + globalSymbolsById.set(sym.symbol, sym); + } + } + } + debugConverter(`Global symbols map built: ${globalSymbolsById.size} symbols`); + + // Chunk documents into batches + const batches = chunkArray(docsToProcess, BATCH_SIZE); + + // Clear scipData external symbols reference (we copied it) + scipData.externalSymbols = []; + debugConverter("Cleared scipData external symbols"); + + const totalBatches = batches.length; + let processedFiles = 0; + const totalFiles = docsToProcess.length; + const progressStartTime = Date.now(); + + for (let batchIndex = 0; batchIndex < batches.length; batchIndex++) { + const batch = batches[batchIndex]!; + const batchNum = batchIndex + 1; + + // Calculate progress + const percent = Math.floor((processedFiles / totalFiles) * 100); + const elapsed = (Date.now() - progressStartTime) / 1000; + const rate = 
processedFiles / elapsed || 0; + const remaining = totalFiles - processedFiles; + const eta = rate > 0 ? Math.ceil(remaining / rate) : 0; + + debugConverter( + `\rIndexing: ${percent}% (${processedFiles}/${totalFiles} files, batch ${batchNum}/${totalBatches}, ETA: ${eta}s) ` + ); + + // Build lightweight document map for this batch + const documentsByPath = new Map( + batch.map((doc) => [doc.relativePath, doc]) + ); + + // Get ChangedFile objects for this batch + const batchChangedFiles = changedFiles.filter((f) => + batch.some((doc) => doc.relativePath === f.path) + ); + + // Convert files in this batch + await convertFiles( + documentsByPath, + globalSymbolsById, + db, + batchChangedFiles, + timestamp, + insertedFiles + ); + + // Update dependencies for this batch (uses global maps for cross-batch deps) + await updateDependencies( + documentsByPath, + globalSymbolsById, + globalDefinitionsBySymbol, + db, + batchChangedFiles + ); + + // Update symbol references for this batch + await updateSymbolReferences({ + documentsByPath, + db, + changedFiles: batchChangedFiles, + }); + + processedFiles += batch.length; + } + + process.stderr.write("\n"); + debugConverter( + `Batch processing complete: ${processedFiles} files processed` + ); } /** * Initialize database schema */ function initializeSchema(db: Database): void { - // Check if all tables exist (including new ones like documents) - // We always run the schema if any table is missing - try { - const filesCheck = db - .query( - "SELECT name FROM sqlite_master WHERE type='table' AND name='files'", - ) - .get(); - - const documentsCheck = db - .query( - "SELECT name FROM sqlite_master WHERE type='table' AND name='documents'", - ) - .get(); - - if (filesCheck && documentsCheck) { - // All tables exist, skip initialization - return; - } - } catch { - // Continue with initialization - } - - // Execute schema (multiple statements) - const statements = SCHEMA_SQL.split(";") - .map((s) => s.trim()) - .filter((s) => s.length 
> 0); - - for (const stmt of statements) { - db.run(stmt); - } + // Check if all tables exist (including new ones like documents) + // We always run the schema if any table is missing + try { + const filesCheck = db + .query( + "SELECT name FROM sqlite_master WHERE type='table' AND name='files'" + ) + .get(); + + const documentsCheck = db + .query( + "SELECT name FROM sqlite_master WHERE type='table' AND name='documents'" + ) + .get(); + + if (filesCheck && documentsCheck) { + // All tables exist, skip initialization + return; + } + } catch { + // Continue with initialization + } + + // Execute schema (multiple statements) + const statements = SCHEMA_SQL.split(";") + .map((s) => s.trim()) + .filter((s) => s.length > 0); + + for (const stmt of statements) { + db.run(stmt); + } } /** * Optimize database for bulk writes */ function optimizeDatabaseForWrites(db: Database): void { - debugConverter("Optimizing database for bulk writes..."); + debugConverter("Optimizing database for bulk writes..."); - // Disable synchronous writes (much faster, but less crash-safe during indexing) - db.run("PRAGMA synchronous = OFF"); + // Disable synchronous writes (much faster, but less crash-safe during indexing) + db.run("PRAGMA synchronous = OFF"); - // Use memory for journal (faster than disk) - db.run("PRAGMA journal_mode = MEMORY"); + // Use memory for journal (faster than disk) + db.run("PRAGMA journal_mode = MEMORY"); - // Increase cache size (10MB) - db.run("PRAGMA cache_size = -10000"); + // Increase cache size (10MB) + db.run("PRAGMA cache_size = -10000"); - debugConverter("Database optimizations applied"); + debugConverter("Database optimizations applied"); } /** * Restore normal database settings after bulk writes */ function restoreDatabaseSettings(db: Database): void { - debugConverter("Restoring normal database settings..."); + debugConverter("Restoring normal database settings..."); - // Re-enable synchronous writes - db.run("PRAGMA synchronous = FULL"); + // Re-enable 
synchronous writes + db.run("PRAGMA synchronous = FULL"); - // Switch back to WAL mode - db.run("PRAGMA journal_mode = WAL"); + // Switch back to WAL mode + db.run("PRAGMA journal_mode = WAL"); - debugConverter("Database settings restored"); + debugConverter("Database settings restored"); } /** * Check if database has existing data */ function hasExistingData(db: Database): boolean { - try { - const result = db.query("SELECT COUNT(*) as count FROM files").get() as { - count: number; - }; - return result.count > 0; - } catch { - return false; - } + try { + const result = db.query("SELECT COUNT(*) as count FROM files").get() as { + count: number; + }; + return result.count > 0; + } catch { + return false; + } } /** * Clear all data from database (for full rebuild) */ function clearAllData(db: Database): void { - db.run("BEGIN TRANSACTION"); - db.run("DELETE FROM symbol_references"); - db.run("DELETE FROM dependencies"); - db.run("DELETE FROM symbols"); - db.run("DELETE FROM files"); - db.run("DELETE FROM packages"); - db.run("DELETE FROM metadata"); - db.run("COMMIT"); + db.run("BEGIN TRANSACTION"); + db.run("DELETE FROM symbol_references"); + db.run("DELETE FROM dependencies"); + db.run("DELETE FROM symbols"); + db.run("DELETE FROM files"); + db.run("DELETE FROM packages"); + db.run("DELETE FROM metadata"); + db.run("COMMIT"); } /** * Get all files from SCIP data (for full rebuild) */ async function getAllFiles({ - documentsByPath, - repoRoot, - ig, + documentsByPath, + repoRoot, + ig, }: { - documentsByPath: Map; - repoRoot: string; - ig: ReturnType; + documentsByPath: Map; + repoRoot: string; + ig: ReturnType; }): Promise { - const files: ChangedFile[] = []; - - for (const [relativePath, doc] of documentsByPath) { - if (ig.ignores(relativePath)) { - continue; - } - - const fullPath = path.join(repoRoot, relativePath); - try { - const stat = await Bun.file(fullPath).stat(); - const mtime = Math.floor(stat.mtime.getTime() / 1000); - files.push({ path: relativePath, 
mtime }); - } catch {} - } - - return files; + const files: ChangedFile[] = []; + + for (const [relativePath, doc] of documentsByPath) { + if (ig.ignores(relativePath)) { + continue; + } + + const fullPath = path.join(repoRoot, relativePath); + try { + const stat = await Bun.file(fullPath).stat(); + const mtime = Math.floor(stat.mtime.getTime() / 1000); + files.push({ path: relativePath, mtime }); + } catch {} + } + + return files; } /** * Detect changed and deleted files (for incremental rebuild) */ async function detectChangedFiles({ - documentsByPath, - db, - repoRoot, - ig, + documentsByPath, + db, + repoRoot, + ig, }: { - documentsByPath: Map; - db: Database; - repoRoot: string; - ig: ReturnType; + documentsByPath: Map; + db: Database; + repoRoot: string; + ig: ReturnType; }): Promise<{ changed: ChangedFile[]; deleted: string[] }> { - // Get existing files from database with mtime - const existingFiles = new Map( - ( - db.query("SELECT path, mtime FROM files").all() as Array<{ - path: string; - mtime: number; - }> - ).map((f) => [f.path, f.mtime]), - ); - - const changed: ChangedFile[] = []; - const deleted = new Set(existingFiles.keys()); - - for (const [relativePath, doc] of documentsByPath) { - if (ig.ignores(relativePath)) { - continue; - } - - deleted.delete(relativePath); - - // Get current mtime from filesystem - const fullPath = path.join(repoRoot, relativePath); - try { - const stat = await Bun.file(fullPath).stat(); - const currentMtime = Math.floor(stat.mtime.getTime() / 1000); - - const existingMtime = existingFiles.get(relativePath); - - // File is new or modified - if (!existingMtime || currentMtime > existingMtime) { - changed.push({ path: relativePath, mtime: currentMtime }); - } - } catch {} - } - - return { changed, deleted: Array.from(deleted) }; + // Get existing files from database with mtime + const existingFiles = new Map( + ( + db.query("SELECT path, mtime FROM files").all() as Array<{ + path: string; + mtime: number; + }> + ).map((f) 
=> [f.path, f.mtime]) + ); + + const changed: ChangedFile[] = []; + const deleted = new Set(existingFiles.keys()); + + for (const [relativePath, doc] of documentsByPath) { + if (ig.ignores(relativePath)) { + continue; + } + + deleted.delete(relativePath); + + // Get current mtime from filesystem + const fullPath = path.join(repoRoot, relativePath); + try { + const stat = await Bun.file(fullPath).stat(); + const currentMtime = Math.floor(stat.mtime.getTime() / 1000); + + const existingMtime = existingFiles.get(relativePath); + + // File is new or modified + if (!existingMtime || currentMtime > existingMtime) { + changed.push({ path: relativePath, mtime: currentMtime }); + } + } catch {} + } + + return { changed, deleted: Array.from(deleted) }; } /** * Delete old data for deleted or changed files */ function deleteOldData( - db: Database, - deletedFiles: string[], - changedFiles: ChangedFile[], + db: Database, + deletedFiles: string[], + changedFiles: ChangedFile[] ): void { - const allFilesToRemove = [ - ...deletedFiles, - ...changedFiles.map((f) => f.path), - ]; + const allFilesToRemove = [ + ...deletedFiles, + ...changedFiles.map((f) => f.path), + ]; - if (allFilesToRemove.length === 0) return; + if (allFilesToRemove.length === 0) return; - db.run("BEGIN TRANSACTION"); + db.run("BEGIN TRANSACTION"); - const stmt = db.prepare("DELETE FROM files WHERE path = ?"); - for (const filePath of allFilesToRemove) { - stmt.run(filePath); - } + const stmt = db.prepare("DELETE FROM files WHERE path = ?"); + for (const filePath of allFilesToRemove) { + stmt.run(filePath); + } - db.run("COMMIT"); + db.run("COMMIT"); } /** * Convert changed files from SCIP data to database */ async function convertFiles( - documentsByPath: Map, - symbolsById: Map, - db: Database, - changedFiles: ChangedFile[], - timestamp: number, - insertedFiles: Set, + documentsByPath: Map, + symbolsById: Map, + db: Database, + changedFiles: ChangedFile[], + timestamp: number, + insertedFiles: Set ): Promise { 
- if (changedFiles.length === 0) return; + if (changedFiles.length === 0) return; - debugConverter("Starting database transaction for file conversion..."); - db.run("BEGIN TRANSACTION"); + debugConverter("Starting database transaction for file conversion..."); + db.run("BEGIN TRANSACTION"); - const fileStmt = db.prepare( - "INSERT INTO files (path, language, mtime, indexed_at) VALUES (?, ?, ?, ?)", - ); + const fileStmt = db.prepare( + "INSERT INTO files (path, language, mtime, indexed_at) VALUES (?, ?, ?, ?)" + ); - const symbolStmt = db.prepare(` + const symbolStmt = db.prepare(` INSERT INTO symbols ( file_id, name, scip_symbol, kind, start_line, end_line, start_char, end_char, @@ -764,346 +763,342 @@ async function convertFiles( ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) `); - let processedCount = 0; - const logInterval = Math.max(1, Math.floor(changedFiles.length / 10)); // Log every 10% - - for (const { path: filePath, mtime } of changedFiles) { - processedCount++; - - if ( - processedCount % logInterval === 0 || - processedCount === changedFiles.length - ) { - debugConverter( - `Converting files: ${processedCount}/${ - changedFiles.length - } (${Math.floor((processedCount / changedFiles.length) * 100)}%)`, - ); - } - - if (insertedFiles.has(filePath)) { - continue; - } - - // Get document from parsed SCIP data - const doc = documentsByPath.get(filePath); - if (!doc) continue; - - // Insert file - fileStmt.run(filePath, doc.language, mtime, timestamp); - - insertedFiles.add(filePath); - - // Get file_id from database - const fileRecord = db - .query("SELECT id FROM files WHERE path = ?") - .get(filePath) as { id: number } | undefined; - - if (!fileRecord) continue; - - const fileId = fileRecord.id; - - // Extract symbol definitions from occurrences - const definitions = extractDefinitions(doc); - - // Insert symbols (batch) - for (const def of definitions) { - const symbolInfo = symbolsById.get(def.symbol); - - // Get symbol metadata - let kind = 
symbolKindToString(symbolInfo?.kind ?? 0); - - // Fallback: If kind is unknown, try to extract from documentation - if (kind === "unknown" && symbolInfo?.documentation) { - kind = extractKindFromDocumentation(symbolInfo.documentation); - } - - const pkg = extractPackageFromScip(def.symbol); - const name = symbolInfo?.displayName || extractNameFromScip(def.symbol); - const documentation = symbolInfo?.documentation?.join("\n"); - - // Detect if symbol is local (function parameters, closure variables, etc.) - const isLocal = def.symbol.includes("local") ? 1 : 0; - - symbolStmt.run( - fileId, - name, - def.symbol, - kind, - def.range[0], // start_line - def.range[2], // end_line - def.range[1], // start_char - def.range[3], // end_char - documentation || null, - pkg, - isLocal, - ); - } - } - - debugConverter(`Committing transaction for ${changedFiles.length} files...`); - db.run("COMMIT"); - debugConverter("Transaction committed successfully"); + let processedCount = 0; + const logInterval = Math.max(1, Math.floor(changedFiles.length / 10)); // Log every 10% + + for (const { path: filePath, mtime } of changedFiles) { + processedCount++; + + if ( + processedCount % logInterval === 0 || + processedCount === changedFiles.length + ) { + debugConverter( + `Converting files: ${processedCount}/${ + changedFiles.length + } (${Math.floor((processedCount / changedFiles.length) * 100)}%)` + ); + } + + if (insertedFiles.has(filePath)) { + continue; + } + + // Get document from parsed SCIP data + const doc = documentsByPath.get(filePath); + if (!doc) continue; + + // Insert file + fileStmt.run(filePath, doc.language, mtime, timestamp); + + insertedFiles.add(filePath); + + // Get file_id from database + const fileRecord = db + .query("SELECT id FROM files WHERE path = ?") + .get(filePath) as { id: number } | undefined; + + if (!fileRecord) continue; + + const fileId = fileRecord.id; + + // Extract symbol definitions from occurrences + const definitions = extractDefinitions(doc); + 
+ // Insert symbols (batch) + for (const def of definitions) { + const symbolInfo = symbolsById.get(def.symbol); + + // Get symbol metadata + let kind = symbolKindToString(symbolInfo?.kind ?? 0); + + // Fallback: If kind is unknown, try to extract from documentation + if (kind === "unknown" && symbolInfo?.documentation) { + kind = extractKindFromDocumentation(symbolInfo.documentation); + } + + const pkg = extractPackageFromScip(def.symbol); + const name = symbolInfo?.displayName || extractNameFromScip(def.symbol); + const documentation = symbolInfo?.documentation?.join("\n"); + + // Detect if symbol is local (function parameters, closure variables, etc.) + const isLocal = def.symbol.includes("local") ? 1 : 0; + + symbolStmt.run( + fileId, + name, + def.symbol, + kind, + def.range[0], // start_line + def.range[2], // end_line + def.range[1], // start_char + def.range[3], // end_char + documentation || null, + pkg, + isLocal + ); + } + } + + debugConverter(`Committing transaction for ${changedFiles.length} files...`); + db.run("COMMIT"); + debugConverter("Transaction committed successfully"); } /** * Update dependencies for changed files */ async function updateDependencies( - documentsByPath: Map, - symbolsById: Map, - definitionsBySymbol: Map, - db: Database, - changedFiles: ChangedFile[], + documentsByPath: Map, + symbolsById: Map, + definitionsBySymbol: Map, + db: Database, + changedFiles: ChangedFile[] ): Promise { - if (changedFiles.length === 0) return; - - const changedPaths = changedFiles.map((f) => f.path); - debugConverter( - `Finding affected files for ${changedPaths.length} changed files...`, - ); - - // Get affected files (changed + their dependents) - const affectedFiles = new Set(changedPaths); - - // Find files that import changed files - if (changedPaths.length > 0) { - const placeholders = changedPaths.map(() => "?").join(","); - const dependents = db - .query( - ` + if (changedFiles.length === 0) return; + + const changedPaths = 
changedFiles.map((f) => f.path); + debugConverter( + `Finding affected files for ${changedPaths.length} changed files...` + ); + + // Get affected files (changed + their dependents) + const affectedFiles = new Set(changedPaths); + + // Find files that import changed files + if (changedPaths.length > 0) { + const placeholders = changedPaths.map(() => "?").join(","); + const dependents = db + .query( + ` SELECT DISTINCT f.path FROM dependencies d JOIN files f ON f.id = d.from_file_id JOIN files f2 ON f2.id = d.to_file_id WHERE f2.path IN (${placeholders}) - `, - ) - .all(...changedPaths) as Array<{ path: string }>; - - for (const { path } of dependents) { - affectedFiles.add(path); - } - debugConverter( - `Found ${dependents.length} dependent files, total affected: ${affectedFiles.size}`, - ); - } - - // Delete old dependencies for affected files - debugConverter("Starting transaction for dependencies update..."); - db.run("BEGIN TRANSACTION"); - - const deleteStmt = db.prepare(` + ` + ) + .all(...changedPaths) as Array<{ path: string }>; + + for (const { path } of dependents) { + affectedFiles.add(path); + } + debugConverter( + `Found ${dependents.length} dependent files, total affected: ${affectedFiles.size}` + ); + } + + // Delete old dependencies for affected files + debugConverter("Starting transaction for dependencies update..."); + db.run("BEGIN TRANSACTION"); + + const deleteStmt = db.prepare(` DELETE FROM dependencies WHERE from_file_id IN (SELECT id FROM files WHERE path = ?) 
`); - for (const filePath of affectedFiles) { - deleteStmt.run(filePath); - } - debugConverter(`Deleted old dependencies for ${affectedFiles.size} files`); + for (const filePath of affectedFiles) { + deleteStmt.run(filePath); + } + debugConverter(`Deleted old dependencies for ${affectedFiles.size} files`); - // Recompute dependencies from SCIP data - debugConverter(`Recomputing dependencies for ${affectedFiles.size} files...`); - const insertStmt = db.prepare(` + // Recompute dependencies from SCIP data + debugConverter(`Recomputing dependencies for ${affectedFiles.size} files...`); + const insertStmt = db.prepare(` INSERT INTO dependencies (from_file_id, to_file_id, symbol_count, symbols) VALUES (?, ?, ?, ?) `); - let processedCount = 0; - const logInterval = Math.max(1, Math.floor(affectedFiles.size / 10)); - - for (const fromPath of affectedFiles) { - processedCount++; - - if ( - processedCount % logInterval === 0 || - processedCount === affectedFiles.size - ) { - debugConverter( - `Processing dependencies: ${processedCount}/${ - affectedFiles.size - } (${Math.floor((processedCount / affectedFiles.size) * 100)}%)`, - ); - } - const doc = documentsByPath.get(fromPath); - if (!doc) continue; - - // Get file dependencies - const depsByFile = getFileDependencies({ - doc, - documentsByPath, - symbolsById, - definitionsBySymbol, - }); - - const fromFileRecord = db - .query("SELECT id FROM files WHERE path = ?") - .get(fromPath) as { id: number } | undefined; - - if (!fromFileRecord) continue; - - const fromFileId = fromFileRecord.id; - - for (const [toPath, symbols] of depsByFile) { - const toFileRecord = db - .query("SELECT id FROM files WHERE path = ?") - .get(toPath) as { id: number } | undefined; - - if (!toFileRecord) continue; - - // Extract symbol names - const symbolNames = Array.from( - new Set( - Array.from(symbols) - .filter((scipSymbol) => !scipSymbol.includes("local")) // Filter out local symbols - .map((scipSymbol) => { - const symbolInfo = 
symbolsById.get(scipSymbol); - return symbolInfo?.displayName || extractNameFromScip(scipSymbol); - }) - .filter((name) => name && name !== "unknown"), - ), - ); - - if (symbolNames.length === 0) continue; - - insertStmt.run( - fromFileId, - toFileRecord.id, - symbolNames.length, - JSON.stringify(symbolNames), - ); - } - } - - debugConverter("Committing dependencies transaction..."); - db.run("COMMIT"); - debugConverter("Dependencies transaction committed"); + let processedCount = 0; + const logInterval = Math.max(1, Math.floor(affectedFiles.size / 10)); + + for (const fromPath of affectedFiles) { + processedCount++; + + if ( + processedCount % logInterval === 0 || + processedCount === affectedFiles.size + ) { + debugConverter( + `Processing dependencies: ${processedCount}/${ + affectedFiles.size + } (${Math.floor((processedCount / affectedFiles.size) * 100)}%)` + ); + } + const doc = documentsByPath.get(fromPath); + if (!doc) continue; + + // Get file dependencies + const depsByFile = getFileDependencies({ + doc, + definitionsBySymbol, + }); + + const fromFileRecord = db + .query("SELECT id FROM files WHERE path = ?") + .get(fromPath) as { id: number } | undefined; + + if (!fromFileRecord) continue; + + const fromFileId = fromFileRecord.id; + + for (const [toPath, symbols] of depsByFile) { + const toFileRecord = db + .query("SELECT id FROM files WHERE path = ?") + .get(toPath) as { id: number } | undefined; + + if (!toFileRecord) continue; + + // Extract symbol names + const symbolNames = Array.from( + new Set( + Array.from(symbols) + .filter((scipSymbol) => !scipSymbol.includes("local")) // Filter out local symbols + .map((scipSymbol) => { + const symbolInfo = symbolsById.get(scipSymbol); + return symbolInfo?.displayName || extractNameFromScip(scipSymbol); + }) + .filter((name) => name && name !== "unknown") + ) + ); + + if (symbolNames.length === 0) continue; + + insertStmt.run( + fromFileId, + toFileRecord.id, + symbolNames.length, + JSON.stringify(symbolNames) 
+ ); + } + } + + debugConverter("Committing dependencies transaction..."); + db.run("COMMIT"); + debugConverter("Dependencies transaction committed"); } /** * Update symbol references for changed files */ async function updateSymbolReferences({ - documentsByPath, - symbolsById, - db, - changedFiles, + documentsByPath, + db, + changedFiles, }: { - documentsByPath: Map; - symbolsById: Map; - db: Database; - changedFiles: ChangedFile[]; + documentsByPath: Map; + db: Database; + changedFiles: ChangedFile[]; }): Promise { - if (changedFiles.length === 0) return; + if (changedFiles.length === 0) return; - const affectedFiles = changedFiles.map((f) => f.path); - debugConverter( - `Updating symbol references for ${affectedFiles.length} files...`, - ); + const affectedFiles = changedFiles.map((f) => f.path); + debugConverter( + `Updating symbol references for ${affectedFiles.length} files...` + ); - db.run("BEGIN TRANSACTION"); + db.run("BEGIN TRANSACTION"); - // Delete old references from changed files - const deleteStmt = db.prepare(` + // Delete old references from changed files + const deleteStmt = db.prepare(` DELETE FROM symbol_references WHERE file_id IN (SELECT id FROM files WHERE path = ?) 
`); - for (const filePath of affectedFiles) { - deleteStmt.run(filePath); - } - debugConverter(`Deleted old references for ${affectedFiles.length} files`); - - // Build symbol lookup map (scip_symbol -> id) for fast lookups - debugConverter("Building symbol ID lookup map..."); - const symbolIdMap = new Map(); - const allSymbols = db - .query("SELECT id, scip_symbol FROM symbols") - .all() as Array<{ - id: number; - scip_symbol: string; - }>; - for (const sym of allSymbols) { - symbolIdMap.set(sym.scip_symbol, sym.id); - } - debugConverter(`Symbol lookup map built: ${symbolIdMap.size} symbols`); - - // Build file ID lookup map for fast lookups - debugConverter("Building file ID lookup map..."); - const fileIdMap = new Map(); - for (const filePath of affectedFiles) { - const fileRecord = db - .query("SELECT id FROM files WHERE path = ?") - .get(filePath) as { id: number } | undefined; - if (fileRecord) { - fileIdMap.set(filePath, fileRecord.id); - } - } - debugConverter(`File lookup map built: ${fileIdMap.size} files`); - - // Insert new references from changed files - const insertStmt = db.prepare(` + for (const filePath of affectedFiles) { + deleteStmt.run(filePath); + } + debugConverter(`Deleted old references for ${affectedFiles.length} files`); + + // Build symbol lookup map (scip_symbol -> id) for fast lookups + debugConverter("Building symbol ID lookup map..."); + const symbolIdMap = new Map(); + const allSymbols = db + .query("SELECT id, scip_symbol FROM symbols") + .all() as Array<{ + id: number; + scip_symbol: string; + }>; + for (const sym of allSymbols) { + symbolIdMap.set(sym.scip_symbol, sym.id); + } + debugConverter(`Symbol lookup map built: ${symbolIdMap.size} symbols`); + + // Build file ID lookup map for fast lookups + debugConverter("Building file ID lookup map..."); + const fileIdMap = new Map(); + for (const filePath of affectedFiles) { + const fileRecord = db + .query("SELECT id FROM files WHERE path = ?") + .get(filePath) as { id: number } | 
undefined; + if (fileRecord) { + fileIdMap.set(filePath, fileRecord.id); + } + } + debugConverter(`File lookup map built: ${fileIdMap.size} files`); + + // Insert new references from changed files + const insertStmt = db.prepare(` INSERT INTO symbol_references (symbol_id, file_id, line) VALUES (?, ?, ?) `); - let processedCount = 0; - let totalReferences = 0; - const logInterval = Math.max(1, Math.floor(affectedFiles.length / 10)); - - for (const fromPath of affectedFiles) { - processedCount++; - - if ( - processedCount % logInterval === 0 || - processedCount === affectedFiles.length - ) { - debugConverter( - `Processing references: ${processedCount}/${ - affectedFiles.length - } (${Math.floor( - (processedCount / affectedFiles.length) * 100, - )}%) - ${totalReferences} refs inserted`, - ); - } - - const doc = documentsByPath.get(fromPath); - if (!doc) continue; - - const fromFileId = fileIdMap.get(fromPath); - if (!fromFileId) continue; - - // Extract references from occurrences - const references = extractReferences(doc); - - // For each reference, look up symbol ID from map - for (const ref of references) { - // Skip local symbols - if (ref.symbol.includes("local")) continue; - - const symbolId = symbolIdMap.get(ref.symbol); - if (!symbolId) continue; - - insertStmt.run(symbolId, fromFileId, ref.line); - totalReferences++; - } - } - - debugConverter(`Total references inserted: ${totalReferences}`); - - debugConverter("Committing symbol references transaction..."); - db.run("COMMIT"); - debugConverter("Symbol references transaction committed"); + let processedCount = 0; + let totalReferences = 0; + const logInterval = Math.max(1, Math.floor(affectedFiles.length / 10)); + + for (const fromPath of affectedFiles) { + processedCount++; + + if ( + processedCount % logInterval === 0 || + processedCount === affectedFiles.length + ) { + debugConverter( + `Processing references: ${processedCount}/${ + affectedFiles.length + } (${Math.floor( + (processedCount / 
affectedFiles.length) * 100 + )}%) - ${totalReferences} refs inserted` + ); + } + + const doc = documentsByPath.get(fromPath); + if (!doc) continue; + + const fromFileId = fileIdMap.get(fromPath); + if (!fromFileId) continue; + + // Extract references from occurrences + const references = extractReferences(doc); + + // For each reference, look up symbol ID from map + for (const ref of references) { + // Skip local symbols + if (ref.symbol.includes("local")) continue; + + const symbolId = symbolIdMap.get(ref.symbol); + if (!symbolId) continue; + + insertStmt.run(symbolId, fromFileId, ref.line); + totalReferences++; + } + } + + debugConverter(`Total references inserted: ${totalReferences}`); + + debugConverter("Committing symbol references transaction..."); + db.run("COMMIT"); + debugConverter("Symbol references transaction committed"); } /** * Update denormalized fields for performance */ function updateDenormalizedFields(db: Database): void { - // Update file symbol counts - debugConverter("Computing file symbol counts..."); - db.run(` + // Update file symbol counts + debugConverter("Computing file symbol counts..."); + db.run(` UPDATE files SET symbol_count = ( SELECT COUNT(*) @@ -1111,11 +1106,11 @@ function updateDenormalizedFields(db: Database): void { WHERE s.file_id = files.id ) `); - debugConverter("File symbol counts updated"); + debugConverter("File symbol counts updated"); - // Update symbol reference counts - debugConverter("Computing symbol reference counts..."); - db.run(` + // Update symbol reference counts + debugConverter("Computing symbol reference counts..."); + db.run(` UPDATE symbols SET reference_count = ( SELECT COUNT(*) @@ -1123,11 +1118,11 @@ function updateDenormalizedFields(db: Database): void { WHERE sr.symbol_id = symbols.id ) `); - debugConverter("Symbol reference counts updated"); + debugConverter("Symbol reference counts updated"); - // Update file dependency counts (outgoing dependencies) - debugConverter("Computing file dependency 
counts..."); - db.run(` + // Update file dependency counts (outgoing dependencies) + debugConverter("Computing file dependency counts..."); + db.run(` UPDATE files SET dependency_count = ( SELECT COUNT(DISTINCT to_file_id) @@ -1135,11 +1130,11 @@ function updateDenormalizedFields(db: Database): void { WHERE d.from_file_id = files.id ) `); - debugConverter("File dependency counts updated"); + debugConverter("File dependency counts updated"); - // Update file dependent counts (incoming dependencies / fan-in) - debugConverter("Computing file dependent counts..."); - db.run(` + // Update file dependent counts (incoming dependencies / fan-in) + debugConverter("Computing file dependent counts..."); + db.run(` UPDATE files SET dependent_count = ( SELECT COUNT(DISTINCT from_file_id) @@ -1147,43 +1142,43 @@ function updateDenormalizedFields(db: Database): void { WHERE d.to_file_id = files.id ) `); - debugConverter("File dependent counts updated"); + debugConverter("File dependent counts updated"); } /** * Update packages table */ function updatePackages({ - db, - skipIfNoChanges = false, + db, + skipIfNoChanges = false, }: { - db: Database; - skipIfNoChanges?: boolean; + db: Database; + skipIfNoChanges?: boolean; }): void { - if (skipIfNoChanges) { - // Check if packages table needs update - const packageCount = ( - db.query("SELECT COUNT(*) as c FROM packages").get() as { - c: number; - } - ).c; - const symbolPackageCount = ( - db - .query( - "SELECT COUNT(DISTINCT package) as c FROM symbols WHERE package IS NOT NULL", - ) - .get() as { c: number } - ).c; - - // Skip if counts match (no new packages) - if (packageCount === symbolPackageCount) { - return; - } - } - - db.run("DELETE FROM packages"); - - db.run(` + if (skipIfNoChanges) { + // Check if packages table needs update + const packageCount = ( + db.query("SELECT COUNT(*) as c FROM packages").get() as { + c: number; + } + ).c; + const symbolPackageCount = ( + db + .query( + "SELECT COUNT(DISTINCT package) as c FROM 
symbols WHERE package IS NOT NULL" + ) + .get() as { c: number } + ).c; + + // Skip if counts match (no new packages) + if (packageCount === symbolPackageCount) { + return; + } + } + + db.run("DELETE FROM packages"); + + db.run(` INSERT INTO packages (name, manager, symbol_count) SELECT package, @@ -1199,43 +1194,42 @@ function updatePackages({ * Update metadata table */ function updateMetadata({ - db, - mode, - changedFiles, - deletedFiles, + db, + mode, + changedFiles, + deletedFiles, }: { - db: Database; - mode: string; - changedFiles: number; - deletedFiles: number; + db: Database; + mode: string; + changedFiles: number; + deletedFiles: number; }): ConversionStats { - const totalFiles = ( - db.query("SELECT COUNT(*) as c FROM files").get() as { c: number } - ).c; - const totalSymbols = ( - db.query("SELECT COUNT(*) as c FROM symbols").get() as { c: number } - ).c; - - const metadata = { - last_indexed: new Date().toISOString(), - total_files: totalFiles.toString(), - total_symbols: totalSymbols.toString(), - }; - - for (const [key, value] of Object.entries(metadata)) { - db.run( - "INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)", - key, - value, - ); - } - - return { - mode: mode as "full" | "incremental", - total_files: totalFiles, - total_symbols: totalSymbols, - changed_files: changedFiles, - deleted_files: deletedFiles, - time_ms: 0, // Will be set by caller - }; + const totalFiles = ( + db.query("SELECT COUNT(*) as c FROM files").get() as { c: number } + ).c; + const totalSymbols = ( + db.query("SELECT COUNT(*) as c FROM symbols").get() as { c: number } + ).c; + + const metadata = { + last_indexed: new Date().toISOString(), + total_files: totalFiles.toString(), + total_symbols: totalSymbols.toString(), + }; + + for (const [key, value] of Object.entries(metadata)) { + db.run("INSERT OR REPLACE INTO metadata (key, value) VALUES (?, ?)", [ + key, + value, + ]); + } + + return { + mode: mode as "full" | "incremental", + total_files: totalFiles, + 
total_symbols: totalSymbols, + changed_files: changedFiles, + deleted_files: deletedFiles, + time_ms: 0, // Will be set by caller + }; } diff --git a/src/converter/documents.ts b/src/converter/documents.ts index 5a8b23b..aecbbb4 100644 --- a/src/converter/documents.ts +++ b/src/converter/documents.ts @@ -270,6 +270,7 @@ function extractReferences({ // Process each line for file path matching for (let lineIndex = 0; lineIndex < lines.length; lineIndex++) { const lineContent = lines[lineIndex]; + if (!lineContent) continue; const lineNumber = lineIndex + 1; // 1-indexed // Match file paths (direct mentions) @@ -287,6 +288,7 @@ function extractReferences({ let match; while ((match = linkRegex.exec(lineContent)) !== null) { const linkPath = match[2]; + if (!linkPath) continue; // Skip URLs if (linkPath.startsWith("http://") || linkPath.startsWith("https://")) { @@ -295,6 +297,7 @@ function extractReferences({ // Normalize path const normalized = normalizePath(linkPath); + if (!normalized) continue; const fileId = filePathMap.get(normalized); if (fileId !== undefined) { diff --git a/src/converter/helpers.ts b/src/converter/helpers.ts index 75a4885..a6c7670 100644 --- a/src/converter/helpers.ts +++ b/src/converter/helpers.ts @@ -161,19 +161,19 @@ export function extractPackageFromScip( // NPM packages const npmMatch = scipSymbol.match(/npm\s+(@?[\w\-@/.]+)\s+/); - if (npmMatch) { + if (npmMatch && npmMatch[1]) { return npmMatch[1]; } // Maven packages const mavenMatch = scipSymbol.match(/maven\s+([\w.-]+)\s+([\w.-]+)\s+/); - if (mavenMatch) { + if (mavenMatch && mavenMatch[1] && mavenMatch[2]) { return `${mavenMatch[1]}:${mavenMatch[2]}`; } // Go packages const goMatch = scipSymbol.match(/go\s+([\w.\-/]+)\s+/); - if (goMatch) { + if (goMatch && goMatch[1]) { return goMatch[1]; } @@ -191,13 +191,14 @@ export function extractNameFromScip(scipSymbol: string): string { // Extract the last segment after the last `/` and before `#` const match = 
scipSymbol.match(/\/([^/`]+)#/); - if (match) { + if (match && match[1]) { return match[1]; } // Fallback: return last segment const segments = scipSymbol.split("/"); const lastSegment = segments[segments.length - 1]; + if (!lastSegment) return "unknown"; return lastSegment.replace(/#.*$/, ""); } @@ -260,11 +261,11 @@ export function extractKindFromDocumentation( const lines = cleanedDoc.split("\n").filter((l) => l.trim().length > 0); if (lines.length === 0) return "unknown"; - const firstLine = lines[0].trim(); + const firstLine = lines[0]!.trim(); // Match patterns in parentheses: "(property)", "(method)", "(parameter)", etc. const parenMatch = firstLine.match(/^\(([^)]+)\)/); - if (parenMatch) { + if (parenMatch && parenMatch[1]) { const kind = parenMatch[1].toLowerCase(); // Handle compound kinds like "enum member" if (kind.includes("enum member")) return "enum_member"; diff --git a/src/converter/scip-parser.ts b/src/converter/scip-parser.ts index 6ae96ca..1d2b5d6 100644 --- a/src/converter/scip-parser.ts +++ b/src/converter/scip-parser.ts @@ -151,10 +151,10 @@ function parseOccurrence(occ: Occurrence): ParsedOccurrence { let range: [number, number, number, number]; if (occ.range.length === 3) { // [startLine, startChar, endChar] - same line - range = [occ.range[0], occ.range[1], occ.range[0], occ.range[2]]; + range = [occ.range[0]!, occ.range[1]!, occ.range[0]!, occ.range[2]!]; } else if (occ.range.length === 4) { // [startLine, startChar, endLine, endChar] - range = [occ.range[0], occ.range[1], occ.range[2], occ.range[3]]; + range = [occ.range[0]!, occ.range[1]!, occ.range[2]!, occ.range[3]!]; } else { throw new Error(`Invalid range format: ${occ.range}`); } @@ -164,17 +164,17 @@ function parseOccurrence(occ: Occurrence): ParsedOccurrence { if (occ.enclosingRange && occ.enclosingRange.length > 0) { if (occ.enclosingRange.length === 3) { enclosingRange = [ - occ.enclosingRange[0], - occ.enclosingRange[1], - occ.enclosingRange[0], - occ.enclosingRange[2], + 
occ.enclosingRange[0]!, + occ.enclosingRange[1]!, + occ.enclosingRange[0]!, + occ.enclosingRange[2]!, ]; } else if (occ.enclosingRange.length === 4) { enclosingRange = [ - occ.enclosingRange[0], - occ.enclosingRange[1], - occ.enclosingRange[2], - occ.enclosingRange[3], + occ.enclosingRange[0]!, + occ.enclosingRange[1]!, + occ.enclosingRange[2]!, + occ.enclosingRange[3]!, ]; } } diff --git a/src/types/modules.d.ts b/src/types/modules.d.ts new file mode 100644 index 0000000..a47bd62 --- /dev/null +++ b/src/types/modules.d.ts @@ -0,0 +1,4 @@ +declare module "*.md" { + const content: string; + export default content; +} diff --git a/src/utils/config.ts b/src/utils/config.ts index fa8b40f..ad9e88f 100644 --- a/src/utils/config.ts +++ b/src/utils/config.ts @@ -105,6 +105,9 @@ export function validateConfig(data: unknown): Config { if (!result.success) { // Convert Zod errors to more user-friendly messages const firstError = result.error.issues[0]; + if (!firstError) { + throw new CtxError("Invalid config: unknown validation error"); + } const field = firstError.path.join("."); throw new CtxError( `Invalid config: ${field ? `field '${field}' ` : ""}${firstError.message}`, @@ -270,7 +273,14 @@ export function createDefaultConfig(params: { root: params.root, scip: ".dora/index.scip", db: ".dora/dora.db", - language: params.language, + language: params.language as + | "typescript" + | "javascript" + | "python" + | "rust" + | "go" + | "java" + | undefined, commands: { index: indexCommand, },