/**
 * Keywords whose presence (as a whole word, outside literals and quoted
 * identifiers) marks a statement as potentially mutating data, schema,
 * privileges, or session state.
 */
const READ_ONLY_MUTATION_KEYWORDS: readonly string[] = [
  // DML
  "INSERT", "UPDATE", "DELETE", "INTO", "COPY", "MERGE",
  // DDL
  "DROP", "ALTER", "TRUNCATE", "CREATE", "COMMENT",
  // Admin / privilege
  "GRANT", "REVOKE", "SET", "RESET", "LOAD",
  // Maintenance
  "VACUUM", "REINDEX", "CLUSTER", "REFRESH", "DISCARD",
  // Procedural
  "CALL", "DO",
  // Async notifications (side effects)
  "LISTEN", "UNLISTEN", "NOTIFY",
  // Prepared statements (can wrap mutations)
  "PREPARE", "EXECUTE", "DEALLOCATE",
  // Locking
  "LOCK",
];

/** Whole-word, case-insensitive match for any mutation keyword. */
const READ_ONLY_MUTATION_PATTERN = new RegExp(
  `\\b(${READ_ONLY_MUTATION_KEYWORDS.join("|")})\\b`,
  "i",
);

/**
 * PostgreSQL built-in functions that are callable from a plain SELECT yet can
 * touch server files, mutate state, stall a connection, or interfere with
 * other sessions.
 */
const READ_ONLY_DANGEROUS_FUNCTIONS: readonly string[] = [
  // File I/O on the database server / large objects
  "lo_import", "lo_export", "lo_unlink", "lo_put", "lo_from_bytea",
  "pg_read_file", "pg_read_binary_file", "pg_write_file", "pg_stat_file",
  "pg_ls_dir", "pg_ls_logdir", "pg_ls_waldir", "pg_ls_tmpdir",
  "pg_ls_archive_statusdir",
  // Sequence / state mutation
  "nextval", "setval",
  // Denial of service (blocks the connection)
  "pg_sleep", "pg_sleep_for", "pg_sleep_until",
  // Session / backend control
  "pg_terminate_backend", "pg_cancel_backend", "pg_reload_conf",
  "pg_rotate_logfile", "set_config",
  // Advisory locks (can block or deadlock other connections)
  "pg_advisory_lock", "pg_advisory_lock_shared",
  "pg_try_advisory_lock", "pg_try_advisory_lock_shared",
  "pg_advisory_xact_lock", "pg_advisory_xact_lock_shared",
  "pg_advisory_unlock", "pg_advisory_unlock_shared", "pg_advisory_unlock_all",
];

/**
 * Matches a call to a dangerous function: the name (optionally double-quoted,
 * optionally schema-qualified because "." is a valid preceding character)
 * followed by "(". The name is captured so the error message reports the
 * function that actually matched (e.g. PG_SLEEP_FOR rather than its prefix
 * PG_SLEEP).
 */
const READ_ONLY_DANGEROUS_FN_PATTERN = new RegExp(
  `(?:^|[^\\w$])"?(${READ_ONLY_DANGEROUS_FUNCTIONS.join("|")})"?\\s*\\(`,
  "i",
);

/**
 * Sticky matcher for an unquoted identifier/keyword. "$" is allowed after the
 * first character, so "a$x$" is consumed as ONE word and its "$x$" suffix is
 * never mistaken for a dollar-quote delimiter.
 */
const READ_ONLY_SQL_WORD = /[A-Za-z_\u0080-\uffff][A-Za-z0-9_$\u0080-\uffff]*/y;

/** Sticky matcher for a dollar-quote delimiter: "$$" or "$tag$". */
const READ_ONLY_SQL_DOLLAR_TAG =
  /\$(?:[A-Za-z_\u0080-\uffff][A-Za-z0-9_\u0080-\uffff]*)?\$/y;

/** The two sanitized views of a query that the read-only checks run against. */
interface ReadOnlySqlViews {
  /** Comments and string literals blanked; quoted identifiers kept verbatim. */
  noLiterals: string;
  /** Comments, string literals and quoted identifiers all blanked. */
  noStrings: string;
}

/**
 * Find the end of a quoted token that opens at `start`.
 *
 * A doubled quote character is an escaped quote. When `backslashEscapes` is
 * true (E'...' strings) a backslash escapes the following character.
 *
 * @returns index just past the closing quote, or -1 if the token never closes.
 */
function findReadOnlySqlQuoteEnd(
  text: string,
  start: number,
  quote: string,
  backslashEscapes: boolean,
): number {
  let i = start + 1;
  while (i < text.length) {
    const ch = text.charAt(i);
    if (backslashEscapes && ch === "\\") {
      i += 2;
    } else if (ch !== quote) {
      i += 1;
    } else if (text.charAt(i + 1) === quote) {
      i += 2;
    } else {
      return i + 1;
    }
  }
  return -1;
}

/**
 * Find the end of a block comment that opens at `start`, honouring nesting
 * (PostgreSQL block comments nest).
 *
 * @returns index just past the matching close, or -1 if it never closes.
 */
function findReadOnlySqlBlockCommentEnd(text: string, start: number): number {
  let depth = 0;
  let i = start;
  while (i < text.length) {
    if (text.startsWith("/*", i)) {
      depth += 1;
      i += 2;
    } else if (text.startsWith("*/", i)) {
      depth -= 1;
      i += 2;
      if (depth === 0) return i;
    } else {
      i += 1;
    }
  }
  return -1;
}

/**
 * Tokenize `sqlText` in a single left-to-right pass and blank out everything
 * that is not executable SQL text.
 *
 * Doing this in one pass (instead of successive regex replacements) matters
 * for safety: a comment marker inside a string literal, or a quote inside a
 * comment or quoted identifier, must not change how the rest of the query is
 * read. Each blanked token becomes a single space so neighbouring tokens are
 * never glued together.
 *
 * Tokens that never terminate are left visible, so the checks err on the side
 * of rejecting rather than hiding text.
 */
function sanitizeSqlForReadOnlyCheck(sqlText: string): ReadOnlySqlViews {
  const noLiterals: string[] = [];
  const noStrings: string[] = [];
  const pushBoth = (text: string): void => {
    noLiterals.push(text);
    noStrings.push(text);
  };

  let pos = 0;
  while (pos < sqlText.length) {
    const ch = sqlText.charAt(pos);

    // Line comment: runs to the end of the line (the newline itself is kept).
    if (sqlText.startsWith("--", pos)) {
      let end = pos + 2;
      while (
        end < sqlText.length &&
        sqlText.charAt(end) !== "\n" &&
        sqlText.charAt(end) !== "\r"
      ) {
        end += 1;
      }
      pushBoth(" ");
      pos = end;
      continue;
    }

    // Block comment (nestable).
    if (sqlText.startsWith("/*", pos)) {
      const end = findReadOnlySqlBlockCommentEnd(sqlText, pos);
      if (end === -1) {
        // Unterminated: pass the remainder through untouched so every check
        // still sees it, and stop (also keeps the scan linear).
        pushBoth(sqlText.slice(pos));
        break;
      }
      pushBoth(" ");
      pos = end;
      continue;
    }

    // Standard string literal: '...' with '' as the escaped quote.
    if (ch === "'") {
      const end = findReadOnlySqlQuoteEnd(sqlText, pos, "'", false);
      if (end !== -1) {
        pushBoth(" ");
        pos = end;
        continue;
      }
    }

    // Quoted identifier: kept for the function check (so "pg_sleep"(1) is
    // caught) but blanked for keyword and semicolon checks.
    if (ch === '"') {
      const end = findReadOnlySqlQuoteEnd(sqlText, pos, '"', false);
      if (end !== -1) {
        noLiterals.push(sqlText.slice(pos, end));
        noStrings.push(" ");
        pos = end;
        continue;
      }
    }

    // Dollar-quoted string: $$...$$ or $tag$...$tag$.
    if (ch === "$") {
      READ_ONLY_SQL_DOLLAR_TAG.lastIndex = pos;
      const opener = READ_ONLY_SQL_DOLLAR_TAG.exec(sqlText);
      if (opener) {
        const tag = opener[0];
        const close = sqlText.indexOf(tag, pos + tag.length);
        if (close !== -1) {
          pushBoth(" ");
          pos = close + tag.length;
          continue;
        }
      }
    }

    // Identifier / keyword, copied through as one unit.
    READ_ONLY_SQL_WORD.lastIndex = pos;
    const word = READ_ONLY_SQL_WORD.exec(sqlText);
    if (word) {
      pushBoth(word[0]);
      pos += word[0].length;
      // E'...' escape string: backslash escapes apply, so \' does not close it.
      if (word[0].toLowerCase() === "e" && sqlText.charAt(pos) === "'") {
        const end = findReadOnlySqlQuoteEnd(sqlText, pos, "'", true);
        if (end !== -1) {
          pushBoth(" ");
          pos = end;
        }
      }
      continue;
    }

    pushBoth(ch);
    pos += 1;
  }

  return { noLiterals: noLiterals.join(""), noStrings: noStrings.join("") };
}

/**
 * Validate that a SQL query is read-only.
 *
 * Checks, in order:
 *   1. no mutation keyword appears outside comments, string literals and
 *      quoted identifiers (this also catches mutations nested in CTEs or
 *      subqueries);
 *   2. no call to a known dangerous built-in function appears outside
 *      comments and string literals;
 *   3. the query is a single statement (one optional trailing ";" allowed).
 *
 * Security note: this is a lexical filter, not a SQL parser. It understands
 * line comments, nested block comments, standard and E'...' string literals,
 * dollar-quoted strings and double-quoted identifiers. It does NOT decode
 * U&"..." Unicode-escaped identifiers, cannot know whether the server runs
 * with standard_conforming_strings = off, and cannot see through user-defined
 * functions. Treat it as defence in depth against common attacks and
 * accidental mutations, not as a substitute for database-level permissions.
 *
 * @param sqlText - The raw SQL text submitted by the caller.
 * @returns A human-readable rejection message, or `null` if the query
 *   appears to be read-only.
 */
export function validateReadOnlyQuery(sqlText: string): string | null {
  const { noLiterals, noStrings } = sanitizeSqlForReadOnlyCheck(sqlText);

  const keywordMatch = READ_ONLY_MUTATION_PATTERN.exec(noStrings);
  if (keywordMatch) {
    return `Query rejected: "${keywordMatch[1].toUpperCase()}" is a mutation keyword. Set readOnly: false to execute mutations.`;
  }

  const fnMatch = READ_ONLY_DANGEROUS_FN_PATTERN.exec(noLiterals);
  if (fnMatch) {
    return `Query rejected: "${fnMatch[1].toUpperCase()}" is a dangerous function that can modify server state. Set readOnly: false to execute this query.`;
  }

  // Only semicolons in executable text separate statements; those inside
  // literals or quoted identifiers were blanked above and are ignored.
  if (noStrings.replace(/;\s*$/, "").includes(";")) {
    return "Query rejected: multi-statement queries are not allowed in read-only mode.";
  }

  return null;
}
- const stripped = sqlText - .replace(/\/\*[\s\S]*?\*\//g, "") - .replace(/--.*$/gm, "") - .trim(); - - // Strip string literals so that mutation keywords/functions inside quoted - // strings are ignored. Handles single-quoted ('...'), dollar-quoted - // ($$...$$), and tagged dollar-quoted ($tag$...$tag$) strings. - const noLiterals = stripped - .replace(/\$([A-Za-z0-9_]*)\$[\s\S]*?\$\1\$/g, " ") - .replace(/'(?:[^']|'')*'/g, " "); - - // For keyword checks, also strip double-quoted identifiers to avoid - // matching words inside quoted table/column names. - const noStrings = noLiterals.replace(/"(?:[^"]|"")*"/g, " "); - - const mutationKeywords = [ - // ── DML ──────────────────────────────────────────────────────────── - "INSERT", - "UPDATE", - "DELETE", - "INTO", - "COPY", - "MERGE", - // ── DDL ──────────────────────────────────────────────────────────── - "DROP", - "ALTER", - "TRUNCATE", - "CREATE", - "COMMENT", - // ── Admin / privilege ────────────────────────────────────────────── - "GRANT", - "REVOKE", - "SET", - "RESET", - "LOAD", - // ── Maintenance ──────────────────────────────────────────────────── - "VACUUM", - "REINDEX", - "CLUSTER", - "REFRESH", - "DISCARD", - // ── Procedural ───────────────────────────────────────────────────── - "CALL", - "DO", - // ── Async notifications (side-effects) ───────────────────────────── - "LISTEN", - "UNLISTEN", - "NOTIFY", - // ── Prepared statements (can wrap mutations) ─────────────────────── - "PREPARE", - "EXECUTE", - "DEALLOCATE", - // ── Locking ──────────────────────────────────────────────────────── - "LOCK", - ]; - // Match mutation keywords as whole words (word boundary) anywhere in the - // query, catching them inside CTEs, subqueries, etc. - const mutationPattern = new RegExp( - `\\b(${mutationKeywords.join("|")})\\b`, - "i", - ); - const match = mutationPattern.exec(noStrings); - if (match) { - sendJsonError( - res, - `Query rejected: "${match[1].toUpperCase()}" is a mutation keyword. 
Set readOnly: false to execute mutations.`, - ); - return; - } - - // PostgreSQL built-in functions that can read/write server files, mutate - // server state, or cause denial of service. These appear inside otherwise - // valid SELECT expressions, so keyword checks alone won't catch them. - // - // ── File I/O (arbitrary file read/write on the DB server) ───────── - // lo_import('/etc/passwd') — load file into large object - // lo_export(oid, '/tmp/evil') — write large object to file - // lo_unlink(oid) — delete large object - // pg_read_file('/etc/passwd') — read server file (superuser) - // pg_read_binary_file(...) — same, binary - // pg_write_file(...) — write to server files (ext. module) - // pg_stat_file(...) — stat a server file - // pg_ls_dir(...) — list server directory - // - // ── Sequence / state mutation ──────────────────────────────────── - // nextval('seq'), setval('seq', n) - // - // ── Denial of service ──────────────────────────────────────────── - // pg_sleep(n) — block connection for n seconds - // pg_sleep_for(interval) — same, interval version - // pg_sleep_until(timestamp) — same, deadline version - // - // ── Session / backend control ──────────────────────────────────── - // pg_terminate_backend(pid) — kill another connection - // pg_cancel_backend(pid) — cancel a running query - // pg_reload_conf() — reload server configuration - // pg_rotate_logfile() — rotate the server log - // set_config(name, value, local) — SET equivalent as function - // - // ── Advisory locks (can deadlock other connections) ─────────────── - // pg_advisory_lock(key) — session-level advisory lock - // pg_advisory_lock_shared(key) - // pg_try_advisory_lock(key) - const dangerousFunctions = [ - // File I/O - "lo_import", - "lo_export", - "lo_unlink", - "lo_put", - "lo_from_bytea", - "pg_read_file", - "pg_read_binary_file", - "pg_write_file", - "pg_stat_file", - "pg_ls_dir", - "pg_ls_logdir", - "pg_ls_waldir", - "pg_ls_tmpdir", - "pg_ls_archive_statusdir", - // 
Sequence / state mutation - "nextval", - "setval", - // Denial of service - "pg_sleep", - "pg_sleep_for", - "pg_sleep_until", - // Session / backend control - "pg_terminate_backend", - "pg_cancel_backend", - "pg_reload_conf", - "pg_rotate_logfile", - "set_config", - // Advisory locks - "pg_advisory_lock", - "pg_advisory_lock_shared", - "pg_try_advisory_lock", - "pg_try_advisory_lock_shared", - "pg_advisory_xact_lock", - "pg_advisory_xact_lock_shared", - "pg_advisory_unlock", - "pg_advisory_unlock_shared", - "pg_advisory_unlock_all", - ]; - const dangerousFnPattern = new RegExp( - `(?:^|[^\\w$])"?(?:${dangerousFunctions.join("|")})"?\\s*\\(`, - "i", - ); - const fnMatch = dangerousFnPattern.exec(noLiterals); - if (fnMatch) { - // Extract the function name from the match for the error message. - const fnNameMatch = fnMatch[0].match( - new RegExp(`(${dangerousFunctions.join("|")})`, "i"), - ); - const fnName = fnNameMatch ? fnNameMatch[1].toUpperCase() : "UNKNOWN"; - sendJsonError( - res, - `Query rejected: "${fnName}" is a dangerous function that can modify server state. Set readOnly: false to execute this query.`, - ); - return; - } - - // Reject multi-statement queries (naive: any semicolon not at the very end) - const trimmedForSemicolon = stripped.replace(/;\s*$/, ""); - if (trimmedForSemicolon.includes(";")) { - sendJsonError( - res, - "Query rejected: multi-statement queries are not allowed in read-only mode.", - ); + const error = validateReadOnlyQuery(sqlText); + if (error) { + sendJsonError(res, error); return; } }