Skip to content
Draft
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
355 changes: 179 additions & 176 deletions src/api/database.ts
Original file line number Diff line number Diff line change
Expand Up @@ -449,6 +449,182 @@ async function assertTableExists(
return rows.length > 0;
}

/**
 * Statement keywords that are never allowed in read-only mode. They are
 * matched as whole words anywhere in the query so that mutations nested in
 * CTEs or subqueries are caught as well.
 */
const READ_ONLY_MUTATION_KEYWORDS: readonly string[] = [
  // ── DML ────────────────────────────────────────────────────────────
  "INSERT",
  "UPDATE",
  "DELETE",
  "INTO",
  "COPY",
  "MERGE",
  // ── DDL ────────────────────────────────────────────────────────────
  "DROP",
  "ALTER",
  "TRUNCATE",
  "CREATE",
  "COMMENT",
  // ── Admin / privilege ──────────────────────────────────────────────
  "GRANT",
  "REVOKE",
  "SET",
  "RESET",
  "LOAD",
  // ── Maintenance ────────────────────────────────────────────────────
  "VACUUM",
  "REINDEX",
  "CLUSTER",
  "REFRESH",
  "DISCARD",
  // ── Procedural ─────────────────────────────────────────────────────
  "CALL",
  "DO",
  // ── Async notifications (side-effects) ─────────────────────────────
  "LISTEN",
  "UNLISTEN",
  "NOTIFY",
  // ── Prepared statements (can wrap mutations) ───────────────────────
  "PREPARE",
  "EXECUTE",
  "DEALLOCATE",
  // ── Locking ────────────────────────────────────────────────────────
  "LOCK",
];

/** Whole-word, case-insensitive matcher for READ_ONLY_MUTATION_KEYWORDS. */
const READ_ONLY_MUTATION_PATTERN = new RegExp(
  `\\b(${READ_ONLY_MUTATION_KEYWORDS.join("|")})\\b`,
  "i",
);

// PostgreSQL built-in functions that can read/write server files, mutate
// server state, or cause denial of service. These appear inside otherwise
// valid SELECT expressions, so keyword checks alone won't catch them.
//
// ── File I/O (arbitrary file read/write on the DB server) ─────────
//   lo_import('/etc/passwd')        — load file into large object
//   lo_export(oid, '/tmp/evil')     — write large object to file
//   lo_unlink(oid)                  — delete large object
//   pg_read_file('/etc/passwd')     — read server file (superuser)
//   pg_read_binary_file(...)        — same, binary
//   pg_write_file(...)              — write to server files (ext. module)
//   pg_stat_file(...)               — stat a server file
//   pg_ls_dir(...)                  — list server directory
//
// ── Sequence / state mutation ────────────────────────────────────
//   nextval('seq'), setval('seq', n)
//
// ── Denial of service ────────────────────────────────────────────
//   pg_sleep(n)                     — block connection for n seconds
//   pg_sleep_for(interval)          — same, interval version
//   pg_sleep_until(timestamp)       — same, deadline version
//
// ── Session / backend control ────────────────────────────────────
//   pg_terminate_backend(pid)       — kill another connection
//   pg_cancel_backend(pid)          — cancel a running query
//   pg_reload_conf()                — reload server configuration
//   pg_rotate_logfile()             — rotate the server log
//   set_config(name, value, local)  — SET equivalent as function
//
// ── Advisory locks (can deadlock other connections) ───────────────
//   pg_advisory_lock(key)           — session-level advisory lock
//   pg_advisory_lock_shared(key)
//   pg_try_advisory_lock(key)
const READ_ONLY_DANGEROUS_FUNCTIONS: readonly string[] = [
  // File I/O
  "lo_import",
  "lo_export",
  "lo_unlink",
  "lo_put",
  "lo_from_bytea",
  "pg_read_file",
  "pg_read_binary_file",
  "pg_write_file",
  "pg_stat_file",
  "pg_ls_dir",
  "pg_ls_logdir",
  "pg_ls_waldir",
  "pg_ls_tmpdir",
  "pg_ls_archive_statusdir",
  // Sequence / state mutation
  "nextval",
  "setval",
  // Denial of service
  "pg_sleep",
  "pg_sleep_for",
  "pg_sleep_until",
  // Session / backend control
  "pg_terminate_backend",
  "pg_cancel_backend",
  "pg_reload_conf",
  "pg_rotate_logfile",
  "set_config",
  // Advisory locks
  "pg_advisory_lock",
  "pg_advisory_lock_shared",
  "pg_try_advisory_lock",
  "pg_try_advisory_lock_shared",
  "pg_advisory_xact_lock",
  "pg_advisory_xact_lock_shared",
  "pg_advisory_unlock",
  "pg_advisory_unlock_shared",
  "pg_advisory_unlock_all",
];

/**
 * Matches a call to one of READ_ONLY_DANGEROUS_FUNCTIONS, optionally written
 * as a double-quoted identifier. The function name is captured in group 1 so
 * the error message reports the name that actually matched (e.g.
 * PG_SLEEP_FOR rather than its prefix PG_SLEEP).
 */
const READ_ONLY_DANGEROUS_FN_PATTERN = new RegExp(
  `(?:^|[^\\w$])"?(${READ_ONLY_DANGEROUS_FUNCTIONS.join("|")})"?\\s*\\(`,
  "i",
);

/** Characters that may continue an unquoted identifier (`$` is one of them). */
const SQL_IDENT_CONTINUE = /[A-Za-z0-9_$\u0080-\uffff]/;

/**
 * Sticky matcher for a dollar-quote delimiter: `$$` or `$tag$`, where the tag
 * must not start with a digit (so `$1` stays a positional parameter).
 */
const SQL_DOLLAR_DELIMITER =
  /\$(?:[A-Za-z_\u0080-\uffff][A-Za-z0-9_\u0080-\uffff]*)?\$/y;

/** Returns the index where a `--` comment starting at `start` ends (exclusive). */
function endOfLineComment(sql: string, start: number): number {
  for (let i = start + 2; i < sql.length; i++) {
    const c = sql.charAt(i);
    if (c === "\n" || c === "\r") return i;
  }
  return sql.length;
}

/**
 * Returns the index just past the block comment opening at `start`, honouring
 * nested block comments, or -1 when the comment is never closed.
 */
function endOfBlockComment(sql: string, start: number): number {
  let depth = 0;
  let i = start;
  while (i < sql.length) {
    if (sql.startsWith("/*", i)) {
      depth += 1;
      i += 2;
    } else if (sql.startsWith("*/", i)) {
      depth -= 1;
      i += 2;
      if (depth === 0) return i;
    } else {
      i += 1;
    }
  }
  return -1;
}

/**
 * Returns the index just past the quoted token whose opening quote is at
 * `start`, or -1 when it is never closed. A doubled quote character is an
 * escaped quote; when `backslashEscapes` is set, a backslash also escapes the
 * character that follows it.
 */
function endOfQuoted(
  sql: string,
  start: number,
  quote: string,
  backslashEscapes: boolean,
): number {
  let i = start + 1;
  while (i < sql.length) {
    const c = sql.charAt(i);
    if (backslashEscapes && c === "\\") {
      i += 2;
    } else if (c === quote) {
      if (sql.charAt(i + 1) === quote) {
        i += 2;
      } else {
        return i + 1;
      }
    } else {
      i += 1;
    }
  }
  return -1;
}

/**
 * Returns the index just past the dollar-quoted string opening at `start`,
 * -1 when the closing delimiter is missing, or null when the text at `start`
 * is not a dollar-quote delimiter at all.
 */
function endOfDollarQuoted(sql: string, start: number): number | null {
  SQL_DOLLAR_DELIMITER.lastIndex = start;
  const opener = SQL_DOLLAR_DELIMITER.exec(sql);
  if (!opener) return null;
  const delimiter = opener[0];
  const close = sql.indexOf(delimiter, start + delimiter.length);
  return close === -1 ? -1 : close + delimiter.length;
}

/**
 * True when the single quote at `quotePos` opens an escape string (E'...'),
 * i.e. it is directly preceded by an `E`/`e` that starts its own token.
 */
function isEscapeStringQuote(sql: string, quotePos: number): boolean {
  if (quotePos < 1) return false;
  const prefix = sql.charAt(quotePos - 1);
  if (prefix !== "E" && prefix !== "e") return false;
  return quotePos < 2 || !SQL_IDENT_CONTINUE.test(sql.charAt(quotePos - 2));
}

/**
 * Walks the SQL text once, left to right, and blanks out everything that is
 * not executable code. Doing this in a single pass (instead of stripping
 * comments first and literals second) is what keeps a comment marker inside a
 * string literal — or a quote inside a comment — from hiding real code.
 *
 * Each comment or string literal is replaced by one space, which also keeps
 * neighbouring tokens apart (DELETE/&#42;&#42;/FROM becomes DELETE FROM).
 *
 * @returns `noLiterals`: comments and string literals blanked, double-quoted
 *   identifiers kept; `noStrings`: double-quoted identifiers blanked as well.
 *   If a comment, literal, or quoted identifier is never closed, the rest of
 *   the input is kept verbatim in both so nothing after it can go unnoticed.
 */
function maskSqlForReadOnlyCheck(sql: string): {
  noLiterals: string;
  noStrings: string;
} {
  let noLiterals = "";
  let noStrings = "";
  let pos = 0;

  while (pos < sql.length) {
    const ch = sql.charAt(pos);
    const next = sql.charAt(pos + 1);
    // null → plain code character; -1 → unterminated construct.
    let end: number | null = null;
    let isQuotedIdentifier = false;

    if (ch === "-" && next === "-") {
      end = endOfLineComment(sql, pos);
    } else if (ch === "/" && next === "*") {
      end = endOfBlockComment(sql, pos);
    } else if (ch === "'") {
      end = endOfQuoted(sql, pos, "'", isEscapeStringQuote(sql, pos));
    } else if (ch === '"') {
      end = endOfQuoted(sql, pos, '"', false);
      isQuotedIdentifier = true;
    } else if (
      ch === "$" &&
      !(pos > 0 && SQL_IDENT_CONTINUE.test(sql.charAt(pos - 1)))
    ) {
      // A `$` glued to the end of an identifier (x$$) belongs to that
      // identifier and must not be mistaken for a dollar-quote opener.
      end = endOfDollarQuoted(sql, pos);
    }

    if (end === null) {
      noLiterals += ch;
      noStrings += ch;
      pos += 1;
      continue;
    }
    if (end === -1) {
      const rest = sql.slice(pos);
      noLiterals += rest;
      noStrings += rest;
      break;
    }

    noLiterals += isQuotedIdentifier ? sql.slice(pos, end) : " ";
    noStrings += " ";
    pos = end;
  }

  return { noLiterals, noStrings };
}

/**
 * Validate a SQL query to ensure it is read-only.
 *
 * Checks, in order: mutation keywords anywhere in the statement, calls to
 * dangerous built-in functions, and multiple statements separated by `;`.
 * Comments, string literals ('...', E'...', $$...$$, $tag$...$tag$) and — for
 * the keyword and semicolon checks — double-quoted identifiers are ignored,
 * so keywords or semicolons that are merely data do not cause a rejection.
 *
 * Security note: this is a lexical scan, not a full SQL parser, and may be
 * bypassed by sufficiently complex obfuscation (for example Unicode-escaped
 * identifiers). Backslashes are treated as escapes only inside E'...'
 * strings, which presumably matches the server only when
 * standard_conforming_strings is on — confirm against the deployment. Treat
 * the result as a best-effort safeguard against common attacks and
 * accidental mutations.
 *
 * @param sqlText - The SQL text submitted by the caller.
 * @returns An error message if the query must be rejected, or null if it
 *   appears safe (read-only).
 */
export function validateReadOnlyQuery(sqlText: string): string | null {
  const { noLiterals, noStrings } = maskSqlForReadOnlyCheck(sqlText);

  // Quoted identifiers are excluded here so that a column named "update"
  // does not trip the keyword check.
  const match = READ_ONLY_MUTATION_PATTERN.exec(noStrings);
  if (match) {
    return `Query rejected: "${match[1].toUpperCase()}" is a mutation keyword. Set readOnly: false to execute mutations.`;
  }

  // Quoted identifiers are kept here so that "pg_sleep"(1) is still caught.
  const fnMatch = READ_ONLY_DANGEROUS_FN_PATTERN.exec(noLiterals);
  if (fnMatch) {
    const fnName = fnMatch[1].toUpperCase();
    return `Query rejected: "${fnName}" is a dangerous function that can modify server state. Set readOnly: false to execute this query.`;
  }

  // Reject multi-statement queries: any semicolon in executable code other
  // than a single trailing one. Semicolons inside literals, comments, or
  // quoted identifiers have already been blanked out.
  const withoutTrailingSemicolon = noStrings.replace(/;\s*$/, "");
  if (withoutTrailingSemicolon.includes(";")) {
    return "Query rejected: multi-statement queries are not allowed in read-only mode.";
  }

  return null;
}
Comment on lines +462 to +626

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Security: Read-Only Query Validation is Not a Full SQL Parser

The validateReadOnlyQuery function (lines 462-626) attempts to block mutation queries and dangerous functions by stripping comments and string literals, then searching for keywords. However, this approach is not as robust as a true SQL parser and can be bypassed by sufficiently obfuscated queries (e.g., using Unicode homoglyphs, unusual whitespace, or creative CTEs). The code comments acknowledge this limitation, but the risk remains significant if the API is exposed to untrusted users.

Recommendation:

  • Consider using a proper SQL parser or a third-party library designed for query analysis to enforce read-only constraints more reliably.
  • If this is not feasible, ensure that the API is only accessible to trusted users, and document the limitations clearly in the API documentation.


// ---------------------------------------------------------------------------
// Route handlers
// ---------------------------------------------------------------------------
Expand Down Expand Up @@ -1062,183 +1238,10 @@ async function handleQuery(
const sqlText = body.sql.trim();

// If readOnly mode, reject mutation statements.
// Strip SQL comments, then scan for mutation keywords *anywhere* in the
// query — not just the leading keyword. This prevents bypass via CTEs
// (WITH ... AS (DELETE ...)) and other SQL constructs that nest mutations.
if (body.readOnly !== false) {
// Strip block comments (/* ... */) and line comments (-- ...).
// Use empty-string replacement (not space) to mirror how PostgreSQL
// concatenates tokens across comments — e.g. DE/* */LETE → DELETE.
// A space replacement would turn it into "DE LETE", hiding the keyword.
const stripped = sqlText
.replace(/\/\*[\s\S]*?\*\//g, "")
.replace(/--.*$/gm, "")
.trim();

// Strip string literals so that mutation keywords/functions inside quoted
// strings are ignored. Handles single-quoted ('...'), dollar-quoted
// ($$...$$), and tagged dollar-quoted ($tag$...$tag$) strings.
const noLiterals = stripped
.replace(/\$([A-Za-z0-9_]*)\$[\s\S]*?\$\1\$/g, " ")
.replace(/'(?:[^']|'')*'/g, " ");

// For keyword checks, also strip double-quoted identifiers to avoid
// matching words inside quoted table/column names.
const noStrings = noLiterals.replace(/"(?:[^"]|"")*"/g, " ");

const mutationKeywords = [
// ── DML ────────────────────────────────────────────────────────────
"INSERT",
"UPDATE",
"DELETE",
"INTO",
"COPY",
"MERGE",
// ── DDL ────────────────────────────────────────────────────────────
"DROP",
"ALTER",
"TRUNCATE",
"CREATE",
"COMMENT",
// ── Admin / privilege ──────────────────────────────────────────────
"GRANT",
"REVOKE",
"SET",
"RESET",
"LOAD",
// ── Maintenance ────────────────────────────────────────────────────
"VACUUM",
"REINDEX",
"CLUSTER",
"REFRESH",
"DISCARD",
// ── Procedural ─────────────────────────────────────────────────────
"CALL",
"DO",
// ── Async notifications (side-effects) ─────────────────────────────
"LISTEN",
"UNLISTEN",
"NOTIFY",
// ── Prepared statements (can wrap mutations) ───────────────────────
"PREPARE",
"EXECUTE",
"DEALLOCATE",
// ── Locking ────────────────────────────────────────────────────────
"LOCK",
];
// Match mutation keywords as whole words (word boundary) anywhere in the
// query, catching them inside CTEs, subqueries, etc.
const mutationPattern = new RegExp(
`\\b(${mutationKeywords.join("|")})\\b`,
"i",
);
const match = mutationPattern.exec(noStrings);
if (match) {
sendJsonError(
res,
`Query rejected: "${match[1].toUpperCase()}" is a mutation keyword. Set readOnly: false to execute mutations.`,
);
return;
}

// PostgreSQL built-in functions that can read/write server files, mutate
// server state, or cause denial of service. These appear inside otherwise
// valid SELECT expressions, so keyword checks alone won't catch them.
//
// ── File I/O (arbitrary file read/write on the DB server) ─────────
// lo_import('/etc/passwd') — load file into large object
// lo_export(oid, '/tmp/evil') — write large object to file
// lo_unlink(oid) — delete large object
// pg_read_file('/etc/passwd') — read server file (superuser)
// pg_read_binary_file(...) — same, binary
// pg_write_file(...) — write to server files (ext. module)
// pg_stat_file(...) — stat a server file
// pg_ls_dir(...) — list server directory
//
// ── Sequence / state mutation ────────────────────────────────────
// nextval('seq'), setval('seq', n)
//
// ── Denial of service ────────────────────────────────────────────
// pg_sleep(n) — block connection for n seconds
// pg_sleep_for(interval) — same, interval version
// pg_sleep_until(timestamp) — same, deadline version
//
// ── Session / backend control ────────────────────────────────────
// pg_terminate_backend(pid) — kill another connection
// pg_cancel_backend(pid) — cancel a running query
// pg_reload_conf() — reload server configuration
// pg_rotate_logfile() — rotate the server log
// set_config(name, value, local) — SET equivalent as function
//
// ── Advisory locks (can deadlock other connections) ───────────────
// pg_advisory_lock(key) — session-level advisory lock
// pg_advisory_lock_shared(key)
// pg_try_advisory_lock(key)
const dangerousFunctions = [
// File I/O
"lo_import",
"lo_export",
"lo_unlink",
"lo_put",
"lo_from_bytea",
"pg_read_file",
"pg_read_binary_file",
"pg_write_file",
"pg_stat_file",
"pg_ls_dir",
"pg_ls_logdir",
"pg_ls_waldir",
"pg_ls_tmpdir",
"pg_ls_archive_statusdir",
// Sequence / state mutation
"nextval",
"setval",
// Denial of service
"pg_sleep",
"pg_sleep_for",
"pg_sleep_until",
// Session / backend control
"pg_terminate_backend",
"pg_cancel_backend",
"pg_reload_conf",
"pg_rotate_logfile",
"set_config",
// Advisory locks
"pg_advisory_lock",
"pg_advisory_lock_shared",
"pg_try_advisory_lock",
"pg_try_advisory_lock_shared",
"pg_advisory_xact_lock",
"pg_advisory_xact_lock_shared",
"pg_advisory_unlock",
"pg_advisory_unlock_shared",
"pg_advisory_unlock_all",
];
const dangerousFnPattern = new RegExp(
`(?:^|[^\\w$])"?(?:${dangerousFunctions.join("|")})"?\\s*\\(`,
"i",
);
const fnMatch = dangerousFnPattern.exec(noLiterals);
if (fnMatch) {
// Extract the function name from the match for the error message.
const fnNameMatch = fnMatch[0].match(
new RegExp(`(${dangerousFunctions.join("|")})`, "i"),
);
const fnName = fnNameMatch ? fnNameMatch[1].toUpperCase() : "UNKNOWN";
sendJsonError(
res,
`Query rejected: "${fnName}" is a dangerous function that can modify server state. Set readOnly: false to execute this query.`,
);
return;
}

// Reject multi-statement queries (naive: any semicolon not at the very end)
const trimmedForSemicolon = stripped.replace(/;\s*$/, "");
if (trimmedForSemicolon.includes(";")) {
sendJsonError(
res,
"Query rejected: multi-statement queries are not allowed in read-only mode.",
);
const error = validateReadOnlyQuery(sqlText);
if (error) {
sendJsonError(res, error);
return;
}
}
Comment on lines 1238 to 1247

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Security: Reliance on validateReadOnlyQuery for Mutation Protection

In the query handler (lines 1238-1247), the code relies on validateReadOnlyQuery to prevent mutation queries when readOnly is not explicitly set to false. As noted above, this validator is not a full SQL parser and can be bypassed by sophisticated attackers. If the API is exposed to untrusted clients, this could allow unauthorized data modification.

Recommendation:

  • Treat the read-only mode as a best-effort safeguard, not a guarantee. For high-security environments, restrict access to this endpoint or use a more robust query analysis tool.

Expand Down
Loading