From 5f2ccad53fead0330a28b7df55973e59c3bc5b6f Mon Sep 17 00:00:00 2001 From: Claude Date: Sun, 14 Dec 2025 21:14:40 +0000 Subject: [PATCH] Fix multiple GitHub issues MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit addresses the following issues: - #118: Add percentile_cont, percentile_disc, mode to ALLOWED_FUNCTIONS - #111: Fix PostgreSQL ≤12 compatibility for stddev_exec_time column - #110: Fix Docker entrypoint (add exec) and create app user in Dockerfile - #109 & #94: Fix double quote parsing and sequence name escaping - #99: Add configurable query timeout via QUERY_TIMEOUT env var or --query-timeout - #95: Redirect logging to stderr to avoid interfering with stdio MCP transport - #93: Update broken README links to archived MCP servers repo - #100 & #74: Add uvx and WSL instructions to README - #71: Add table/column comments to metadata tools (list_objects, get_object_details) Changes: - safe_sql.py: Add ordered-set aggregate functions - top_queries_calc.py: Use version-appropriate column name for stddev - docker-entrypoint.sh: Use exec for proper signal handling - Dockerfile: Create app user before COPY with --chown - sequence_health_calc.py: Fix parsing of quoted identifiers - server.py: Add configurable timeout, table/column comments in metadata - __init__.py: Configure logging to stderr - README.md: Add uvx/WSL instructions, fix broken links --- Dockerfile | 4 + README.md | 57 +++++++++++-- docker-entrypoint.sh | 17 +--- src/postgres_mcp/__init__.py | 9 +++ .../database_health/sequence_health_calc.py | 61 +++++++++++--- src/postgres_mcp/server.py | 81 ++++++++++++++++--- src/postgres_mcp/sql/safe_sql.py | 4 + .../top_queries/top_queries_calc.py | 4 +- 8 files changed, 195 insertions(+), 42 deletions(-) diff --git a/Dockerfile b/Dockerfile index 62b0f64d..814b4962 100644 --- a/Dockerfile +++ b/Dockerfile @@ -27,6 +27,10 @@ FROM python:3.12-slim-bookworm # Python executable must be the same, e.g., using `python:3.11-slim-bookworm` # will fail. +# Create a non-root user for security +RUN groupadd --gid 1000 app && \ + useradd --uid 1000 --gid app --shell /bin/bash --create-home app + COPY --from=builder --chown=app:app /app /app ENV PATH="/app/.venv/bin:$PATH" diff --git a/README.md b/README.md index 82236d33..ee455360 100644 --- a/README.md +++ b/README.md @@ -194,6 +194,53 @@ The Postgres MCP Pro Docker image will automatically remap the hostname `localho ``` +##### If you are using `uvx` + +`uvx` is a convenient way to run Python packages directly without explicit installation: + +```json +{ + "mcpServers": { + "postgres": { + "command": "uvx", + "args": [ + "postgres-mcp", + "--access-mode=unrestricted" + ], + "env": { + "DATABASE_URI": "postgresql://username:password@localhost:5432/dbname" + } + } + } +} +``` + + +##### If you are using WSL (Windows Subsystem for Linux) + +When running Claude Desktop on Windows with the MCP server in WSL, use `wsl` as the command: + +```json +{ + "mcpServers": { + "postgres": { + "command": "wsl", + "args": [ + "uvx", + "postgres-mcp", + "--access-mode=unrestricted" + ], + "env": { + "DATABASE_URI": "postgresql://username:password@localhost:5432/dbname" + } + } + } +} +``` + +Note: Ensure `uvx` is installed and available in your WSL environment's PATH. + + ##### Connection URI Replace `postgresql://...` with your [Postgres database connection URI](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-CONNSTRING-URIS). @@ -313,7 +360,7 @@ The [MCP standard](https://modelcontextprotocol.io/) defines various types of en Postgres MCP Pro provides functionality via [MCP tools](https://modelcontextprotocol.io/docs/concepts/tools) alone. We chose this approach because the [MCP client ecosystem](https://modelcontextprotocol.io/clients) has widespread support for MCP tools. -This contrasts with the approach of other Postgres MCP servers, including the [Reference Postgres MCP Server](https://github.com/modelcontextprotocol/servers/tree/main/src/postgres), which use [MCP resources](https://modelcontextprotocol.io/docs/concepts/resources) to expose schema information. +This contrasts with the approach of other Postgres MCP servers, including the [Reference Postgres MCP Server](https://github.com/modelcontextprotocol/servers-archived/tree/main/src/postgres), which use [MCP resources](https://modelcontextprotocol.io/docs/concepts/resources) to expose schema information. Postgres MCP Pro Tools: @@ -336,7 +383,7 @@ Postgres MCP Pro Tools: **Postgres MCP Servers** - [Query MCP](https://github.com/alexander-zuev/supabase-mcp-server). An MCP server for Supabase Postgres with a three-tier safety architecture and Supabase management API support. - [PG-MCP](https://github.com/stuzero/pg-mcp-server). An MCP server for PostgreSQL with flexible connection options, explain plans, extension context, and more. -- [Reference PostgreSQL MCP Server](https://github.com/modelcontextprotocol/servers/tree/main/src/postgres). A simple MCP Server implementation exposing schema information as MCP resources and executing read-only queries. +- [Reference PostgreSQL MCP Server](https://github.com/modelcontextprotocol/servers-archived/tree/main/src/postgres). A simple MCP Server implementation exposing schema information as MCP resources and executing read-only queries. - [Supabase Postgres MCP Server](https://github.com/supabase-community/supabase-mcp). This MCP Server provides Supabase management features and is actively maintained by the Supabase community. - [Nile MCP Server](https://github.com/niledatabase/nile-mcp-server). An MCP server providing access to the management API for the Nile's multi-tenant Postgres service. - [Neon MCP Server](https://github.com/neondatabase-labs/mcp-server-neon). An MCP server providing access to the management API for Neon's serverless Postgres service. @@ -524,7 +571,7 @@ We remain open to revising this decision in the future. ### Connection Configuration -Like the [Reference PostgreSQL MCP Server](https://github.com/modelcontextprotocol/servers/tree/main/src/postgres), Postgres MCP Pro takes Postgres connection information at startup. +Like the [Reference PostgreSQL MCP Server](https://github.com/modelcontextprotocol/servers-archived/tree/main/src/postgres), Postgres MCP Pro takes Postgres connection information at startup. This is convenient for users who always connect to the same database but can be cumbersome when users switch databases. An alternative approach, taken by [PG-MCP](https://github.com/stuzero/pg-mcp-server), is provide connection details via MCP tool calls at the time of use. @@ -549,7 +596,7 @@ However, we do not know whether other LLMs do so as reliably and capably. *Would it be better to provide schema information using [MCP resources](https://modelcontextprotocol.io/docs/concepts/resources) rather than [MCP tools](https://modelcontextprotocol.io/docs/concepts/tools)?* -The [Reference PostgreSQL MCP Server](https://github.com/modelcontextprotocol/servers/tree/main/src/postgres) uses resources to expose schema information rather than tools. +The [Reference PostgreSQL MCP Server](https://github.com/modelcontextprotocol/servers-archived/tree/main/src/postgres) uses resources to expose schema information rather than tools. Navigating resources is similar to navigating a file system, so this approach is natural in many ways. However, resource support is less widespread than tool support in the MCP client ecosystem (see [example clients](https://modelcontextprotocol.io/clients)). In addition, while the MCP standard says that resources can be accessed by either AI agents or end-user humans, some clients only support human navigation of the resource tree. @@ -570,7 +617,7 @@ While this is a good approach, many find this cumbersome in practice. Postgres does not provide a way to place a connection or session into read-only mode, so Postgres MCP Pro uses a more complex approach to ensure read-only SQL execution on top of a read-write connection. Postgres MCP Provides a read-only transaction mode that prevents data and schema modifications. -Like the [Reference PostgreSQL MCP Server](https://github.com/modelcontextprotocol/servers/tree/main/src/postgres), we use read-only transactions to provide protected SQL execution. +Like the [Reference PostgreSQL MCP Server](https://github.com/modelcontextprotocol/servers-archived/tree/main/src/postgres), we use read-only transactions to provide protected SQL execution. To make this mechanism robust, we need to ensure that the SQL does not somehow circumvent the read-only transaction mode, say by issuing a `COMMIT` or `ROLLBACK` statement and then beginning a new transaction. diff --git a/docker-entrypoint.sh b/docker-entrypoint.sh index 434522c8..52a77692 100644 --- a/docker-entrypoint.sh +++ b/docker-entrypoint.sh @@ -93,17 +93,6 @@ echo "Executing command:" >&2 echo "${processed_args[@]}" >&2 echo "----------------" >&2 -# Execute the command with the processed arguments -"${processed_args[@]}" - -# Capture exit code from the Python process -exit_code=$? - -# If the Python process failed, print additional debug info -if [ $exit_code -ne 0 ]; then - echo "ERROR: Command failed with exit code $exit_code" >&2 - echo "Command was: ${processed_args[@]}" >&2 -fi - -# Return the exit code from the Python process -exit $exit_code +# Execute the command with the processed arguments using exec to replace the shell +# This ensures proper signal handling (SIGTERM, SIGINT) and makes Python PID 1 +exec "${processed_args[@]}" diff --git a/src/postgres_mcp/__init__.py b/src/postgres_mcp/__init__.py index a00e3497..54c784d0 100644 --- a/src/postgres_mcp/__init__.py +++ b/src/postgres_mcp/__init__.py @@ -1,4 +1,5 @@ import asyncio +import logging import sys from . import server @@ -7,6 +8,14 @@ def main(): """Main entry point for the package.""" + # Configure logging to use stderr to avoid interfering with stdio MCP transport + # The MCP protocol uses stdout for communication, so all logs must go to stderr + logging.basicConfig( + level=logging.INFO, + format="%(asctime)s - %(name)s - %(levelname)s - %(message)s", + stream=sys.stderr, + ) + # As of version 3.3.0 Psycopg on Windows is not compatible with the default # ProactorEventLoop. # See: https://www.psycopg.org/psycopg3/docs/advanced/async.html#async diff --git a/src/postgres_mcp/database_health/sequence_health_calc.py b/src/postgres_mcp/database_health/sequence_health_calc.py index 81582fa6..aac97cc3 100644 --- a/src/postgres_mcp/database_health/sequence_health_calc.py +++ b/src/postgres_mcp/database_health/sequence_health_calc.py @@ -1,6 +1,7 @@ from dataclasses import dataclass from psycopg.sql import Identifier +from psycopg.sql import Literal from ..sql import SafeSqlDriver from ..sql import SqlDriver @@ -101,15 +102,23 @@ async def _get_sequence_metrics(self) -> list[SequenceMetrics]: max_value = 2147483647 if seq["column_type"] == "integer" else 9223372036854775807 # Get sequence attributes + # Note: has_sequence_privilege expects a text argument (sequence name as string) + # while FROM clause needs a properly quoted identifier + # Build the fully qualified sequence name for has_sequence_privilege + if schema: + seq_name_for_privilege = f'"{schema}"."{sequence}"' + else: + seq_name_for_privilege = f'"{sequence}"' + attrs = await SafeSqlDriver.execute_param_query( self.sql_driver, """ SELECT - has_sequence_privilege('{}', 'SELECT') AS readable, + has_sequence_privilege({}, 'SELECT') AS readable, last_value FROM {} """, - [Identifier(schema, sequence), Identifier(schema, sequence)], + [Literal(seq_name_for_privilege), Identifier(schema, sequence)], ) if not attrs: @@ -135,17 +144,51 @@ async def _get_sequence_metrics(self) -> list[SequenceMetrics]: return sequence_metrics def _parse_sequence_name(self, default_value: str) -> tuple[str, str]: - """Parse schema and sequence name from default value expression.""" - # Handle both formats: - # nextval('id_seq'::regclass) - # nextval(('id_seq'::text)::regclass) + """Parse schema and sequence name from default value expression. + Handles various formats including: + - nextval('id_seq'::regclass) + - nextval('public.id_seq'::regclass) + - nextval('"Schema"."Sequence_Name"'::regclass) + - nextval(('"Schema"."Sequence_Name"'::text)::regclass) + """ # Remove nextval and cast parts clean_value = default_value.replace("nextval('", "").replace("'::regclass)", "") clean_value = clean_value.replace("('", "").replace("'::text)", "") - # Split into schema and sequence - parts = clean_value.split(".") + # Handle quoted identifiers (e.g., "Schema"."Table") + # Split on '.' but respect quoted identifiers + parts = [] + current_part = "" + in_quotes = False + + for char in clean_value: + if char == '"': + in_quotes = not in_quotes + # Keep the quotes for now, we'll strip them later + current_part += char + elif char == '.' and not in_quotes: + if current_part: + parts.append(current_part) + current_part = "" + else: + current_part += char + + if current_part: + parts.append(current_part) + + # Strip double quotes from parts and handle empty parts + def strip_quotes(s: str) -> str: + s = s.strip() + if s.startswith('"') and s.endswith('"'): + return s[1:-1] + return s + + parts = [strip_quotes(p) for p in parts if p.strip()] + if len(parts) == 1: return "public", parts[0] # Default to public schema - return parts[0], parts[1] + elif len(parts) >= 2: + return parts[0], parts[1] + else: + return "public", "" diff --git a/src/postgres_mcp/server.py b/src/postgres_mcp/server.py index af5669a1..520e9041 100644 --- a/src/postgres_mcp/server.py +++ b/src/postgres_mcp/server.py @@ -39,6 +39,7 @@ # Constants PG_STAT_STATEMENTS = "pg_stat_statements" HYPOPG_EXTENSION = "hypopg" +DEFAULT_QUERY_TIMEOUT = 30 # Default timeout in seconds for restricted mode ResponseType = List[types.TextContent | types.ImageContent | types.EmbeddedResource] @@ -55,6 +56,7 @@ class AccessMode(str, Enum): # Global variables db_connection = DbConnPool() current_access_mode = AccessMode.UNRESTRICTED +current_query_timeout = DEFAULT_QUERY_TIMEOUT shutdown_in_progress = False @@ -63,8 +65,8 @@ async def get_sql_driver() -> Union[SqlDriver, SafeSqlDriver]: base_driver = SqlDriver(conn=db_connection) if current_access_mode == AccessMode.RESTRICTED: - logger.debug("Using SafeSqlDriver with restrictions (RESTRICTED mode)") - return SafeSqlDriver(sql_driver=base_driver, timeout=30) # 30 second timeout + logger.debug(f"Using SafeSqlDriver with restrictions (RESTRICTED mode, timeout={current_query_timeout}s)") + return SafeSqlDriver(sql_driver=base_driver, timeout=current_query_timeout) else: logger.debug("Using unrestricted SqlDriver (UNRESTRICTED mode)") return base_driver @@ -120,15 +122,27 @@ async def list_objects( rows = await SafeSqlDriver.execute_param_query( sql_driver, """ - SELECT table_schema, table_name, table_type - FROM information_schema.tables - WHERE table_schema = {} AND table_type = {} - ORDER BY table_name + SELECT + t.table_schema, + t.table_name, + t.table_type, + obj_description((quote_ident(t.table_schema) || '.' || quote_ident(t.table_name))::regclass, 'pg_class') AS comment + FROM information_schema.tables t + WHERE t.table_schema = {} AND t.table_type = {} + ORDER BY t.table_name """, [schema_name, table_type], ) objects = ( - [{"schema": row.cells["table_schema"], "name": row.cells["table_name"], "type": row.cells["table_type"]} for row in rows] + [ + { + "schema": row.cells["table_schema"], + "name": row.cells["table_name"], + "type": row.cells["table_type"], + "comment": row.cells["comment"], + } + for row in rows + ] if rows else [] ) @@ -185,14 +199,22 @@ async def get_object_details( sql_driver = await get_sql_driver() if object_type in ("table", "view"): - # Get columns + # Get columns with comments col_rows = await SafeSqlDriver.execute_param_query( sql_driver, """ - SELECT column_name, data_type, is_nullable, column_default - FROM information_schema.columns - WHERE table_schema = {} AND table_name = {} - ORDER BY ordinal_position + SELECT + c.column_name, + c.data_type, + c.is_nullable, + c.column_default, + col_description( + (quote_ident(c.table_schema) || '.' || quote_ident(c.table_name))::regclass, + c.ordinal_position + ) AS comment + FROM information_schema.columns c + WHERE c.table_schema = {} AND c.table_name = {} + ORDER BY c.ordinal_position """, [schema_name, object_name], ) @@ -203,6 +225,7 @@ async def get_object_details( "data_type": r.cells["data_type"], "is_nullable": r.cells["is_nullable"], "default": r.cells["column_default"], + "comment": r.cells["comment"], } for r in col_rows ] @@ -251,8 +274,21 @@ async def get_object_details( indexes = [{"name": r.cells["indexname"], "definition": r.cells["indexdef"]} for r in idx_rows] if idx_rows else [] + # Get table/view comment + comment_rows = await SafeSqlDriver.execute_param_query( + sql_driver, + """ + SELECT obj_description( + (quote_ident({}) || '.' || quote_ident({}))::regclass, + 'pg_class' + ) AS comment + """, + [schema_name, object_name], + ) + table_comment = comment_rows[0].cells["comment"] if comment_rows else None + result = { - "basic": {"schema": schema_name, "name": object_name, "type": object_type}, + "basic": {"schema": schema_name, "name": object_name, "type": object_type, "comment": table_comment}, "columns": columns, "constraints": constraints_list, "indexes": indexes, @@ -539,6 +575,12 @@ async def main(): default=8000, help="Port for SSE server (default: 8000)", ) + parser.add_argument( + "--query-timeout", + type=int, + default=None, + help=f"Query timeout in seconds for restricted mode (default: {DEFAULT_QUERY_TIMEOUT}). Can also be set via QUERY_TIMEOUT env var.", + ) args = parser.parse_args() @@ -546,6 +588,19 @@ async def main(): global current_access_mode current_access_mode = AccessMode(args.access_mode) + # Set query timeout from CLI argument or environment variable + global current_query_timeout + if args.query_timeout is not None: + current_query_timeout = args.query_timeout + else: + env_timeout = os.environ.get("QUERY_TIMEOUT") + if env_timeout is not None: + try: + current_query_timeout = int(env_timeout) + except ValueError: + logger.warning(f"Invalid QUERY_TIMEOUT value '{env_timeout}', using default {DEFAULT_QUERY_TIMEOUT}") + current_query_timeout = DEFAULT_QUERY_TIMEOUT + # Add the query tool with a description appropriate to the access mode if current_access_mode == AccessMode.UNRESTRICTED: mcp.add_tool(execute_sql, description="Execute any SQL query") diff --git a/src/postgres_mcp/sql/safe_sql.py b/src/postgres_mcp/sql/safe_sql.py index 2aa07ee4..c8a45ac3 100644 --- a/src/postgres_mcp/sql/safe_sql.py +++ b/src/postgres_mcp/sql/safe_sql.py @@ -657,6 +657,10 @@ class SafeSqlDriver(SqlDriver): "first_value", "last_value", "nth_value", + # Ordered-set aggregate functions (PostgreSQL 9.4+) + "percentile_cont", + "percentile_disc", + "mode", } ALLOWED_NODE_TYPES: ClassVar[set[type]] = ALLOWED_STMT_TYPES | { diff --git a/src/postgres_mcp/top_queries/top_queries_calc.py b/src/postgres_mcp/top_queries/top_queries_calc.py index 87191073..4051bcf1 100644 --- a/src/postgres_mcp/top_queries/top_queries_calc.py +++ b/src/postgres_mcp/top_queries/top_queries_calc.py @@ -143,10 +143,12 @@ async def get_top_resource_queries(self, frac_threshold: float = 0.05) -> str: # PostgreSQL 13 and newer total_time_col = "total_exec_time" mean_time_col = "mean_exec_time" + stddev_time_col = "stddev_exec_time" else: # PostgreSQL 12 and older total_time_col = "total_time" mean_time_col = "mean_time" + stddev_time_col = "stddev_time" query = cast( LiteralString, @@ -158,7 +160,7 @@ async def get_top_resource_queries(self, frac_threshold: float = 0.05) -> str: rows, {total_time_col} total_exec_time, {mean_time_col} mean_exec_time, - stddev_exec_time, + {stddev_time_col} stddev_exec_time, shared_blks_hit, shared_blks_read, shared_blks_dirtied,