diff --git a/cmd/dump/dump_integration_test.go b/cmd/dump/dump_integration_test.go index aca0ac1d..7b2b72f5 100644 --- a/cmd/dump/dump_integration_test.go +++ b/cmd/dump/dump_integration_test.go @@ -61,6 +61,13 @@ func TestDumpCommand_Issue80IndexNameQuote(t *testing.T) { runExactMatchTest(t, "issue_80_index_name_quote") } +func TestDumpCommand_Issue82ViewLogicExpr(t *testing.T) { + if testing.Short() { + t.Skip("Skipping integration test in short mode") + } + runExactMatchTest(t, "issue_82_view_logic_expr") +} + func TestDumpCommand_Issue83ExplicitConstraintName(t *testing.T) { if testing.Short() { t.Skip("Skipping integration test in short mode") diff --git a/ir/formatter.go b/ir/formatter.go index e5322eb9..752c4695 100644 --- a/ir/formatter.go +++ b/ir/formatter.go @@ -242,6 +242,10 @@ func (f *postgreSQLFormatter) formatExpression(expr *pg_query.Node) { f.formatCoalesceExpr(expr.GetCoalesceExpr()) case expr.GetNullTest() != nil: f.formatNullTest(expr.GetNullTest()) + case expr.GetScalarArrayOpExpr() != nil: + f.formatScalarArrayOpExpr(expr.GetScalarArrayOpExpr()) + case expr.GetAArrayExpr() != nil: + f.formatAArrayExpr(expr.GetAArrayExpr()) default: // Fallback to deparse for complex expressions if deparseResult, err := f.deparseNode(expr); err == nil { @@ -297,6 +301,29 @@ func (f *postgreSQLFormatter) formatAConst(constant *pg_query.A_Const) { // formatAExpr formats an A_Expr (binary/unary expressions) func (f *postgreSQLFormatter) formatAExpr(expr *pg_query.A_Expr) { + // Special case: Detect "column = ANY (ARRAY[...])" pattern and convert to "column IN (...)" + // This pattern appears when parsing view definitions from pg_get_viewdef() + if len(expr.Name) == 1 && expr.Rexpr != nil { + if str := expr.Name[0].GetString_(); str != nil && str.Sval == "=" { + if aArrayExpr := expr.Rexpr.GetAArrayExpr(); aArrayExpr != nil { + // Direct array comparison: column = ARRAY[...] + // Convert to IN syntax, stripping unnecessary type casts from constants + f.formatExpressionStripCast(expr.Lexpr) + f.buffer.WriteString(" IN (") + for i, elem := range aArrayExpr.Elements { + if i > 0 { + f.buffer.WriteString(", ") + } + // Strip type casts from constants in IN list + f.formatExpressionStripCast(elem) + } + f.buffer.WriteString(")") + return + } + } + } + + // Default formatting for other A_Expr cases // Format left operand if expr.Lexpr != nil { f.formatExpression(expr.Lexpr) @@ -484,10 +511,10 @@ func (f *postgreSQLFormatter) formatCaseExpr(caseExpr *pg_query.CaseExpr) { } } - // Format ELSE clause + // Format ELSE clause, stripping unnecessary type casts from constants/NULL if caseExpr.Defresult != nil { f.buffer.WriteString(" ELSE ") - f.formatExpression(caseExpr.Defresult) + f.formatExpressionStripCast(caseExpr.Defresult) } f.buffer.WriteString(" END") @@ -568,3 +595,76 @@ func (f *postgreSQLFormatter) formatNullTest(nullTest *pg_query.NullTest) { f.buffer.WriteString(" IS NOT NULL") } } + +// formatExpressionStripCast formats an expression, stripping unnecessary type casts from constants and NULL +func (f *postgreSQLFormatter) formatExpressionStripCast(expr *pg_query.Node) { + // If this is a TypeCast of a constant or NULL, format just the value without the cast + if typeCast := expr.GetTypeCast(); typeCast != nil { + if typeCast.Arg != nil { + if aConst := typeCast.Arg.GetAConst(); aConst != nil { + // This is a typed constant, format just the constant value + f.formatAConst(aConst) + return + } + // For non-constant args, recursively strip casts + f.formatExpressionStripCast(typeCast.Arg) + return + } + } + + // Otherwise, format normally + f.formatExpression(expr) +} + +// formatAArrayExpr formats array expressions (ARRAY[...]) +func (f *postgreSQLFormatter) formatAArrayExpr(arrayExpr *pg_query.A_ArrayExpr) { + f.buffer.WriteString("ARRAY[") + for i, elem := range arrayExpr.Elements { + if i > 0 { + f.buffer.WriteString(", ") + } + f.formatExpression(elem) + } + f.buffer.WriteString("]") +} + +// formatScalarArrayOpExpr formats scalar array operations like "column = ANY (ARRAY[...])" +// and converts them to the simpler "column IN (...)" syntax +func (f *postgreSQLFormatter) formatScalarArrayOpExpr(arrayOp *pg_query.ScalarArrayOpExpr) { + // Check if this is a simple = ANY pattern that can be converted to IN + // UseOr means ANY (disjunction), !UseOr means ALL (conjunction) + isEqualAny := arrayOp.UseOr && len(arrayOp.Args) == 2 + + if isEqualAny { + // Args[0] is the left side (column), Args[1] is the right side (array) + // Format as "column IN (values)" + if len(arrayOp.Args) >= 2 { + // Format left side (the column) + f.formatExpression(arrayOp.Args[0]) + + f.buffer.WriteString(" IN (") + + // Extract values from the array + if arrayExpr := arrayOp.Args[1].GetArrayExpr(); arrayExpr != nil { + // Format array elements as comma-separated list + for i, elem := range arrayExpr.Elements { + if i > 0 { + f.buffer.WriteString(", ") + } + f.formatExpression(elem) + } + } else { + // Fallback: format the right expression as-is + f.formatExpression(arrayOp.Args[1]) + } + + f.buffer.WriteString(")") + return + } + } + + // For other operations (like <> ALL) or malformed expressions, use deparse fallback + if deparseResult, err := f.deparseNode(&pg_query.Node{Node: &pg_query.Node_ScalarArrayOpExpr{ScalarArrayOpExpr: arrayOp}}); err == nil { + f.buffer.WriteString(deparseResult) + } +} diff --git a/ir/normalize.go b/ir/normalize.go index ddebcb7f..0f869b1a 100644 --- a/ir/normalize.go +++ b/ir/normalize.go @@ -226,8 +226,11 @@ func normalizeViewDefinition(definition string) string { normalized = definition } - // Normalize ORDER BY clauses to match pg_get_viewdef format - normalized = normalizeOrderByInView(normalized) + // Apply all AST-based normalizations in one pass to avoid re-parsing + // This includes: + // 1. Converting PostgreSQL's "= ANY (ARRAY[...])" to "IN (...)" + // 2. Normalizing ORDER BY clauses to use aliases + normalized = normalizeViewWithAST(normalized) return normalized } @@ -1058,8 +1061,68 @@ func convertAnyArrayToIn(expr string) string { return fmt.Sprintf("%s IN (%s)", columnName, strings.Join(cleanValues, ", ")) } +// normalizeViewWithAST applies all AST-based normalizations in a single pass +// This includes converting "= ANY (ARRAY[...])" to "IN (...)" and normalizing ORDER BY +func normalizeViewWithAST(definition string) string { + if definition == "" { + return definition + } + + // Parse the view definition + parseResult, err := pg_query.Parse(definition) + if err != nil { + return definition + } + + if len(parseResult.Stmts) == 0 { + return definition + } + + stmt := parseResult.Stmts[0] + selectStmt := stmt.Stmt.GetSelectStmt() + if selectStmt == nil { + return definition + } + + // Step 1: Normalize ORDER BY clauses (modify AST if needed) + orderByModified := false + if len(selectStmt.SortClause) > 0 { + // Build reverse alias map (expression -> alias) from target list + exprToAliasMap := buildExpressionToAliasMap(selectStmt.TargetList) + + // Transform ORDER BY clauses: replace complex expressions with aliases when possible + for _, sortItem := range selectStmt.SortClause { + if sortBy := sortItem.GetSortBy(); sortBy != nil { + if wasModified := normalizeOrderByExpressionToAlias(sortBy, exprToAliasMap); wasModified { + orderByModified = true + } + } + } + } + + // Step 2: Check if we need to use custom formatter + // Use custom formatter if: + // a) The view definition contains "= ANY" (needs conversion to IN) + // b) ORDER BY was modified + needsCustomFormatter := strings.Contains(definition, "= ANY") || orderByModified + + if needsCustomFormatter { + // Use custom formatter to format the entire query + // The formatter will handle: + // - Converting "= ANY (ARRAY[...])" to "IN (...)" + // - Proper formatting of all expressions + formatter := newPostgreSQLFormatter() + formatted := formatter.formatQueryNode(stmt.Stmt) + if formatted != "" { + return formatted + } + } + + return definition +} + // normalizeOrderByInView normalizes ORDER BY clauses in view definitions -// This converts PostgreSQL's pg_get_viewdef format (with parentheses and expressions) +// This converts PostgreSQL's pg_get_viewdef format (with parentheses and expressions) // back to parser format (using column aliases) for consistent comparison // Uses AST manipulation for robustness func normalizeOrderByInView(definition string) string { @@ -1098,6 +1161,7 @@ func normalizeOrderByInView(definition string) string { } // If we made modifications, use PostgreSQL formatter to maintain formatting + // IMPORTANT: Use the custom formatter to preserve ANY->IN conversions done earlier if modified { formatter := newPostgreSQLFormatter() formatted := formatter.formatQueryNode(stmt.Stmt) diff --git a/testdata/dump/issue_82_view_logic_expr/manifest.json b/testdata/dump/issue_82_view_logic_expr/manifest.json new file mode 100644 index 00000000..2c9e9c20 --- /dev/null +++ b/testdata/dump/issue_82_view_logic_expr/manifest.json @@ -0,0 +1,9 @@ +{ + "name": "issue_82_view_logic_expr", + "description": "Test case for view logical expressions dumping (GitHub issue #82)", + "source": "https://github.com/pgschema/pgschema/issues/82", + "notes": [ + "Tests that views with CASE statements and IN clauses are correctly preserved when dumping", + "Verifies ORDER BY with aliased columns doesn't corrupt the CASE expression" + ] +} diff --git a/testdata/dump/issue_82_view_logic_expr/pgdump.sql b/testdata/dump/issue_82_view_logic_expr/pgdump.sql new file mode 100644 index 00000000..94df1770 --- /dev/null +++ b/testdata/dump/issue_82_view_logic_expr/pgdump.sql @@ -0,0 +1,61 @@ +-- +-- PostgreSQL database dump +-- + +-- Dumped from database version 17.5 (Debian 17.5-1.pgdg120+1) +-- Dumped by pg_dump version 17.5 (Homebrew) + +SET statement_timeout = 0; +SET lock_timeout = 0; +SET idle_in_transaction_session_timeout = 0; +SET transaction_timeout = 0; +SET client_encoding = 'UTF8'; +SET standard_conforming_strings = on; +SELECT pg_catalog.set_config('search_path', '', false); +SET check_function_bodies = false; +SET xmloption = content; +SET client_min_messages = warning; +SET row_security = off; + +SET default_tablespace = ''; + +SET default_table_access_method = heap; + +-- +-- Name: orders; Type: TABLE; Schema: public; Owner: - +-- + +CREATE TABLE public.orders ( + id integer NOT NULL, + status character varying(50) NOT NULL, + amount numeric(10,2) +); + + +-- +-- Name: paid_orders; Type: VIEW; Schema: public; Owner: - +-- + +CREATE VIEW public.paid_orders AS + SELECT id AS order_id, + status, + CASE + WHEN ((status)::text = ANY ((ARRAY['paid'::character varying, 'completed'::character varying])::text[])) THEN amount + ELSE NULL::numeric + END AS paid_amount + FROM public.orders + ORDER BY id, status; + + +-- +-- Name: orders orders_pkey; Type: CONSTRAINT; Schema: public; Owner: - +-- + +ALTER TABLE ONLY public.orders + ADD CONSTRAINT orders_pkey PRIMARY KEY (id); + + +-- +-- PostgreSQL database dump complete +-- + diff --git a/testdata/dump/issue_82_view_logic_expr/pgschema.sql b/testdata/dump/issue_82_view_logic_expr/pgschema.sql new file mode 100644 index 00000000..58588181 --- /dev/null +++ b/testdata/dump/issue_82_view_logic_expr/pgschema.sql @@ -0,0 +1,31 @@ +-- +-- pgschema database dump +-- + +-- Dumped from database version PostgreSQL 17.5 +-- Dumped by pgschema version 1.4.0 + + +-- +-- Name: orders; Type: TABLE; Schema: -; Owner: - +-- + +CREATE TABLE IF NOT EXISTS orders ( + id integer, + status varchar(50) NOT NULL, + amount numeric(10,2), + CONSTRAINT orders_pkey PRIMARY KEY (id) +); + +-- +-- Name: paid_orders; Type: VIEW; Schema: -; Owner: - +-- + +CREATE OR REPLACE VIEW paid_orders AS + SELECT + id AS order_id, + status, + CASE WHEN status IN ('paid', 'completed') THEN amount ELSE NULL END AS paid_amount + FROM orders + ORDER BY order_id, status; + diff --git a/testdata/dump/issue_82_view_logic_expr/raw.sql b/testdata/dump/issue_82_view_logic_expr/raw.sql new file mode 100644 index 00000000..4a5e2c0d --- /dev/null +++ b/testdata/dump/issue_82_view_logic_expr/raw.sql @@ -0,0 +1,36 @@ +-- +-- Test case for GitHub issue #82: View logical expressions dumping +-- +-- This test case reproduces a bug where view definitions with CASE statements +-- containing IN clauses become syntactically incorrect when dumped. +-- +-- The issue occurs when: +-- 1. A view has a CASE statement with an IN clause +-- 2. The view uses column aliases +-- 3. The view has an ORDER BY clause referencing the aliased columns +-- +-- Original bug: CASE WHEN status IN ('paid', 'completed') THEN amount ELSE NULL END +-- Gets corrupted to: CASE WHEN status::text = ::text THEN amount ELSE NULL END +-- (The IN clause values disappear!) +-- + +-- +-- Base table: orders with status tracking +-- +CREATE TABLE orders ( + id integer PRIMARY KEY, + status varchar(50) NOT NULL, + amount numeric(10,2) +); + +-- +-- Problematic view: Uses CASE with IN clause + column alias in ORDER BY +-- This specific combination triggers the bug +-- +CREATE VIEW paid_orders AS + SELECT + id AS order_id, + status, + CASE WHEN status IN ('paid', 'completed') THEN amount ELSE NULL END AS paid_amount + FROM orders + ORDER BY order_id, status;