From d1427f5613ba7c4d5b4ce81efed0a7fb9e79bf84 Mon Sep 17 00:00:00 2001 From: tianzhou Date: Wed, 17 Dec 2025 22:20:27 -0800 Subject: [PATCH 1/3] fix: type topological sort --- internal/diff/topological.go | 137 ++++++++++++++++++ internal/diff/type.go | 20 +-- .../diff/dependency/type_to_type/diff.sql | 6 + testdata/diff/dependency/type_to_type/new.sql | 11 ++ testdata/diff/dependency/type_to_type/old.sql | 1 + .../diff/dependency/type_to_type/plan.json | 26 ++++ .../diff/dependency/type_to_type/plan.sql | 6 + .../diff/dependency/type_to_type/plan.txt | 18 +++ testdata/dump/sakila/pgschema.sql | 12 +- 9 files changed, 213 insertions(+), 24 deletions(-) create mode 100644 testdata/diff/dependency/type_to_type/diff.sql create mode 100644 testdata/diff/dependency/type_to_type/new.sql create mode 100644 testdata/diff/dependency/type_to_type/old.sql create mode 100644 testdata/diff/dependency/type_to_type/plan.json create mode 100644 testdata/diff/dependency/type_to_type/plan.sql create mode 100644 testdata/diff/dependency/type_to_type/plan.txt diff --git a/internal/diff/topological.go b/internal/diff/topological.go index 34adadd0..9d567743 100644 --- a/internal/diff/topological.go +++ b/internal/diff/topological.go @@ -238,3 +238,140 @@ func nextInOrder(order []string, processed map[string]bool) string { } return "" } + +// topologicallySortTypes sorts types across all schemas in dependency order +// Types that are referenced by composite types will come before the types that reference them +func topologicallySortTypes(types []*ir.Type) []*ir.Type { + if len(types) <= 1 { + return types + } + + // Build maps for efficient lookup + typeMap := make(map[string]*ir.Type) + var insertionOrder []string + for _, t := range types { + key := t.Schema + "." + t.Name + typeMap[key] = t + insertionOrder = append(insertionOrder, key) + } + + // Build dependency graph + inDegree := make(map[string]int) + adjList := make(map[string][]string) + + // Initialize + for key := range typeMap { + inDegree[key] = 0 + adjList[key] = []string{} + } + + // Build edges: if typeA references typeB (composite type column uses typeB), add edge typeB -> typeA + for keyA, typeA := range typeMap { + if typeA.Kind == ir.TypeKindComposite { + for _, col := range typeA.Columns { + // Extract type name from DataType (may include schema prefix or array notation) + referencedType := extractTypeName(col.DataType, typeA.Schema) + if referencedType != "" { + // Check if the referenced type exists in our set + if _, exists := typeMap[referencedType]; exists && keyA != referencedType { + adjList[referencedType] = append(adjList[referencedType], keyA) + inDegree[keyA]++ + } + } + } + } else if typeA.Kind == ir.TypeKindDomain { + // Domain types may reference other types as their base type + referencedType := extractTypeName(typeA.BaseType, typeA.Schema) + if referencedType != "" { + if _, exists := typeMap[referencedType]; exists && keyA != referencedType { + adjList[referencedType] = append(adjList[referencedType], keyA) + inDegree[keyA]++ + } + } + } + } + + // Kahn's algorithm with deterministic cycle breaking + var queue []string + var result []string + processed := make(map[string]bool, len(typeMap)) + + // Seed queue with nodes that have no incoming edges + for key, degree := range inDegree { + if degree == 0 { + queue = append(queue, key) + } + } + sort.Strings(queue) + + for len(result) < len(typeMap) { + if len(queue) == 0 { + // Cycle detected: pick the next unprocessed type using original insertion order + next := nextInOrder(insertionOrder, processed) + if next == "" { + break + } + queue = append(queue, next) + inDegree[next] = 0 + } + + current := queue[0] + queue = queue[1:] + if processed[current] { + continue + } + processed[current] = true + result = append(result, current) + + neighbors := append([]string(nil), adjList[current]...) + sort.Strings(neighbors) + + for _, neighbor := range neighbors { + inDegree[neighbor]-- + if inDegree[neighbor] <= 0 && !processed[neighbor] { + queue = append(queue, neighbor) + sort.Strings(queue) + } + } + } + + // Convert result back to type slice + sortedTypes := make([]*ir.Type, 0, len(result)) + for _, key := range result { + sortedTypes = append(sortedTypes, typeMap[key]) + } + + return sortedTypes +} + +// extractTypeName extracts a fully qualified type name from a data type string +// It handles array notation (e.g., "status_type[]") and schema prefixes +func extractTypeName(dataType, defaultSchema string) string { + if dataType == "" { + return "" + } + + // Remove array notation + typeName := dataType + for len(typeName) > 2 && typeName[len(typeName)-2:] == "[]" { + typeName = typeName[:len(typeName)-2] + } + + // Check if it's a schema-qualified name + if idx := findLastDot(typeName); idx != -1 { + return typeName // Already fully qualified + } + + // Not qualified - use default schema + return defaultSchema + "." + typeName +} + +// findLastDot finds the last dot in a string, returning -1 if not found +func findLastDot(s string) int { + for i := len(s) - 1; i >= 0; i-- { + if s[i] == '.' { + return i + } + } + return -1 +} diff --git a/internal/diff/type.go b/internal/diff/type.go index a6fce58c..554cf3e2 100644 --- a/internal/diff/type.go +++ b/internal/diff/type.go @@ -10,24 +10,8 @@ import ( // generateCreateTypesSQL generates CREATE TYPE statements func generateCreateTypesSQL(types []*ir.Type, targetSchema string, collector *diffCollector) { - // Sort types: CREATE TYPE statements first, then CREATE DOMAIN statements - sortedTypes := make([]*ir.Type, len(types)) - copy(sortedTypes, types) - sort.Slice(sortedTypes, func(i, j int) bool { - typeI := sortedTypes[i] - typeJ := sortedTypes[j] - - // Domain types should come after non-domain types - if typeI.Kind == ir.TypeKindDomain && typeJ.Kind != ir.TypeKindDomain { - return false - } - if typeI.Kind != ir.TypeKindDomain && typeJ.Kind == ir.TypeKindDomain { - return true - } - - // Within the same category, sort alphabetically by name - return typeI.Name < typeJ.Name - }) + // Sort types topologically to handle dependencies (e.g., composite types referencing enum types) + sortedTypes := topologicallySortTypes(types) for _, typeObj := range sortedTypes { sql := generateTypeSQL(typeObj, targetSchema) diff --git a/testdata/diff/dependency/type_to_type/diff.sql b/testdata/diff/dependency/type_to_type/diff.sql new file mode 100644 index 00000000..40f4cd7f --- /dev/null +++ b/testdata/diff/dependency/type_to_type/diff.sql @@ -0,0 +1,6 @@ +CREATE TYPE status_type AS ENUM ( + 'active', + 'inactive' +); + +CREATE TYPE record_type AS (id integer, status status_type); diff --git a/testdata/diff/dependency/type_to_type/new.sql b/testdata/diff/dependency/type_to_type/new.sql new file mode 100644 index 00000000..b40cfb39 --- /dev/null +++ b/testdata/diff/dependency/type_to_type/new.sql @@ -0,0 +1,11 @@ +-- Enum type (dependency) +CREATE TYPE public.status_type AS ENUM ( + 'active', + 'inactive' +); + +-- Composite type that references the enum type +CREATE TYPE public.record_type AS ( + id integer, + status status_type +); diff --git a/testdata/diff/dependency/type_to_type/old.sql b/testdata/diff/dependency/type_to_type/old.sql new file mode 100644 index 00000000..52b92097 --- /dev/null +++ b/testdata/diff/dependency/type_to_type/old.sql @@ -0,0 +1 @@ +-- Empty schema (no types) diff --git a/testdata/diff/dependency/type_to_type/plan.json b/testdata/diff/dependency/type_to_type/plan.json new file mode 100644 index 00000000..fb8c3923 --- /dev/null +++ b/testdata/diff/dependency/type_to_type/plan.json @@ -0,0 +1,26 @@ +{ + "version": "1.0.0", + "pgschema_version": "1.5.0", + "created_at": "1970-01-01T00:00:00Z", + "source_fingerprint": { + "hash": "965b1131737c955e24c7f827c55bd78e4cb49a75adfd04229e0ba297376f5085" + }, + "groups": [ + { + "steps": [ + { + "sql": "CREATE TYPE status_type AS ENUM (\n 'active',\n 'inactive'\n);", + "type": "type", + "operation": "create", + "path": "public.status_type" + }, + { + "sql": "CREATE TYPE record_type AS (id integer, status status_type);", + "type": "type", + "operation": "create", + "path": "public.record_type" + } + ] + } + ] +} diff --git a/testdata/diff/dependency/type_to_type/plan.sql b/testdata/diff/dependency/type_to_type/plan.sql new file mode 100644 index 00000000..40f4cd7f --- /dev/null +++ b/testdata/diff/dependency/type_to_type/plan.sql @@ -0,0 +1,6 @@ +CREATE TYPE status_type AS ENUM ( + 'active', + 'inactive' +); + +CREATE TYPE record_type AS (id integer, status status_type); diff --git a/testdata/diff/dependency/type_to_type/plan.txt b/testdata/diff/dependency/type_to_type/plan.txt new file mode 100644 index 00000000..13c0b69a --- /dev/null +++ b/testdata/diff/dependency/type_to_type/plan.txt @@ -0,0 +1,18 @@ +Plan: 2 to add. + +Summary by type: + types: 2 to add + +Types: + + record_type + + status_type + +DDL to be executed: +-------------------------------------------------- + +CREATE TYPE status_type AS ENUM ( + 'active', + 'inactive' +); + +CREATE TYPE record_type AS (id integer, status status_type); diff --git a/testdata/dump/sakila/pgschema.sql b/testdata/dump/sakila/pgschema.sql index a0fd44da..c36912d1 100644 --- a/testdata/dump/sakila/pgschema.sql +++ b/testdata/dump/sakila/pgschema.sql @@ -6,6 +6,12 @@ -- Dumped by pgschema version 1.5.0 +-- +-- Name: bıgınt; Type: DOMAIN; Schema: -; Owner: - +-- + +CREATE DOMAIN bıgınt AS bigint; + -- -- Name: mpaa_rating; Type: TYPE; Schema: -; Owner: - -- @@ -18,12 +24,6 @@ CREATE TYPE mpaa_rating AS ENUM ( 'NC-17' ); --- --- Name: bıgınt; Type: DOMAIN; Schema: -; Owner: - --- - -CREATE DOMAIN bıgınt AS bigint; - -- -- Name: year; Type: DOMAIN; Schema: -; Owner: - -- From 8f8ba97e05c9590457b6b9786ea708dd505d069d Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 22:54:46 -0800 Subject: [PATCH 2/3] Add detailed cycle-breaking comments to topologicallySortTypes (#206) * Initial plan * Add detailed cycle-breaking comments to topologicallySortTypes Co-authored-by: tianzhou <230323+tianzhou@users.noreply.github.com> * Fix line reference in topologicallySortTypes comment Co-authored-by: tianzhou <230323+tianzhou@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: tianzhou <230323+tianzhou@users.noreply.github.com> --- internal/diff/topological.go | 28 ++++++++++++++++++++++++++++ 1 file changed, 28 insertions(+) diff --git a/internal/diff/topological.go b/internal/diff/topological.go index 9d567743..9282173a 100644 --- a/internal/diff/topological.go +++ b/internal/diff/topological.go @@ -307,6 +307,30 @@ func topologicallySortTypes(types []*ir.Type) []*ir.Type { for len(result) < len(typeMap) { if len(queue) == 0 { // Cycle detected: pick the next unprocessed type using original insertion order + // + // CYCLE BREAKING STRATEGY FOR TYPES: + // Setting inDegree[next] = 0 effectively declares "this type has no remaining dependencies" + // for the purpose of breaking the cycle. This is safe because: + // + // 1. The 'processed' map prevents any type from being added to the result twice, even if + // its inDegree becomes zero or negative multiple times (see line 344 check). + // + // 2. For circular type dependencies in PostgreSQL, the dependency cycle can only occur + // through composite types referencing each other. Unlike table foreign keys, type + // dependencies cannot be added after creation - the entire type definition must be + // complete at CREATE TYPE time. + // + // 3. PostgreSQL itself prohibits creating types with true circular dependencies + // (composite type A containing type B, which contains type A) because it would + // result in infinite size. The only cycles that can occur in practice involve + // array types or indirection (e.g., A contains B[], B contains A[]), which + // PostgreSQL allows because arrays don't expand the size infinitely. + // + // 4. Using insertion order (alphabetical by schema.name) ensures deterministic output + // when multiple valid orderings exist. + // + // For types with unavoidable circular references (via arrays), the order doesn't + // affect correctness since PostgreSQL's type system handles these internally. next := nextInOrder(insertionOrder, processed) if next == "" { break @@ -328,6 +352,10 @@ func topologicallySortTypes(types []*ir.Type) []*ir.Type { for _, neighbor := range neighbors { inDegree[neighbor]-- + // Add neighbor to queue if all its dependencies are satisfied. + // The '!processed[neighbor]' check is critical: it prevents re-adding types + // that have already been processed, even if their inDegree becomes <= 0 again + // due to cycle breaking (where we artificially set inDegree to 0). if inDegree[neighbor] <= 0 && !processed[neighbor] { queue = append(queue, neighbor) sort.Strings(queue) From 914294de4316022b568820e50d1ec19e4d294e49 Mon Sep 17 00:00:00 2001 From: Copilot <198982749+Copilot@users.noreply.github.com> Date: Wed, 17 Dec 2025 23:05:02 -0800 Subject: [PATCH 3/3] Add unit tests for topologicallySortTypes function (#207) * Initial plan * Add comprehensive unit tests for topologicallySortTypes function Co-authored-by: tianzhou <230323+tianzhou@users.noreply.github.com> * Remove duplicate newTestCompositeTypeMultiple helper function Co-authored-by: tianzhou <230323+tianzhou@users.noreply.github.com> --------- Co-authored-by: copilot-swe-agent[bot] <198982749+Copilot@users.noreply.github.com> Co-authored-by: tianzhou <230323+tianzhou@users.noreply.github.com> --- internal/diff/topological_test.go | 163 ++++++++++++++++++++++++++++++ 1 file changed, 163 insertions(+) diff --git a/internal/diff/topological_test.go b/internal/diff/topological_test.go index b01b3126..ecf0ba5d 100644 --- a/internal/diff/topological_test.go +++ b/internal/diff/topological_test.go @@ -59,3 +59,166 @@ func newTestTable(name string, deps ...string) *ir.Table { Constraints: constraints, } } + +func TestTopologicallySortTypesHandlesCycles(t *testing.T) { + types := []*ir.Type{ + // Simple chain: a <- b <- c + newTestEnumType("a"), + newTestCompositeType("b", "a"), + newTestCompositeType("c", "b"), + // Cycle: x <-> y (theoretically impossible in PostgreSQL but test handles it) + newTestCompositeType("x", "y"), + newTestCompositeType("y", "x"), + // Type depending on the cycle + newTestCompositeType("z", "y"), + } + + sorted := topologicallySortTypes(types) + if len(sorted) != len(types) { + t.Fatalf("expected %d types, got %d", len(types), len(sorted)) + } + + order := make(map[string]int, len(sorted)) + for idx, typ := range sorted { + order[typ.Name] = idx + } + + assertBefore := func(first, second string) { + if order[first] >= order[second] { + t.Fatalf("expected %s to appear before %s in %v", first, second, order) + } + } + + // Verify simple chain ordering + assertBefore("a", "b") + assertBefore("b", "c") + // Dependent types should still come after cycle members + assertBefore("y", "z") + + // Cycle members should have a deterministic order (insertion order) + if order["x"] >= order["y"] { + t.Fatalf("expected x to be ordered before y for deterministic output, got %v", order) + } +} + +func TestTopologicallySortTypesMultipleNoDependencies(t *testing.T) { + types := []*ir.Type{ + newTestEnumType("z"), + newTestEnumType("a"), + newTestEnumType("m"), + newTestEnumType("b"), + } + + sorted := topologicallySortTypes(types) + if len(sorted) != len(types) { + t.Fatalf("expected %d types, got %d", len(types), len(sorted)) + } + + // With no dependencies, should maintain deterministic alphabetical order + order := make(map[string]int, len(sorted)) + for idx, typ := range sorted { + order[typ.Name] = idx + } + + // Verify deterministic ordering: a < b < m < z + if order["a"] >= order["b"] || order["b"] >= order["m"] || order["m"] >= order["z"] { + t.Fatalf("expected alphabetical order for types with no dependencies, got %v", order) + } +} + +func TestTopologicallySortTypesDomainReferencingCustomType(t *testing.T) { + types := []*ir.Type{ + newTestEnumType("status_type"), + newTestDomainType("status_domain", "status_type"), + newTestCompositeType("person", "status_domain"), + } + + sorted := topologicallySortTypes(types) + if len(sorted) != len(types) { + t.Fatalf("expected %d types, got %d", len(types), len(sorted)) + } + + order := make(map[string]int, len(sorted)) + for idx, typ := range sorted { + order[typ.Name] = idx + } + + assertBefore := func(first, second string) { + if order[first] >= order[second] { + t.Fatalf("expected %s to appear before %s in %v", first, second, order) + } + } + + // Verify correct dependency chain + assertBefore("status_type", "status_domain") + assertBefore("status_domain", "person") +} + +func TestTopologicallySortTypesCompositeWithMultipleDependencies(t *testing.T) { + types := []*ir.Type{ + newTestEnumType("status"), + newTestEnumType("priority"), + newTestEnumType("category"), + newTestCompositeType("task", "status", "priority", "category"), + newTestCompositeType("project", "task"), + } + + sorted := topologicallySortTypes(types) + if len(sorted) != len(types) { + t.Fatalf("expected %d types, got %d", len(types), len(sorted)) + } + + order := make(map[string]int, len(sorted)) + for idx, typ := range sorted { + order[typ.Name] = idx + } + + assertBefore := func(first, second string) { + if order[first] >= order[second] { + t.Fatalf("expected %s to appear before %s in %v", first, second, order) + } + } + + // All dependencies should come before task + assertBefore("status", "task") + assertBefore("priority", "task") + assertBefore("category", "task") + // And task should come before project + assertBefore("task", "project") +} + +func newTestEnumType(name string) *ir.Type { + return &ir.Type{ + Schema: "public", + Name: name, + Kind: ir.TypeKindEnum, + EnumValues: []string{"value1", "value2"}, + } +} + +func newTestCompositeType(name string, deps ...string) *ir.Type { + columns := make([]*ir.TypeColumn, len(deps)) + for idx, dep := range deps { + columns[idx] = &ir.TypeColumn{ + Name: fmt.Sprintf("col_%d", idx), + DataType: dep, // References the type + Position: idx + 1, + } + } + + return &ir.Type{ + Schema: "public", + Name: name, + Kind: ir.TypeKindComposite, + Columns: columns, + } +} + +func newTestDomainType(name, baseType string) *ir.Type { + return &ir.Type{ + Schema: "public", + Name: name, + Kind: ir.TypeKindDomain, + BaseType: baseType, + } +}