Skip to content

Commit

Permalink
Fix attnum mismatch in pg_statistic after drop column and database re…
Browse files Browse the repository at this point in the history
…store (#90)

Problem:
In case a column (or columns) was dropped from a table, after backup and restore
cycle, second backup may contain pg_statistic values with a type not matching
the data (for ex. "array_in('{"test"}', '"timestamp"'::regtype::oid, -1)").

Root cause:
Records from pg_statistic were backed up with their actual values, including
attnums for the columns. But during restore, all tables are restored from
scratch. So, if a column is dropped in the table before backup, after the
restore attnum values in the pg_attribute table differ from the attnum
values that were before the backup (if the dropped column is not the one with
the highest attnum). But the restored values of pg_statistic had old attnums.
So, pg_statistics was not consistent with pg_attribute, and one of the side
effects was that on next backup the type for pg_statistic value was taken from
other column (and some pg_statistic were just lost).

Fix:
On the restore phase the column name is used to obtain actual attnum value from
the restored catalog and this value is now used when restoring statistics. The
'AttNumber' field is removed from the AttributeStatistic struct to prevent
its future use.

Ticket: ADBDEV-5861
  • Loading branch information
whitehawk authored Aug 17, 2024
1 parent 8060b4e commit 21ba1a1
Show file tree
Hide file tree
Showing 8 changed files with 221 additions and 37 deletions.
4 changes: 1 addition & 3 deletions backup/queries_statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@ type AttributeStatistic struct {
AttName string
Type string
Relid uint32 `db:"starelid"`
AttNumber int `db:"staattnum"`
Inherit bool `db:"stainherit"`
NullFraction float64 `db:"stanullfrac"`
Width int `db:"stawidth"`
Expand Down Expand Up @@ -81,10 +80,9 @@ func GetAttributeStatistics(connectionPool *dbconn.DBConn, tables []Table, proce
SELECT c.oid,
quote_ident(n.nspname) AS schema,
quote_ident(c.relname) AS table,
quote_ident(a.attname) AS attname,
a.attname,
quote_ident(t.typname) AS type,
s.starelid,
s.staattnum,
%s
s.stanullfrac,
s.stawidth,
Expand Down
12 changes: 7 additions & 5 deletions backup/statistics.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,14 +69,16 @@ func GenerateAttributeStatisticsQueries(table Table, attStat AttributeStatistic)
attributeSlotsQueryStr = generateAttributeSlotsQuery4(attStat)
}

attributeQueries = append(attributeQueries, fmt.Sprintf(`DELETE FROM pg_statistic WHERE starelid = %s AND staattnum = %d;`, starelidStr, attStat.AttNumber))
attributeQueries = append(attributeQueries, fmt.Sprintf(`INSERT INTO pg_statistic VALUES (
%s,
%d::smallint,%s
attributeQueries = append(attributeQueries, fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN
(SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = %s AND attname = '%s');`, starelidStr, utils.EscapeSingleQuotes(attStat.AttName)))
attributeQueries = append(attributeQueries, fmt.Sprintf(`INSERT INTO pg_statistic SELECT
attrelid,
attnum,%s
%f::real,
%d::integer,
%f::real,
%s);`, starelidStr, attStat.AttNumber, inheritStr, attStat.NullFraction, attStat.Width, attStat.Distinct, attributeSlotsQueryStr))
%s
FROM pg_attribute WHERE attrelid = %s AND attname = '%s';`, inheritStr, attStat.NullFraction, attStat.Width, attStat.Distinct, attributeSlotsQueryStr, starelidStr, utils.EscapeSingleQuotes(attStat.AttName)))

/*
* If a type name starts with exactly one underscore, it describes an array
Expand Down
56 changes: 31 additions & 25 deletions backup/statistics_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ var _ = Describe("backup/statistics tests", func() {
tupleStat2 := backup.TupleStatistic{Schema: "testschema", Table: "testtable2"}
attStat2 := []backup.AttributeStatistic{
{Schema: "testschema", Table: "testtable2", AttName: "testattWithArray", Type: "_array"},
{Schema: "testschema", Table: "testtable2", AttName: "testatt", Type: "_array", Relid: 2, AttNumber: 3, NullFraction: .4,
{Schema: "testschema", Table: "testtable2", AttName: "testatt", Type: "_array", Relid: 2, NullFraction: .4,
Width: 10, Distinct: .5, Kind1: 20, Operator1: 10, Numbers1: []string{"1", "2", "3"}, Values1: []string{"4", "5", "6"}},
}

Expand Down Expand Up @@ -100,11 +100,11 @@ SET
reltuples = 0.000000::real
WHERE oid = 'testschema.testtable2'::regclass::oid;`,

`DELETE FROM pg_statistic WHERE starelid = 'testschema.testtable2'::regclass::oid AND staattnum = 0;`,

fmt.Sprintf(`INSERT INTO pg_statistic VALUES (
'testschema.testtable2'::regclass::oid,
0::smallint,%[1]s
`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN
(SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema.testtable2'::regclass::oid AND attname = 'testattWithArray');`,
fmt.Sprintf(`INSERT INTO pg_statistic SELECT
attrelid,
attnum,%[1]s
0.000000::real,
0::integer,
0.000000::real,
Expand All @@ -123,13 +123,14 @@ WHERE oid = 'testschema.testtable2'::regclass::oid;`,
NULL,
NULL,
NULL,%[5]s
NULL);`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5),

`DELETE FROM pg_statistic WHERE starelid = 'testschema.testtable2'::regclass::oid AND staattnum = 3;`,
NULL
FROM pg_attribute WHERE attrelid = 'testschema.testtable2'::regclass::oid AND attname = 'testattWithArray';`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5),

fmt.Sprintf(`INSERT INTO pg_statistic VALUES (
'testschema.testtable2'::regclass::oid,
3::smallint,%[1]s
`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN
(SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema.testtable2'::regclass::oid AND attname = 'testatt');`,
fmt.Sprintf(`INSERT INTO pg_statistic SELECT
attrelid,
attnum,%[1]s
0.400000::real,
10::integer,
0.500000::real,
Expand All @@ -148,7 +149,8 @@ WHERE oid = 'testschema.testtable2'::regclass::oid;`,
NULL,
NULL,
NULL,%[5]s
NULL);`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5),
NULL
FROM pg_attribute WHERE attrelid = 'testschema.testtable2'::regclass::oid AND attname = 'testatt';`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5),
}
testutils.AssertBufferContents(tocfile.StatisticsEntries, buffer, expected...)
})
Expand All @@ -171,20 +173,21 @@ WHERE oid = '"""test''schema"""."""test''table"""'::regclass::oid;`))

It("generates attribute statistics query for array type", func() {
attStats := backup.AttributeStatistic{Schema: "testschema", Table: "testtable", AttName: "testatt", Type: "_array", Relid: 2,
AttNumber: 3, NullFraction: .4, Width: 10, Distinct: .5, Kind1: 20, Operator1: 10,
NullFraction: .4, Width: 10, Distinct: .5, Kind1: 20, Operator1: 10,
Numbers1: pq.StringArray([]string{"1", "2", "3"}), Values1: pq.StringArray([]string{"4", "5", "6"})}
if connectionPool.Version.AtLeast("6") {
attStats.Kind5 = 10
attStats.Operator5 = 12
}

attStatsQueries := backup.GenerateAttributeStatisticsQueries(tableTestTable, attStats)
Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE starelid = 'testschema."test''table"'::regclass::oid AND staattnum = 3;`)))
Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN
(SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema."test''table"'::regclass::oid AND attname = 'testatt');`)))

insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5 := getStatInsertReplace(0, 0)
Expect(attStatsQueries[1]).To(Equal(fmt.Sprintf(`INSERT INTO pg_statistic VALUES (
'testschema."test''table"'::regclass::oid,
3::smallint,%s
Expect(attStatsQueries[1]).To(Equal(fmt.Sprintf(`INSERT INTO pg_statistic SELECT
attrelid,
attnum,%s
0.400000::real,
10::integer,
0.500000::real,
Expand All @@ -203,11 +206,12 @@ WHERE oid = '"""test''schema"""."""test''table"""'::regclass::oid;`))
NULL,
NULL,
NULL,%s
NULL);`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5)))
NULL
FROM pg_attribute WHERE attrelid = 'testschema."test''table"'::regclass::oid AND attname = 'testatt';`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5)))
})
It("generates attribute statistics query for non-array type", func() {
attStats := backup.AttributeStatistic{Schema: "testschema", Table: "testtable", AttName: "testatt", Type: "testtype", Relid: 2,
AttNumber: 3, NullFraction: .4, Width: 10, Distinct: .5, Kind1: 20, Operator1: 10,
NullFraction: .4, Width: 10, Distinct: .5, Kind1: 20, Operator1: 10,
Numbers1: pq.StringArray([]string{"1", "2", "3"}), Values1: pq.StringArray([]string{"4", "5", "6"})}
if connectionPool.Version.AtLeast("6") {
attStats.Kind5 = 10
Expand All @@ -216,12 +220,13 @@ WHERE oid = '"""test''schema"""."""test''table"""'::regclass::oid;`))

attStatsQueries := backup.GenerateAttributeStatisticsQueries(tableTestTable, attStats)

Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE starelid = 'testschema."test''table"'::regclass::oid AND staattnum = 3;`)))
Expect(attStatsQueries[0]).To(Equal(fmt.Sprintf(`DELETE FROM pg_statistic WHERE (starelid, staattnum) IN
(SELECT attrelid, attnum FROM pg_attribute WHERE attrelid = 'testschema."test''table"'::regclass::oid AND attname = 'testatt');`)))

insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5 := getStatInsertReplace(10, 12)
Expect(attStatsQueries[1]).To(Equal(fmt.Sprintf(`INSERT INTO pg_statistic VALUES (
'testschema."test''table"'::regclass::oid,
3::smallint,%s
Expect(attStatsQueries[1]).To(Equal(fmt.Sprintf(`INSERT INTO pg_statistic SELECT
attrelid,
attnum,%s
0.400000::real,
10::integer,
0.500000::real,
Expand All @@ -240,7 +245,8 @@ WHERE oid = '"""test''schema"""."""test''table"""'::regclass::oid;`))
array_in('{"4","5","6"}', 'testtype'::regtype::oid, -1),
NULL,
NULL,%s
NULL);`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5)))
NULL
FROM pg_attribute WHERE attrelid = 'testschema."test''table"'::regclass::oid AND attname = 'testatt';`, insertReplace1, insertReplace2, insertReplace3, insertReplace4, insertReplace5)))
})
})
Describe("AnyValues", func() {
Expand Down
18 changes: 18 additions & 0 deletions end_to_end/end_to_end_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -969,18 +969,28 @@ var _ = Describe("backup and restore end to end tests", func() {
"DROP INDEX schema2.foo3_idx1")
testhelper.AssertQueryRuns(backupConn,
"ANALYZE schema2.foo3")
testhelper.AssertQueryRuns(backupConn,
`CREATE TABLE schema2.foo4 ("schema2." TEXT)`)
defer testhelper.AssertQueryRuns(backupConn,
"DROP TABLE schema2.foo4")
testhelper.AssertQueryRuns(backupConn,
`INSERT INTO schema2.foo4 VALUES ('schema2.'),('schema2.')`)
testhelper.AssertQueryRuns(backupConn,
"ANALYZE schema2.foo4")
output := gpbackup(gpbackupPath, backupHelperPath,
"--with-stats")
timestamp := getBackupTimestamp(string(output))

gprestore(gprestorePath, restoreHelperPath, timestamp,
"--redirect-db", "restoredb",
"--include-table", "schema2.foo3",
"--include-table", "schema2.foo4",
"--redirect-schema", "schema3",
"--with-stats")

schema3TupleCounts := map[string]int{
"schema3.foo3": 100,
"schema3.foo4": 2,
}
assertDataRestored(restoreConn, schema3TupleCounts)
assertPGClassStatsRestored(restoreConn, restoreConn, schema3TupleCounts)
Expand All @@ -992,6 +1002,14 @@ var _ = Describe("backup and restore end to end tests", func() {
actualStatisticCount := dbconn.MustSelectString(restoreConn,
`SELECT count(*) FROM pg_statistic WHERE starelid='schema3.foo3'::regclass::oid;`)
Expect(actualStatisticCount).To(Equal("1"))

actualStatisticCount = dbconn.MustSelectString(restoreConn,
`SELECT count(*) FROM pg_statistic WHERE starelid='schema3.foo4'::regclass::oid;`)
Expect(actualStatisticCount).To(Equal("1"))

stavaluesTableFoo4 := dbconn.MustSelectString(restoreConn,
`SELECT stavalues1 FROM pg_statistic WHERE starelid='schema3.foo4'::regclass::oid;`)
Expect(stavaluesTableFoo4).To(Equal("{schema2.}"))
})
It("runs gprestore with --redirect-schema to redirect data back to the original database which still contain the original tables", func() {
skipIfOldBackupVersionBefore("1.17.0")
Expand Down
58 changes: 58 additions & 0 deletions integration/statistics_create_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,64 @@ var _ = Describe("backup integration tests", func() {
structmatcher.ExpectStructsToMatchExcluding(&oldAtts[i], &newAtts[i], "Oid", "Relid")
}
})
It("prints attribute and tuple statistics for a table with dropped column", func() {
tables := []backup.Table{
{Relation: backup.Relation{SchemaOid: 2200, Schema: "public", Name: "foo"}},
}

// Create and ANALYZE a table to generate statistics
testhelper.AssertQueryRuns(connectionPool, `CREATE TABLE public.foo(i int, j text, k bool, "i'2 3" int)`)
defer testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.foo")
testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.foo VALUES (1, 'a', 't', 1)")
testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.foo VALUES (2, 'b', 'f', 2)")
testhelper.AssertQueryRuns(connectionPool, "ANALYZE public.foo")
testhelper.AssertQueryRuns(connectionPool, "ALTER TABLE public.foo DROP COLUMN j")

oldTableOid := testutils.OidFromObjectName(connectionPool, "public", "foo", backup.TYPE_RELATION)
tables[0].Oid = oldTableOid

beforeAttStats := make(map[uint32][]backup.AttributeStatistic)
backup.GetAttributeStatistics(connectionPool, tables, func(attStat *backup.AttributeStatistic) {
beforeAttStats[attStat.Oid] = append(beforeAttStats[attStat.Oid], *attStat)
})
beforeTupleStats := make(map[uint32]backup.TupleStatistic)
backup.GetTupleStatistics(connectionPool, tables, func(tupleStat *backup.TupleStatistic) {
beforeTupleStats[tupleStat.Oid] = *tupleStat
})
beforeTupleStat := beforeTupleStats[oldTableOid]

// Drop and recreate the table to clear the statistics
testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.foo")
testhelper.AssertQueryRuns(connectionPool, `CREATE TABLE public.foo(i int, k bool, "i'2 3" int)`)

// Reload the retrieved statistics into the new table
PrintStatisticsStatements(backupfile, tocfile, tables, beforeAttStats, beforeTupleStats)
testhelper.AssertQueryRuns(connectionPool, buffer.String())

newTableOid := testutils.OidFromObjectName(connectionPool, "public", "foo", backup.TYPE_RELATION)
tables[0].Oid = newTableOid
afterAttStats := make(map[uint32][]backup.AttributeStatistic)
backup.GetAttributeStatistics(connectionPool, tables, func(attStat *backup.AttributeStatistic) {
afterAttStats[attStat.Oid] = append(afterAttStats[attStat.Oid], *attStat)
})
afterTupleStats := make(map[uint32]backup.TupleStatistic)
backup.GetTupleStatistics(connectionPool, tables, func(tupleStat *backup.TupleStatistic) {
afterTupleStats[tupleStat.Oid] = *tupleStat
})
afterTupleStat := afterTupleStats[newTableOid]

oldAtts := beforeAttStats[oldTableOid]
newAtts := afterAttStats[newTableOid]

// Ensure the statistics match
Expect(afterTupleStats).To(HaveLen(len(beforeTupleStats)))
structmatcher.ExpectStructsToMatchExcluding(&beforeTupleStat, &afterTupleStat, "Oid")
Expect(oldAtts).To(HaveLen(3))
Expect(newAtts).To(HaveLen(3))
for i := range oldAtts {
structmatcher.ExpectStructsToMatchExcluding(&oldAtts[i], &newAtts[i], "Oid", "Relid")
}
})
It("prints attribute and tuple statistics for a quoted table", func() {
tables := []backup.Table{
{Relation: backup.Relation{SchemaOid: 2200, Schema: "public", Name: "\"foo'\"\"''bar\""}},
Expand Down
6 changes: 3 additions & 3 deletions integration/statistics_queries_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,13 +47,13 @@ var _ = Describe("backup integration tests", func() {
* the same schema and data.
*/
expectedStats5I := backup.AttributeStatistic{Oid: tableOid, Schema: "public", Table: "foo", AttName: "i",
Type: "int4", Relid: tableOid, AttNumber: 1, Inherit: false, Width: 4, Distinct: -1, Kind1: 2, Kind2: 3, Operator1: 97,
Type: "int4", Relid: tableOid, Inherit: false, Width: 4, Distinct: -1, Kind1: 2, Kind2: 3, Operator1: 97,
Operator2: 97, Numbers2: []string{"1"}, Values1: []string{"1", "2", "3", "4"}}
expectedStats5J := backup.AttributeStatistic{Oid: tableOid, Schema: "public", Table: "foo", AttName: "j",
Type: "text", Relid: tableOid, AttNumber: 2, Inherit: false, Width: 2, Distinct: -1, Kind1: 2, Kind2: 3, Operator1: 664,
Type: "text", Relid: tableOid, Inherit: false, Width: 2, Distinct: -1, Kind1: 2, Kind2: 3, Operator1: 664,
Operator2: 664, Numbers2: []string{"1"}, Values1: []string{"a", "b", "c", "d"}}
expectedStats5K := backup.AttributeStatistic{Oid: tableOid, Schema: "public", Table: "foo", AttName: "k",
Type: "bool", Relid: tableOid, AttNumber: 3, Inherit: false, Width: 1, Distinct: -0.5, Kind1: 1, Kind2: 3, Operator1: 91,
Type: "bool", Relid: tableOid, Inherit: false, Width: 1, Distinct: -0.5, Kind1: 1, Kind2: 3, Operator1: 91,
Operator2: 58, Numbers1: []string{"0.5", "0.5"}, Numbers2: []string{"0.5"}, Values1: []string{"f", "t"}}
if connectionPool.Version.AtLeast("7") {
expectedStats5J.Collation1 = 100
Expand Down
14 changes: 13 additions & 1 deletion restore/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -410,11 +410,16 @@ func editStatementsRedirectSchema(statements []toc.StatementWithType, redirectSc
return
}

schemaMatch := `(?:".+?"|[^.]+?)` // matches either an unquoted schema with no dots or a quoted schema containing dots
schemaMatch := `(?:".+?"|[^."]+?)` // matches either an unquoted schema with no dots or quotes or a quoted schema containing dots
// This expression matches a GRANT or REVOKE statement on any object and captures the old schema name
permissionsRE := regexp.MustCompile(fmt.Sprintf(`(?m)(^(?:REVOKE|GRANT) .+ ON .+?) (%s)((\..+)? (?:FROM|TO) .+)`, schemaMatch))
// This expression matches an ATTACH PARTITION statement and captures both the parent and child schema names
attachRE := regexp.MustCompile(fmt.Sprintf(`(ALTER TABLE(?: ONLY)?) (%[1]s)(\..+ ATTACH PARTITION) (%[1]s)(\..+)`, schemaMatch))
// This expression matches a '<schema>.<table>'::regclass::oid expression.
tableMatch := schemaMatch // table name should follow the same rules as schema name
regclassOidRE := regexp.MustCompile(fmt.Sprintf(`'(%s)((\.%s)'\:\:regclass\:\:oid)`, schemaMatch, tableMatch))
// This expression matches the last occurence of the '<schema>.<table>'::regclass::oid expression
lastRegclassOidRE := regexp.MustCompile(fmt.Sprintf(`(?s)^(.*)(%s)(.*?)$`, regclassOidRE))
for i := range statements {
oldSchema := fmt.Sprintf("%s.", statements[i].Schema)
newSchema := fmt.Sprintf("%s.", redirectSchema)
Expand All @@ -440,6 +445,13 @@ func editStatementsRedirectSchema(statements []toc.StatementWithType, redirectSc
replaced = true
}

// Statistic statements needs one schema replacements. We replace the last occurence to avoid a very small
// chance that schema name was in the statistic data itself, that we do not want to alter.
if statements[i].ObjectType == toc.OBJ_STATISTICS {
statement = lastRegclassOidRE.ReplaceAllString(statement, fmt.Sprintf("${1}'%s${4}$6", redirectSchema))
replaced = true
}

// Only do a general replace if we haven't replaced anything yet, to avoid e.g. hitting a schema in a VIEW definition
if !replaced {
statement = strings.Replace(statement, oldSchema, newSchema, 1)
Expand Down
Loading

0 comments on commit 21ba1a1

Please sign in to comment.