From dbcfeff9aaee62814c4771baf22e016e423f1a57 Mon Sep 17 00:00:00 2001 From: hughcapet Date: Thu, 27 May 2021 15:22:13 +1000 Subject: [PATCH 01/25] Avoid lock acquisition for foreign tables backupDataForAllTables function used to attempt to acquire AccessShareLock on foreign tables. This behavior is fixed by skipping foreign and external tables at the very beginning of the worker goroutine (there is no need to acquire locks and copy data for both external and foreign tables). --- backup/data.go | 45 ++++++++++++++++++++++----------------------- backup/data_test.go | 18 ------------------ 2 files changed, 22 insertions(+), 41 deletions(-) diff --git a/backup/data.go b/backup/data.go index 729345f5a..f35f3759c 100644 --- a/backup/data.go +++ b/backup/data.go @@ -85,32 +85,27 @@ func CopyTableOut(connectionPool *dbconn.DBConn, table Table, destinationToWrite } func BackupSingleTableData(table Table, rowsCopiedMap map[uint32]int64, counters *BackupProgressCounters, whichConn int) error { - if table.SkipDataBackup() { - gplog.Verbose("Skipping data backup of table %s because it is either an external or foreign table.", table.FQN()) + atomic.AddInt64(&counters.NumRegTables, 1) + numTables := counters.NumRegTables //We save this so it won't be modified before we log it + if gplog.GetVerbosity() > gplog.LOGINFO { + // No progress bar at this log level, so we note table count here + gplog.Verbose("Writing data for table %s to file (table %d of %d)", table.FQN(), numTables, counters.TotalRegTables) } else { + gplog.Verbose("Writing data for table %s to file", table.FQN()) + } - atomic.AddInt64(&counters.NumRegTables, 1) - numTables := counters.NumRegTables //We save this so it won't be modified before we log it - if gplog.GetVerbosity() > gplog.LOGINFO { - // No progress bar at this log level, so we note table count here - gplog.Verbose("Writing data for table %s to file (table %d of %d)", table.FQN(), numTables, counters.TotalRegTables) - } else { - gplog.Verbose("Writing data for table %s to file", table.FQN()) - } - - destinationToWrite := "" - if MustGetFlagBool(options.SINGLE_DATA_FILE) { - destinationToWrite = fmt.Sprintf("%s_%d", globalFPInfo.GetSegmentPipePathForCopyCommand(), table.Oid) - } else { - destinationToWrite = globalFPInfo.GetTableBackupFilePathForCopyCommand(table.Oid, utils.GetPipeThroughProgram().Extension, false) - } - rowsCopied, err := CopyTableOut(connectionPool, table, destinationToWrite, whichConn) - if err != nil { - return err - } - rowsCopiedMap[table.Oid] = rowsCopied - counters.ProgressBar.Increment() + destinationToWrite := "" + if MustGetFlagBool(options.SINGLE_DATA_FILE) { + destinationToWrite = fmt.Sprintf("%s_%d", globalFPInfo.GetSegmentPipePathForCopyCommand(), table.Oid) + } else { + destinationToWrite = globalFPInfo.GetTableBackupFilePathForCopyCommand(table.Oid, utils.GetPipeThroughProgram().Extension, false) + } + rowsCopied, err := CopyTableOut(connectionPool, table, destinationToWrite, whichConn) + if err != nil { + return err } + rowsCopiedMap[table.Oid] = rowsCopied + counters.ProgressBar.Increment() return nil } @@ -146,6 +141,10 @@ func backupDataForAllTables(tables []Table) []map[uint32]int64 { return } + if table.SkipDataBackup() { + gplog.Verbose("Skipping data backup of table %s because it is either an external or foreign table.", table.FQN()) + continue + } // If a random external SQL command had queued an AccessExclusiveLock acquisition request // against this next table, the --job worker thread would deadlock on the COPY attempt. 
// To prevent gpbackup from hanging, we attempt to acquire an AccessShareLock on the diff --git a/backup/data_test.go b/backup/data_test.go index 49fac8bf8..d7c196356 100644 --- a/backup/data_test.go +++ b/backup/data_test.go @@ -173,24 +173,6 @@ var _ = Describe("backup/data tests", func() { Expect(rowsCopiedMap[0]).To(Equal(int64(10))) Expect(counters.NumRegTables).To(Equal(int64(1))) }) - It("backs up a single external table", func() { - _ = cmdFlags.Set(options.LEAF_PARTITION_DATA, "false") - testTable.IsExternal = true - err := backup.BackupSingleTableData(testTable, rowsCopiedMap, &counters, 0) - - Expect(err).ShouldNot(HaveOccurred()) - Expect(rowsCopiedMap).To(BeEmpty()) - Expect(counters.NumRegTables).To(Equal(int64(0))) - }) - It("backs up a single foreign table", func() { - _ = cmdFlags.Set(options.LEAF_PARTITION_DATA, "false") - testTable.ForeignDef = backup.ForeignTableDefinition{Oid: 23, Options: "", Server: "fs"} - err := backup.BackupSingleTableData(testTable, rowsCopiedMap, &counters, 0) - - Expect(err).ShouldNot(HaveOccurred()) - Expect(rowsCopiedMap).To(BeEmpty()) - Expect(counters.NumRegTables).To(Equal(int64(0))) - }) }) Describe("CheckDBContainsData", func() { config := history.BackupConfig{} From 180a5503d9add03d95f46b9917d14a47f447bf60 Mon Sep 17 00:00:00 2001 From: hughcapet Date: Fri, 28 May 2021 13:27:48 +1000 Subject: [PATCH 02/25] Add foreign tables backup test --- end_to_end/end_to_end_suite_test.go | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/end_to_end/end_to_end_suite_test.go b/end_to_end/end_to_end_suite_test.go index f11d071e9..bf5dc0f61 100644 --- a/end_to_end/end_to_end_suite_test.go +++ b/end_to_end/end_to_end_suite_test.go @@ -1356,6 +1356,33 @@ var _ = Describe("backup and restore end to end tests", func() { Expect(stdout).To(ContainSubstring("Backup completed successfully")) }) + It("successfully backs up foreign tables", func() { + if backupConn.Version.Before("6") { + Skip("Test does not apply for GPDB versions before 6") + } + testhelper.AssertQueryRuns(backupConn, + "CREATE FOREIGN DATA WRAPPER foreigndatawrapper;") + defer testhelper.AssertQueryRuns(backupConn, + "DROP FOREIGN DATA WRAPPER foreigndatawrapper CASCADE;") + testhelper.AssertQueryRuns(backupConn, + "CREATE SERVER sc FOREIGN DATA WRAPPER foreigndatawrapper;") + testhelper.AssertQueryRuns(backupConn, + "CREATE FOREIGN TABLE public.ft1 (field1 text) SERVER sc") + testhelper.AssertQueryRuns(backupConn, + "CREATE FOREIGN TABLE public.ft2 (field1 text) SERVER sc") + args := []string{ + "--dbname", "testdb", + "--backup-dir", backupDir, + "--jobs", "4", + "--verbose"} + cmd := exec.Command(gpbackupPath, args...) + output, _ := cmd.CombinedOutput() + stdout := string(output) + Expect(stdout).To(Not(ContainSubstring("CRITICAL"))) + Expect(stdout).To(ContainSubstring("Skipping data backup of table public.ft1")) + Expect(stdout).To(ContainSubstring("Skipping data backup of table public.ft2")) + Expect(stdout).To(ContainSubstring("Skipped data backup of 2 external/foreign table(s)")) + }) It("runs gpbackup with --version flag", func() { if useOldBackupVersion { Skip("This test is not needed for old backup versions") From 35810705dbe300b1d47544351c9aca01363f6564 Mon Sep 17 00:00:00 2001 From: Ivan Leskin Date: Thu, 22 Jul 2021 15:38:49 +0300 Subject: [PATCH 03/25] Add `--compression-type`. Refactor compression in `helper` Add a flag to set compression type. `gzip` is the only currently supported value. 
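(Illustrative aside, not part of the change itself: assuming the gzip pipe-through path added to utils/compression.go later in this patch, the new parameters map to the COPY pipe program roughly as follows; the level 6 below is only an example value.)

    utils.InitializePipeThroughParameters(true, "gzip", 6) // compress, type, level
    program := utils.GetPipeThroughProgram()
    // program.OutputCommand == "gzip -c -6"
    // program.InputCommand  == "gzip -d -c"
    // program.Extension     == ".gz"
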
To disable compression, `--no-compression` or `--compression-level 0` are to be set. `gzip` is also the default value for this parameter. In `helper`, introduce `backup_helper_pipes.go`. This defines wrappers for buffer writers encapsulating various compression types (currently "no compression" and "gzip"). This allows to drop (somewhat) complex Writer closure algorithm in `backup_helper.go`. Update `helper_test` and `compression_test` according to new changes. --- backup/backup.go | 4 +-- backup/validate.go | 3 +- backup/wrappers.go | 1 + helper/backup_helper.go | 50 +++++++++++------------------ helper/backup_helper_pipes.go | 60 +++++++++++++++++++++++++++++++++++ helper/helper.go | 5 +-- history/history.go | 1 + integration/helper_test.go | 4 +-- options/flag.go | 2 ++ report/report_test.go | 5 +-- restore/wrappers.go | 2 +- utils/compression.go | 17 +++++++--- utils/compression_test.go | 4 +-- utils/util.go | 24 +++++++++++--- utils/util_test.go | 39 +++++++++++++++++------ 15 files changed, 159 insertions(+), 62 deletions(-) create mode 100644 helper/backup_helper_pipes.go diff --git a/backup/backup.go b/backup/backup.go index 8c139705e..3d2be8600 100644 --- a/backup/backup.go +++ b/backup/backup.go @@ -72,7 +72,7 @@ func DoSetup() { } globalTOC = &toc.TOC{} globalTOC.InitializeMetadataEntryMap() - utils.InitializePipeThroughParameters(!MustGetFlagBool(options.NO_COMPRESSION), MustGetFlagInt(options.COMPRESSION_LEVEL)) + utils.InitializePipeThroughParameters(!MustGetFlagBool(options.NO_COMPRESSION), MustGetFlagString(options.COMPRESSION_TYPE), MustGetFlagInt(options.COMPRESSION_LEVEL)) getQuotedRoleNames(connectionPool) pluginConfigFlag := MustGetFlagString(options.PLUGIN_CONFIG) @@ -268,7 +268,7 @@ func backupData(tables []Table) { } utils.WriteOidListToSegments(oidList, globalCluster, globalFPInfo) utils.CreateFirstSegmentPipeOnAllHosts(oidList[0], globalCluster, globalFPInfo) - compressStr := fmt.Sprintf(" --compression-level %d", MustGetFlagInt(options.COMPRESSION_LEVEL)) + compressStr := fmt.Sprintf(" --compression-level %d --compression-type %s", MustGetFlagInt(options.COMPRESSION_LEVEL), MustGetFlagString(options.COMPRESSION_TYPE)) if MustGetFlagBool(options.NO_COMPRESSION) { compressStr = " --compression-level 0" } diff --git a/backup/validate.go b/backup/validate.go index 3004ff225..a93efaf86 100644 --- a/backup/validate.go +++ b/backup/validate.go @@ -99,6 +99,7 @@ func validateFlagCombinations(flags *pflag.FlagSet) { options.CheckExclusiveFlags(flags, options.EXCLUDE_SCHEMA, options.EXCLUDE_SCHEMA_FILE, options.EXCLUDE_RELATION, options.INCLUDE_RELATION, options.EXCLUDE_RELATION_FILE, options.INCLUDE_RELATION_FILE) options.CheckExclusiveFlags(flags, options.JOBS, options.METADATA_ONLY, options.SINGLE_DATA_FILE) options.CheckExclusiveFlags(flags, options.METADATA_ONLY, options.LEAF_PARTITION_DATA) + options.CheckExclusiveFlags(flags, options.NO_COMPRESSION, options.COMPRESSION_TYPE) options.CheckExclusiveFlags(flags, options.NO_COMPRESSION, options.COMPRESSION_LEVEL) options.CheckExclusiveFlags(flags, options.PLUGIN_CONFIG, options.BACKUP_DIR) if MustGetFlagString(options.FROM_TIMESTAMP) != "" && !MustGetFlagBool(options.INCREMENTAL) { @@ -114,7 +115,7 @@ func validateFlagValues() { gplog.FatalOnError(err) err = utils.ValidateFullPath(MustGetFlagString(options.PLUGIN_CONFIG)) gplog.FatalOnError(err) - err = utils.ValidateCompressionLevel(MustGetFlagInt(options.COMPRESSION_LEVEL)) + err = utils.ValidateCompressionTypeAndLevel(MustGetFlagString(options.COMPRESSION_TYPE), 
MustGetFlagInt(options.COMPRESSION_LEVEL)) gplog.FatalOnError(err) if MustGetFlagString(options.FROM_TIMESTAMP) != "" && !filepath.IsValidTimestamp(MustGetFlagString(options.FROM_TIMESTAMP)) { gplog.Fatal(errors.Errorf("Timestamp %s is invalid. Timestamps must be in the format YYYYMMDDHHMMSS.", diff --git a/backup/wrappers.go b/backup/wrappers.go index 24e799f68..cf294d44f 100644 --- a/backup/wrappers.go +++ b/backup/wrappers.go @@ -80,6 +80,7 @@ func NewBackupConfig(dbName string, dbVersion string, backupVersion string, plug BackupDir: MustGetFlagString(options.BACKUP_DIR), BackupVersion: backupVersion, Compressed: !MustGetFlagBool(options.NO_COMPRESSION), + CompressionType: MustGetFlagString(options.COMPRESSION_TYPE), DatabaseName: dbName, DatabaseVersion: dbVersion, DataOnly: MustGetFlagBool(options.DATA_ONLY), diff --git a/helper/backup_helper.go b/helper/backup_helper.go index a64509f37..a57475a52 100644 --- a/helper/backup_helper.go +++ b/helper/backup_helper.go @@ -2,7 +2,6 @@ package helper import ( "bufio" - "compress/gzip" "fmt" "io" "os" @@ -21,11 +20,8 @@ import ( func doBackupAgent() error { var lastRead uint64 var ( - finalWriter io.Writer - gzipWriter *gzip.Writer - bufIoWriter *bufio.Writer - writeHandle io.WriteCloser - writeCmd *exec.Cmd + pipeWriter BackupPipeWriterCloser + writeCmd *exec.Cmd ) tocfile := &toc.SegmentTOC{} tocfile.DataEntries = make(map[uint]toc.SegmentDataEntry) @@ -60,14 +56,14 @@ func doBackupAgent() error { return err } if i == 0 { - finalWriter, gzipWriter, bufIoWriter, writeHandle, writeCmd, err = getBackupPipeWriter(*compressionLevel) + pipeWriter, writeCmd, err = getBackupPipeWriter() if err != nil { return err } } log(fmt.Sprintf("Backing up table with oid %d\n", oid)) - numBytes, err := io.Copy(finalWriter, reader) + numBytes, err := io.Copy(pipeWriter, reader) if err != nil { return errors.Wrap(err, strings.Trim(errBuf.String(), "\x00")) } @@ -86,15 +82,7 @@ func doBackupAgent() error { } } - /* - * The order for flushing and closing the writers below is very specific - * to ensure all data is written to the file and file handles are not leaked. 
- */ - if gzipWriter != nil { - _ = gzipWriter.Close() - } - _ = bufIoWriter.Flush() - _ = writeHandle.Close() + _ = pipeWriter.Close() if *pluginConfigFile != "" { /* * When using a plugin, the agent may take longer to finish than the @@ -129,31 +117,29 @@ func getBackupPipeReader(currentPipe string) (io.Reader, io.ReadCloser, error) { return reader, readHandle, nil } -func getBackupPipeWriter(compressLevel int) (io.Writer, *gzip.Writer, *bufio.Writer, io.WriteCloser, *exec.Cmd, error) { +func getBackupPipeWriter() (pipe BackupPipeWriterCloser, writeCmd *exec.Cmd, err error) { var writeHandle io.WriteCloser - var err error - var writeCmd *exec.Cmd if *pluginConfigFile != "" { writeCmd, writeHandle, err = startBackupPluginCommand() } else { writeHandle, err = os.Create(*dataFile) } if err != nil { - return nil, nil, nil, nil, nil, err + return nil, nil, err } - var finalWriter io.Writer - var gzipWriter *gzip.Writer - bufIoWriter := bufio.NewWriter(writeHandle) - finalWriter = bufIoWriter - if compressLevel > 0 { - gzipWriter, err = gzip.NewWriterLevel(bufIoWriter, compressLevel) - if err != nil { - return nil, nil, nil, nil, nil, err - } - finalWriter = gzipWriter + if *compressionLevel == 0 { + pipe = NewCommonBackupPipeWriterCloser(writeHandle) + return + } + + if *compressionType == "gzip" { + pipe, err = NewGZipBackupPipeWriterCloser(writeHandle, *compressionLevel) + return } - return finalWriter, gzipWriter, bufIoWriter, writeHandle, writeCmd, nil + + writeHandle.Close() + return nil, nil, fmt.Errorf("unknown compression type '%s' (compression level %d)", *compressionType, *compressionLevel) } func startBackupPluginCommand() (*exec.Cmd, io.WriteCloser, error) { diff --git a/helper/backup_helper_pipes.go b/helper/backup_helper_pipes.go new file mode 100644 index 000000000..317f5b3a7 --- /dev/null +++ b/helper/backup_helper_pipes.go @@ -0,0 +1,60 @@ +package helper + +import ( + "bufio" + "compress/gzip" + "io" +) + +type BackupPipeWriterCloser interface { + io.Writer + io.Closer +} + +type CommonBackupPipeWriterCloser struct { + writeHandle io.WriteCloser + bufIoWriter *bufio.Writer + finalWriter io.Writer +} + +func (cPipe CommonBackupPipeWriterCloser) Write(p []byte) (n int, err error) { + return cPipe.finalWriter.Write(p) +} + +// Never returns error, suppressing them instead +func (cPipe CommonBackupPipeWriterCloser) Close() error { + _ = cPipe.bufIoWriter.Flush() + _ = cPipe.writeHandle.Close() + return nil +} + +func NewCommonBackupPipeWriterCloser(writeHandle io.WriteCloser) (cPipe CommonBackupPipeWriterCloser) { + cPipe.writeHandle = writeHandle + cPipe.bufIoWriter = bufio.NewWriter(cPipe.writeHandle) + cPipe.finalWriter = cPipe.bufIoWriter + return +} + +type GZipBackupPipeWriterCloser struct { + cPipe CommonBackupPipeWriterCloser + gzipWriter *gzip.Writer +} + +func (gzPipe GZipBackupPipeWriterCloser) Write(p []byte) (n int, err error) { + return gzPipe.gzipWriter.Write(p) +} + +// Returns errors from underlying common writer only +func (gzPipe GZipBackupPipeWriterCloser) Close() error { + _ = gzPipe.gzipWriter.Close() + return gzPipe.cPipe.Close() +} + +func NewGZipBackupPipeWriterCloser(writeHandle io.WriteCloser, compressLevel int) (gzPipe GZipBackupPipeWriterCloser, err error) { + gzPipe.cPipe = NewCommonBackupPipeWriterCloser(writeHandle) + gzPipe.gzipWriter, err = gzip.NewWriterLevel(gzPipe.cPipe.bufIoWriter, compressLevel) + if err != nil { + gzPipe.cPipe.Close() + } + return +} diff --git a/helper/helper.go b/helper/helper.go index 57cb90a22..32ed90549 100644 --- 
a/helper/helper.go +++ b/helper/helper.go @@ -43,6 +43,7 @@ var ( var ( backupAgent *bool compressionLevel *int + compressionType *string content *int dataFile *string oidFile *string @@ -99,7 +100,8 @@ func InitializeGlobals() { backupAgent = flag.Bool("backup-agent", false, "Use gpbackup_helper as an agent for backup") content = flag.Int("content", -2, "Content ID of the corresponding segment") - compressionLevel = flag.Int("compression-level", 0, "The level of compression to use with gzip. O indicates no compression.") + compressionLevel = flag.Int("compression-level", 0, "The level of compression. O indicates no compression.") + compressionType = flag.String("compression-type", "gzip", "The type of compression. Valid values are 'gzip'") dataFile = flag.String("data-file", "", "Absolute path to the data file") oidFile = flag.String("oid-file", "", "Absolute path to the file containing a list of oids to restore") onErrorContinue = flag.Bool("on-error-continue", false, "Continue restore even when encountering an error") @@ -165,7 +167,6 @@ func flushAndCloseRestoreWriter() error { return nil } - /* * Shared helper functions */ diff --git a/history/history.go b/history/history.go index be51a006d..e82c4de8e 100644 --- a/history/history.go +++ b/history/history.go @@ -29,6 +29,7 @@ type BackupConfig struct { BackupDir string BackupVersion string Compressed bool + CompressionType string DatabaseName string DatabaseVersion string DataOnly bool diff --git a/integration/helper_test.go b/integration/helper_test.go index 72bf14589..4c098235c 100644 --- a/integration/helper_test.go +++ b/integration/helper_test.go @@ -108,7 +108,7 @@ var _ = Describe("gpbackup_helper end to end integration tests", func() { Expect(err).ToNot(HaveOccurred()) }) It("runs backup gpbackup_helper with compression", func() { - helperCmd := gpbackupHelper(gpbackupHelperPath, "--backup-agent", "--compression-level", "1", "--data-file", dataFileFullPath+".gz") + helperCmd := gpbackupHelper(gpbackupHelperPath, "--backup-agent", "--compression-type", "gzip", "--compression-level", "1", "--data-file", dataFileFullPath+".gz") writeToPipes(defaultData) err := helperCmd.Wait() printHelperLogOnError(err) @@ -124,7 +124,7 @@ var _ = Describe("gpbackup_helper end to end integration tests", func() { assertBackupArtifacts(false, true) }) It("runs backup gpbackup_helper with compression with plugin", func() { - helperCmd := gpbackupHelper(gpbackupHelperPath, "--backup-agent", "--compression-level", "1", "--data-file", dataFileFullPath+".gz", "--plugin-config", pluginConfigPath) + helperCmd := gpbackupHelper(gpbackupHelperPath, "--backup-agent", "--compression-type", "gzip", "--compression-level", "1", "--data-file", dataFileFullPath+".gz", "--plugin-config", pluginConfigPath) writeToPipes(defaultData) err := helperCmd.Wait() printHelperLogOnError(err) diff --git a/options/flag.go b/options/flag.go index 0e51bdcb2..9f7e0b591 100644 --- a/options/flag.go +++ b/options/flag.go @@ -15,6 +15,7 @@ import ( const ( BACKUP_DIR = "backup-dir" + COMPRESSION_TYPE = "compression-type" COMPRESSION_LEVEL = "compression-level" DATA_ONLY = "data-only" DBNAME = "dbname" @@ -51,6 +52,7 @@ const ( func SetBackupFlagDefaults(flagSet *pflag.FlagSet) { flagSet.String(BACKUP_DIR, "", "The absolute path of the directory to which all backup files will be written") + flagSet.String(COMPRESSION_TYPE, "gzip", "Type of compression to use during data backup. Valid values are: 'gzip'") flagSet.Int(COMPRESSION_LEVEL, 1, "Level of compression to use during data backup. 
Valid values are between 1 and 9.") flagSet.Bool(DATA_ONLY, false, "Only back up data, do not back up metadata") flagSet.String(DBNAME, "", "The database to be backed up") diff --git a/report/report_test.go b/report/report_test.go index 50defe52e..1d603ce61 100644 --- a/report/report_test.go +++ b/report/report_test.go @@ -295,10 +295,10 @@ restore status: Success but non-fatal errors occurred. See log file .+ for }) Describe("SetBackupParamFromFlags", func() { AfterEach(func() { - utils.InitializePipeThroughParameters(false, 0) + utils.InitializePipeThroughParameters(false, "", 0) }) It("configures the Report struct correctly", func() { - utils.InitializePipeThroughParameters(true, 0) + utils.InitializePipeThroughParameters(true, "gzip", 0) backupCmdFlags := pflag.NewFlagSet("gpbackup", pflag.ExitOnError) backup.SetCmdFlags(backupCmdFlags) err := backupCmdFlags.Set(options.INCLUDE_RELATION, "public.foobar") @@ -315,6 +315,7 @@ restore status: Success but non-fatal errors occurred. See log file .+ for structmatcher.ExpectStructsToMatch(history.BackupConfig{ BackupVersion: "0.1.0", Compressed: true, + CompressionType: "gzip", DatabaseName: "testdb", DatabaseVersion: "5.0.0 build test", IncludeSchemas: []string{}, diff --git a/restore/wrappers.go b/restore/wrappers.go index 140d35269..d32a2a2cb 100644 --- a/restore/wrappers.go +++ b/restore/wrappers.go @@ -134,7 +134,7 @@ func SetMaxCsvLineLengthQuery(connectionPool *dbconn.DBConn) string { func InitializeBackupConfig() { backupConfig = history.ReadConfigFile(globalFPInfo.GetConfigFilePath()) - utils.InitializePipeThroughParameters(backupConfig.Compressed, 0) + utils.InitializePipeThroughParameters(backupConfig.Compressed, backupConfig.CompressionType, 0) report.EnsureBackupVersionCompatibility(backupConfig.BackupVersion, version) report.EnsureDatabaseVersionCompatibility(backupConfig.DatabaseVersion, connectionPool.Version) } diff --git a/utils/compression.go b/utils/compression.go index 8269ea3d2..3908bb2bf 100644 --- a/utils/compression.go +++ b/utils/compression.go @@ -13,11 +13,20 @@ type PipeThroughProgram struct { Extension string } -func InitializePipeThroughParameters(compress bool, compressionLevel int) { - if compress { - pipeThroughProgram = PipeThroughProgram{Name: "gzip", OutputCommand: fmt.Sprintf("gzip -c -%d", compressionLevel), InputCommand: "gzip -d -c", Extension: ".gz"} - } else { +func InitializePipeThroughParameters(compress bool, compressionType string, compressionLevel int) { + if !compress { pipeThroughProgram = PipeThroughProgram{Name: "cat", OutputCommand: "cat -", InputCommand: "cat -", Extension: ""} + return + } + + // backward compatibility for inputs without compressionType + if compressionType == "" { + compressionType = "gzip" + } + + if compressionType == "gzip" { + pipeThroughProgram = PipeThroughProgram{Name: "gzip", OutputCommand: fmt.Sprintf("gzip -c -%d", compressionLevel), InputCommand: "gzip -d -c", Extension: ".gz"} + return } } diff --git a/utils/compression_test.go b/utils/compression_test.go index ae709fc51..924b8e0ea 100644 --- a/utils/compression_test.go +++ b/utils/compression_test.go @@ -39,7 +39,7 @@ var _ = Describe("utils/compression tests", func() { InputCommand: "cat -", Extension: "", } - utils.InitializePipeThroughParameters(false, 3) + utils.InitializePipeThroughParameters(false, "", 3) resultProgram := utils.GetPipeThroughProgram() structmatcher.ExpectStructsToMatch(&expectedProgram, &resultProgram) }) @@ -52,7 +52,7 @@ var _ = Describe("utils/compression tests", func() { 
InputCommand: "gzip -d -c", Extension: ".gz", } - utils.InitializePipeThroughParameters(true, 7) + utils.InitializePipeThroughParameters(true, "gzip", 7) resultProgram := utils.GetPipeThroughProgram() structmatcher.ExpectStructsToMatch(&expectedProgram, &resultProgram) }) diff --git a/utils/util.go b/utils/util.go index 93c9f6d08..be89f914c 100644 --- a/utils/util.go +++ b/utils/util.go @@ -43,11 +43,11 @@ func RemoveFileIfExists(filename string) error { } func OpenFileForWrite(filename string) (*os.File, error) { - return os.OpenFile(filename, os.O_CREATE | os.O_TRUNC | os.O_WRONLY, 0644) + return os.OpenFile(filename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) } func WriteToFileAndMakeReadOnly(filename string, contents []byte) error { - file, err := os.OpenFile(filename, os.O_CREATE | os.O_TRUNC | os.O_WRONLY, 0644) + file, err := os.OpenFile(filename, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, 0644) if err != nil { return err } @@ -109,10 +109,24 @@ func ValidateFullPath(path string) error { return nil } -func ValidateCompressionLevel(compressionLevel int) error { - if compressionLevel < 1 || compressionLevel > 9 { - return errors.Errorf("Compression level must be between 1 and 9") +func ValidateCompressionTypeAndLevel(compressionType string, compressionLevel int) error { + minAllowedCompressionLevels := map[string]int{ + "gzip": 1, } + maxAllowedCompressionLevels := map[string]int{ + "gzip": 9, + } + + if maxAllowedLevel, ok := maxAllowedCompressionLevels[compressionType]; ok { + minAllowedLevel := minAllowedCompressionLevels[compressionType] + + if compressionLevel < minAllowedLevel || compressionLevel > maxAllowedLevel { + return fmt.Errorf("compression type '%s' only allows compression levels between %d and %d, but the provided level is %d", compressionType, minAllowedLevel, maxAllowedLevel, compressionLevel) + } + } else { + return fmt.Errorf("unknown compression type '%s'", compressionType) + } + return nil } diff --git a/utils/util_test.go b/utils/util_test.go index 579b04460..0a3bb3cfd 100644 --- a/utils/util_test.go +++ b/utils/util_test.go @@ -103,21 +103,42 @@ var _ = Describe("utils/util tests", func() { utils.ValidateGPDBVersionCompatibility(connectionPool) }) }) - Describe("ValidateCompressionLevel", func() { - It("validates a compression level between 1 and 9", func() { + Describe("ValidateCompressionTypeAndLevel", func() { + It("validates a compression type 'gzip' and a level between 1 and 9", func() { + compressType := "gzip" compressLevel := 5 - err := utils.ValidateCompressionLevel(compressLevel) + err := utils.ValidateCompressionTypeAndLevel(compressType, compressLevel) Expect(err).To(Not(HaveOccurred())) }) - It("panics if given a compression level < 1", func() { + It("panics if given a compression type 'gzip' and a compression level < 1", func() { + compressType := "gzip" compressLevel := 0 - err := utils.ValidateCompressionLevel(compressLevel) - Expect(err).To(MatchError("Compression level must be between 1 and 9")) + err := utils.ValidateCompressionTypeAndLevel(compressType, compressLevel) + Expect(err).To(MatchError("compression type 'gzip' only allows compression levels between 1 and 9, but the provided level is 0")) }) - It("panics if given a compression level > 9", func() { + It("panics if given a compression type 'gzip' and a compression level > 9", func() { + compressType := "gzip" compressLevel := 11 - err := utils.ValidateCompressionLevel(compressLevel) - Expect(err).To(MatchError("Compression level must be between 1 and 9")) + err := 
utils.ValidateCompressionTypeAndLevel(compressType, compressLevel) + Expect(err).To(MatchError("compression type 'gzip' only allows compression levels between 1 and 9, but the provided level is 11")) + }) + It("panics if given a compression type 'invalid' and a compression level > 0", func() { + compressType := "invalid" + compressLevel := 1 + err := utils.ValidateCompressionTypeAndLevel(compressType, compressLevel) + Expect(err).To(MatchError("unknown compression type 'invalid'")) + }) + It("panics if given a compression type 'invalid' and a compression level < 0", func() { + compressType := "invalid" + compressLevel := -1 + err := utils.ValidateCompressionTypeAndLevel(compressType, compressLevel) + Expect(err).To(MatchError("unknown compression type 'invalid'")) + }) + It("panics if given a compression type '' and a compression level > 0", func() { + compressType := "" + compressLevel := 1 + err := utils.ValidateCompressionTypeAndLevel(compressType, compressLevel) + Expect(err).To(MatchError("unknown compression type ''")) }) }) Describe("UnquoteIdent", func() { From 814ddcd6d42aaa6514b0d477dc28ec7aa7c29df2 Mon Sep 17 00:00:00 2001 From: Ivan Leskin Date: Thu, 29 Jul 2021 14:15:39 +0300 Subject: [PATCH 04/25] Implement ZSTD compression support Add `zstd` compression type in `backup`, `restore`, and `helper`. https://pkg.go.dev/github.com/klauspost/compress/zstd is used as the provider of ZSTD implementation in `backup_helper`. Implement tests for ZSTD compression. Update descriptions on validity of command-line options. Co-Authored-By: Jimmy Yih --- backup/data_test.go | 27 +++++++- go.mod | 1 + go.sum | 4 ++ helper/backup_helper.go | 4 ++ helper/backup_helper_pipes.go | 26 +++++++ helper/helper.go | 4 +- helper/restore_helper.go | 22 ++++-- integration/helper_test.go | 125 +++++++++++++++++++++++++++------- options/flag.go | 4 +- restore/data_test.go | 26 ++++++- utils/compression.go | 5 ++ utils/compression_test.go | 15 +++- utils/util.go | 22 +++--- utils/util_test.go | 18 +++++ 14 files changed, 253 insertions(+), 50 deletions(-) diff --git a/backup/data_test.go b/backup/data_test.go index d7c196356..d4d7db22e 100644 --- a/backup/data_test.go +++ b/backup/data_test.go @@ -73,7 +73,7 @@ var _ = Describe("backup/data tests", func() { }) Describe("CopyTableOut", func() { testTable := backup.Table{Relation: backup.Relation{SchemaOid: 2345, Oid: 3456, Schema: "public", Name: "foo"}} - It("will back up a table to its own file with compression", func() { + It("will back up a table to its own file with gzip compression", func() { utils.SetPipeThroughProgram(utils.PipeThroughProgram{Name: "gzip", OutputCommand: "gzip -c -8", InputCommand: "gzip -d -c", Extension: ".gz"}) execStr := regexp.QuoteMeta("COPY public.foo TO PROGRAM 'gzip -c -8 > /backups/20170101/20170101010101/gpbackup__20170101010101_3456.gz' WITH CSV DELIMITER ',' ON SEGMENT IGNORE EXTERNAL PARTITIONS;") mock.ExpectExec(execStr).WillReturnResult(sqlmock.NewResult(10, 0)) @@ -83,7 +83,7 @@ var _ = Describe("backup/data tests", func() { Expect(err).ShouldNot(HaveOccurred()) }) - It("will back up a table to its own file with compression using a plugin", func() { + It("will back up a table to its own file with gzip compression using a plugin", func() { _ = cmdFlags.Set(options.PLUGIN_CONFIG, "/tmp/plugin_config") pluginConfig := utils.PluginConfig{ExecutablePath: "/tmp/fake-plugin.sh", ConfigPath: "/tmp/plugin_config"} backup.SetPluginConfig(&pluginConfig) @@ -96,6 +96,29 @@ var _ = Describe("backup/data tests", func() { 
Expect(err).ShouldNot(HaveOccurred()) }) + It("will back up a table to its own file with zstd compression", func() { + utils.SetPipeThroughProgram(utils.PipeThroughProgram{Name: "zstd", OutputCommand: "zstd --compress -3 -c", InputCommand: "zstd --decompress -c", Extension: ".zst"}) + execStr := regexp.QuoteMeta("COPY public.foo TO PROGRAM 'zstd --compress -3 -c > /backups/20170101/20170101010101/gpbackup__20170101010101_3456.zst' WITH CSV DELIMITER ',' ON SEGMENT IGNORE EXTERNAL PARTITIONS;") + mock.ExpectExec(execStr).WillReturnResult(sqlmock.NewResult(10, 0)) + filename := "/backups/20170101/20170101010101/gpbackup__20170101010101_3456.zst" + + _, err := backup.CopyTableOut(connectionPool, testTable, filename, defaultConnNum) + + Expect(err).ShouldNot(HaveOccurred()) + }) + It("will back up a table to its own file with zstd compression using a plugin", func() { + _ = cmdFlags.Set(options.PLUGIN_CONFIG, "/tmp/plugin_config") + pluginConfig := utils.PluginConfig{ExecutablePath: "/tmp/fake-plugin.sh", ConfigPath: "/tmp/plugin_config"} + backup.SetPluginConfig(&pluginConfig) + utils.SetPipeThroughProgram(utils.PipeThroughProgram{Name: "zstd", OutputCommand: "zstd --compress -3 -c", InputCommand: "zstd --decompress -c", Extension: ".zst"}) + execStr := regexp.QuoteMeta("COPY public.foo TO PROGRAM 'zstd --compress -3 -c | /tmp/fake-plugin.sh backup_data /tmp/plugin_config /backups/20170101/20170101010101/gpbackup__20170101010101_3456' WITH CSV DELIMITER ',' ON SEGMENT IGNORE EXTERNAL PARTITIONS;") + mock.ExpectExec(execStr).WillReturnResult(sqlmock.NewResult(10, 0)) + + filename := "/backups/20170101/20170101010101/gpbackup__20170101010101_3456" + _, err := backup.CopyTableOut(connectionPool, testTable, filename, defaultConnNum) + + Expect(err).ShouldNot(HaveOccurred()) + }) It("will back up a table to its own file without compression", func() { utils.SetPipeThroughProgram(utils.PipeThroughProgram{Name: "cat", OutputCommand: "cat -", InputCommand: "cat -", Extension: ""}) execStr := regexp.QuoteMeta("COPY public.foo TO PROGRAM 'cat - > /backups/20170101/20170101010101/gpbackup__20170101010101_3456' WITH CSV DELIMITER ',' ON SEGMENT IGNORE EXTERNAL PARTITIONS;") diff --git a/go.mod b/go.mod index 4013038f1..1316642b6 100644 --- a/go.mod +++ b/go.mod @@ -9,6 +9,7 @@ require ( github.com/fatih/color v1.9.0 // indirect github.com/greenplum-db/gp-common-go-libs v1.0.5-0.20201005232358-ee3f0135881b github.com/jackc/pgconn v1.7.0 + github.com/klauspost/compress v1.13.1 github.com/lib/pq v1.3.0 github.com/mattn/go-runewidth v0.0.8 // indirect github.com/nightlyone/lockfile v0.0.0-20200124072040-edb130adc195 diff --git a/go.sum b/go.sum index 0b0e77cd5..2e03a63f0 100644 --- a/go.sum +++ b/go.sum @@ -39,6 +39,8 @@ github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:W github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.2 h1:+Z5KGCizgyZCbGh1KZqA0fcLLkwbsjIzS4aV2v7wJX0= github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/snappy v0.0.3 h1:fHPg5GQYlCeLIPB9BZqMVR5nR9A+IM5zcgeTdjMYmLA= +github.com/golang/snappy v0.0.3/go.mod h1:/XxbfmMg8lxefKM7IXC3fBNl/7bRcc72aCRzEWrmP2Q= github.com/google/go-cmp v0.3.0/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.3.1/go.mod h1:8QqcDgzrUqlUb/G2PQTWiueGozuR1884gddMywk6iLU= github.com/google/go-cmp v0.4.0 h1:xsAVV57WRhGj6kEIi8ReJzQlHHqcBYCElAvkovg3B/4= @@ -108,6 +110,8 @@ 
github.com/jackc/puddle v1.1.2/go.mod h1:m4B5Dj62Y0fbyuIc15OsIqK0+JU8nkqQjsgx7dv github.com/jmoiron/sqlx v0.0.0-20180614180643-0dae4fefe7c0 h1:5B0uxl2lzNRVkJVg+uGHxWtRt4C0Wjc6kJKo5XYx8xE= github.com/jmoiron/sqlx v0.0.0-20180614180643-0dae4fefe7c0/go.mod h1:IiEW3SEiiErVyFdH8NTuWjSifiEQKUoyK3LNqr2kCHU= github.com/kisielk/gotool v1.0.0/go.mod h1:XhKaO+MFFWcvkIS/tQcRk01m1F5IRFswLeQ+oQHNcck= +github.com/klauspost/compress v1.13.1 h1:wXr2uRxZTJXHLly6qhJabee5JqIhTRoLBhDOA74hDEQ= +github.com/klauspost/compress v1.13.1/go.mod h1:8dP1Hq4DHOhN9w426knH3Rhby4rFm6D8eO+e+Dq5Gzg= github.com/konsorten/go-windows-terminal-sequences v1.0.1/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/konsorten/go-windows-terminal-sequences v1.0.2/go.mod h1:T0+1ngSBFLxvqU3pZ+m/2kptfBszLMUkC4ZK/EgS/cQ= github.com/kr/pretty v0.1.0 h1:L/CwN0zerZDmRFUapSPitk6f+Q3+0za1rQkzVuMiMFI= diff --git a/helper/backup_helper.go b/helper/backup_helper.go index a57475a52..2c2982b13 100644 --- a/helper/backup_helper.go +++ b/helper/backup_helper.go @@ -137,6 +137,10 @@ func getBackupPipeWriter() (pipe BackupPipeWriterCloser, writeCmd *exec.Cmd, err pipe, err = NewGZipBackupPipeWriterCloser(writeHandle, *compressionLevel) return } + if *compressionType == "zstd" { + pipe, err = NewZSTDBackupPipeWriterCloser(writeHandle, *compressionLevel) + return + } writeHandle.Close() return nil, nil, fmt.Errorf("unknown compression type '%s' (compression level %d)", *compressionType, *compressionLevel) diff --git a/helper/backup_helper_pipes.go b/helper/backup_helper_pipes.go index 317f5b3a7..29edebfb9 100644 --- a/helper/backup_helper_pipes.go +++ b/helper/backup_helper_pipes.go @@ -4,6 +4,8 @@ import ( "bufio" "compress/gzip" "io" + + "github.com/klauspost/compress/zstd" ) type BackupPipeWriterCloser interface { @@ -58,3 +60,27 @@ func NewGZipBackupPipeWriterCloser(writeHandle io.WriteCloser, compressLevel int } return } + +type ZSTDBackupPipeWriterCloser struct { + cPipe CommonBackupPipeWriterCloser + zstdEncoder *zstd.Encoder +} + +func (zstdPipe ZSTDBackupPipeWriterCloser) Write(p []byte) (n int, err error) { + return zstdPipe.zstdEncoder.Write(p) +} + +// Returns errors from underlying common writer only +func (zstdPipe ZSTDBackupPipeWriterCloser) Close() error { + _ = zstdPipe.zstdEncoder.Close() + return zstdPipe.cPipe.Close() +} + +func NewZSTDBackupPipeWriterCloser(writeHandle io.WriteCloser, compressLevel int) (zstdPipe ZSTDBackupPipeWriterCloser, err error) { + zstdPipe.cPipe = NewCommonBackupPipeWriterCloser(writeHandle) + zstdPipe.zstdEncoder, err = zstd.NewWriter(zstdPipe.cPipe.bufIoWriter, zstd.WithEncoderLevel(zstd.EncoderLevelFromZstd(compressLevel))) + if err != nil { + zstdPipe.cPipe.Close() + } + return +} diff --git a/helper/helper.go b/helper/helper.go index 32ed90549..461a3a670 100644 --- a/helper/helper.go +++ b/helper/helper.go @@ -100,8 +100,8 @@ func InitializeGlobals() { backupAgent = flag.Bool("backup-agent", false, "Use gpbackup_helper as an agent for backup") content = flag.Int("content", -2, "Content ID of the corresponding segment") - compressionLevel = flag.Int("compression-level", 0, "The level of compression. O indicates no compression.") - compressionType = flag.String("compression-type", "gzip", "The type of compression. Valid values are 'gzip'") + compressionLevel = flag.Int("compression-level", 0, "The level of compression. O indicates no compression. Range of valid values depends on compression type") + compressionType = flag.String("compression-type", "gzip", "The type of compression. 
Valid values are 'gzip', 'zstd'") dataFile = flag.String("data-file", "", "Absolute path to the data file") oidFile = flag.String("oid-file", "", "Absolute path to the file containing a list of oids to restore") onErrorContinue = flag.Bool("on-error-continue", false, "Continue restore even when encountering an error") diff --git a/helper/restore_helper.go b/helper/restore_helper.go index b6ab4c51f..91f7d4b9e 100644 --- a/helper/restore_helper.go +++ b/helper/restore_helper.go @@ -12,6 +12,7 @@ import ( "github.com/greenplum-db/gpbackup/toc" "github.com/greenplum-db/gpbackup/utils" + "github.com/klauspost/compress/zstd" "github.com/pkg/errors" ) @@ -19,10 +20,11 @@ import ( * Restore specific functions */ type ReaderType string + const ( - SEEKABLE ReaderType = "seekable" // reader which supports seek - NONSEEKABLE = "discard" // reader which is not seekable - SUBSET = "subset" // reader which operates on pre filtered data + SEEKABLE ReaderType = "seekable" // reader which supports seek + NONSEEKABLE = "discard" // reader which is not seekable + SUBSET = "subset" // reader which operates on pre filtered data ) /* RestoreReader structure to wrap the underlying reader. @@ -134,7 +136,7 @@ func doRestoreAgent() error { } log(fmt.Sprintf("Restoring table with oid %d", oid)) - bytesRead, err = reader.copyData(int64(end-start)) + bytesRead, err = reader.copyData(int64(end - start)) if err != nil { // In case COPY FROM or copyN fails in the middle of a load. We // need to update the lastByte with the amount of bytes that was @@ -192,7 +194,7 @@ func getRestoreDataReader(toc *toc.SegmentTOC, oidList []int) (*RestoreReader, e restoreReader.readerType = NONSEEKABLE } } else { - if *isFiltered && !strings.HasSuffix(*dataFile, ".gz") { + if *isFiltered && !strings.HasSuffix(*dataFile, ".gz") && !strings.HasSuffix(*dataFile, ".zst") { // Seekable reader if backup is not compressed and filters are set seekHandle, err = os.Open(*dataFile) restoreReader.readerType = SEEKABLE @@ -215,6 +217,12 @@ func getRestoreDataReader(toc *toc.SegmentTOC, oidList []int) (*RestoreReader, e return nil, err } restoreReader.bufReader = bufio.NewReader(gzipReader) + } else if strings.HasSuffix(*dataFile, ".zst") { + zstdReader, err := zstd.NewReader(readHandle) + if err != nil { + return nil, err + } + restoreReader.bufReader = bufio.NewReader(zstdReader) } else { restoreReader.bufReader = bufio.NewReader(readHandle) } @@ -241,7 +249,7 @@ func getRestorePipeWriter(currentPipe string) (*bufio.Writer, *os.File, error) { // adopting the new kernel, we must only use the bare essential methods Write() and // Close() for the pipe to avoid an extra buffer read that can happen in error // scenarios with --on-error-continue. 
- pipeWriter := bufio.NewWriter(struct{io.WriteCloser}{fileHandle}) + pipeWriter := bufio.NewWriter(struct{ io.WriteCloser }{fileHandle}) return pipeWriter, fileHandle, nil } @@ -253,7 +261,7 @@ func startRestorePluginCommand(toc *toc.SegmentTOC, oidList []int) (io.Reader, b return nil, false, err } cmdStr := "" - if pluginConfig.CanRestoreSubset() && *isFiltered && !strings.HasSuffix(*dataFile, ".gz") { + if pluginConfig.CanRestoreSubset() && *isFiltered && !strings.HasSuffix(*dataFile, ".gz") && !strings.HasSuffix(*dataFile, ".zst") { offsetsFile, _ := ioutil.TempFile("/tmp", "gprestore_offsets_") defer func() { offsetsFile.Close() diff --git a/integration/helper_test.go b/integration/helper_test.go index 4c098235c..e46bfa814 100644 --- a/integration/helper_test.go +++ b/integration/helper_test.go @@ -14,6 +14,7 @@ import ( "time" "github.com/greenplum-db/gp-common-go-libs/operating" + "github.com/klauspost/compress/zstd" . "github.com/onsi/ginkgo" . "github.com/onsi/gomega" @@ -98,7 +99,7 @@ var _ = Describe("gpbackup_helper end to end integration tests", func() { err := helperCmd.Wait() printHelperLogOnError(err) Expect(err).ToNot(HaveOccurred()) - assertBackupArtifacts(false, false) + assertBackupArtifacts(false) }) It("runs backup gpbackup_helper with data exceeding pipe buffer size", func() { helperCmd := gpbackupHelper(gpbackupHelperPath, "--backup-agent", "--compression-level", "0", "--data-file", dataFileFullPath) @@ -107,13 +108,21 @@ var _ = Describe("gpbackup_helper end to end integration tests", func() { printHelperLogOnError(err) Expect(err).ToNot(HaveOccurred()) }) - It("runs backup gpbackup_helper with compression", func() { + It("runs backup gpbackup_helper with gzip compression", func() { helperCmd := gpbackupHelper(gpbackupHelperPath, "--backup-agent", "--compression-type", "gzip", "--compression-level", "1", "--data-file", dataFileFullPath+".gz") writeToPipes(defaultData) err := helperCmd.Wait() printHelperLogOnError(err) Expect(err).ToNot(HaveOccurred()) - assertBackupArtifacts(true, false) + assertBackupArtifactsWithCompression("gzip", false) + }) + It("runs backup gpbackup_helper with zstd compression", func() { + helperCmd := gpbackupHelper(gpbackupHelperPath, "--backup-agent", "--compression-type", "zstd", "--compression-level", "1", "--data-file", dataFileFullPath+".zst") + writeToPipes(defaultData) + err := helperCmd.Wait() + printHelperLogOnError(err) + Expect(err).ToNot(HaveOccurred()) + assertBackupArtifactsWithCompression("zstd", false) }) It("runs backup gpbackup_helper without compression with plugin", func() { helperCmd := gpbackupHelper(gpbackupHelperPath, "--backup-agent", "--compression-level", "0", "--data-file", dataFileFullPath, "--plugin-config", pluginConfigPath) @@ -121,15 +130,23 @@ var _ = Describe("gpbackup_helper end to end integration tests", func() { err := helperCmd.Wait() printHelperLogOnError(err) Expect(err).ToNot(HaveOccurred()) - assertBackupArtifacts(false, true) + assertBackupArtifacts(true) }) - It("runs backup gpbackup_helper with compression with plugin", func() { + It("runs backup gpbackup_helper with gzip compression with plugin", func() { helperCmd := gpbackupHelper(gpbackupHelperPath, "--backup-agent", "--compression-type", "gzip", "--compression-level", "1", "--data-file", dataFileFullPath+".gz", "--plugin-config", pluginConfigPath) writeToPipes(defaultData) err := helperCmd.Wait() printHelperLogOnError(err) Expect(err).ToNot(HaveOccurred()) - assertBackupArtifacts(true, true) + assertBackupArtifactsWithCompression("gzip", 
true) + }) + It("runs backup gpbackup_helper with zstd compression with plugin", func() { + helperCmd := gpbackupHelper(gpbackupHelperPath, "--backup-agent", "--compression-type", "zstd", "--compression-level", "1", "--data-file", dataFileFullPath+".zst", "--plugin-config", pluginConfigPath) + writeToPipes(defaultData) + err := helperCmd.Wait() + printHelperLogOnError(err) + Expect(err).ToNot(HaveOccurred()) + assertBackupArtifactsWithCompression("zstd", true) }) It("Generates error file when backup agent interrupted", func() { helperCmd := gpbackupHelper(gpbackupHelperPath, "--backup-agent", "--compression-level", "0", "--data-file", dataFileFullPath) @@ -143,7 +160,7 @@ var _ = Describe("gpbackup_helper end to end integration tests", func() { }) Context("restore tests", func() { It("runs restore gpbackup_helper without compression", func() { - setupRestoreFiles(false, false) + setupRestoreFiles("", false) helperCmd := gpbackupHelper(gpbackupHelperPath, "--restore-agent", "--data-file", dataFileFullPath) for _, i := range []int{1, 3} { contents, _ := ioutil.ReadFile(fmt.Sprintf("%s_%d", pipeFile, i)) @@ -154,8 +171,8 @@ var _ = Describe("gpbackup_helper end to end integration tests", func() { Expect(err).ToNot(HaveOccurred()) assertNoErrors() }) - It("runs restore gpbackup_helper with compression", func() { - setupRestoreFiles(true, false) + It("runs restore gpbackup_helper with gzip compression", func() { + setupRestoreFiles("gzip", false) helperCmd := gpbackupHelper(gpbackupHelperPath, "--restore-agent", "--data-file", dataFileFullPath+".gz") for _, i := range []int{1, 3} { contents, _ := ioutil.ReadFile(fmt.Sprintf("%s_%d", pipeFile, i)) @@ -166,8 +183,20 @@ var _ = Describe("gpbackup_helper end to end integration tests", func() { Expect(err).ToNot(HaveOccurred()) assertNoErrors() }) + It("runs restore gpbackup_helper with zstd compression", func() { + setupRestoreFiles("zstd", false) + helperCmd := gpbackupHelper(gpbackupHelperPath, "--restore-agent", "--data-file", dataFileFullPath+".zst") + for _, i := range []int{1, 3} { + contents, _ := ioutil.ReadFile(fmt.Sprintf("%s_%d", pipeFile, i)) + Expect(string(contents)).To(Equal("here is some data\n")) + } + err := helperCmd.Wait() + printHelperLogOnError(err) + Expect(err).ToNot(HaveOccurred()) + assertNoErrors() + }) It("runs restore gpbackup_helper without compression with plugin", func() { - setupRestoreFiles(false, true) + setupRestoreFiles("", true) helperCmd := gpbackupHelper(gpbackupHelperPath, "--restore-agent", "--data-file", dataFileFullPath, "--plugin-config", pluginConfigPath) for _, i := range []int{1, 3} { contents, _ := ioutil.ReadFile(fmt.Sprintf("%s_%d", pipeFile, i)) @@ -178,8 +207,8 @@ var _ = Describe("gpbackup_helper end to end integration tests", func() { Expect(err).ToNot(HaveOccurred()) assertNoErrors() }) - It("runs restore gpbackup_helper with compression with plugin", func() { - setupRestoreFiles(true, true) + It("runs restore gpbackup_helper with gzip compression with plugin", func() { + setupRestoreFiles("gzip", true) helperCmd := gpbackupHelper(gpbackupHelperPath, "--restore-agent", "--data-file", dataFileFullPath+".gz", "--plugin-config", pluginConfigPath) for _, i := range []int{1, 3} { contents, _ := ioutil.ReadFile(fmt.Sprintf("%s_%d", pipeFile, i)) @@ -190,8 +219,20 @@ var _ = Describe("gpbackup_helper end to end integration tests", func() { Expect(err).ToNot(HaveOccurred()) assertNoErrors() }) + It("runs restore gpbackup_helper with zstd compression with plugin", func() { + setupRestoreFiles("zstd", 
true) + helperCmd := gpbackupHelper(gpbackupHelperPath, "--restore-agent", "--data-file", dataFileFullPath+".zst", "--plugin-config", pluginConfigPath) + for _, i := range []int{1, 3} { + contents, _ := ioutil.ReadFile(fmt.Sprintf("%s_%d", pipeFile, i)) + Expect(string(contents)).To(Equal("here is some data\n")) + } + err := helperCmd.Wait() + printHelperLogOnError(err) + Expect(err).ToNot(HaveOccurred()) + assertNoErrors() + }) It("Generates error file when restore agent interrupted", func() { - setupRestoreFiles(true, false) + setupRestoreFiles("gzip", false) helperCmd := gpbackupHelper(gpbackupHelperPath, "--restore-agent", "--data-file", dataFileFullPath+".gz") waitForPipeCreation() err := helperCmd.Process.Signal(os.Interrupt) @@ -270,20 +311,27 @@ var _ = Describe("gpbackup_helper end to end integration tests", func() { }) }) -func setupRestoreFiles(withCompression bool, withPlugin bool) { +func setupRestoreFiles(compressionType string, withPlugin bool) { dataFile := dataFileFullPath if withPlugin { dataFile = pluginBackupPath } + f, _ := os.Create(oidFile) _, _ = f.WriteString("1\n3\n") - if withCompression { + + if compressionType == "gzip" { f, _ := os.Create(dataFile + ".gz") + defer f.Close() gzipf := gzip.NewWriter(f) - defer func() { - _ = gzipf.Close() - }() + defer gzipf.Close() _, _ = gzipf.Write([]byte(expectedData)) + } else if compressionType == "zstd" { + f, _ := os.Create(dataFile + ".zst") + defer f.Close() + zstdf, _ := zstd.NewWriter(f) + defer zstdf.Close() + _, _ = zstdf.Write([]byte(expectedData)) } else { f, _ := os.Create(dataFile) _, _ = f.WriteString(expectedData) @@ -306,28 +354,57 @@ func assertErrorsHandled() { Expect(err).ToNot(HaveOccurred()) Expect(pipes).To(BeEmpty()) } -func assertBackupArtifacts(withCompression bool, withPlugin bool) { + +func assertBackupArtifacts(withPlugin bool) { var contents []byte var err error dataFile := dataFileFullPath if withPlugin { dataFile = pluginBackupPath } - if withCompression { + contents, err = ioutil.ReadFile(dataFile) + Expect(err).ToNot(HaveOccurred()) + Expect(string(contents)).To(Equal(expectedData)) + + contents, err = ioutil.ReadFile(tocFile) + Expect(err).ToNot(HaveOccurred()) + Expect(string(contents)).To(Equal(expectedTOC)) + assertNoErrors() +} + +func assertBackupArtifactsWithCompression(compressionType string, withPlugin bool) { + var contents []byte + var err error + + dataFile := dataFileFullPath + if withPlugin { + dataFile = pluginBackupPath + } + + if compressionType == "gzip" { contents, err = ioutil.ReadFile(dataFile + ".gz") - Expect(err).ToNot(HaveOccurred()) + } else if compressionType == "zstd" { + contents, err = ioutil.ReadFile(dataFile + ".zst") + } else { + Fail("unknown compression type " + compressionType) + } + Expect(err).ToNot(HaveOccurred()) + + if compressionType == "gzip" { r, _ := gzip.NewReader(bytes.NewReader(contents)) contents, _ = ioutil.ReadAll(r) - + } else if compressionType == "zstd" { + r, _ := zstd.NewReader(bytes.NewReader(contents)) + contents, _ = ioutil.ReadAll(r) } else { - contents, err = ioutil.ReadFile(dataFile) - Expect(err).ToNot(HaveOccurred()) + Fail("unknown compression type " + compressionType) } Expect(string(contents)).To(Equal(expectedData)) contents, err = ioutil.ReadFile(tocFile) Expect(err).ToNot(HaveOccurred()) Expect(string(contents)).To(Equal(expectedTOC)) + assertNoErrors() } diff --git a/options/flag.go b/options/flag.go index 9f7e0b591..215e17500 100644 --- a/options/flag.go +++ b/options/flag.go @@ -52,8 +52,8 @@ const ( func 
SetBackupFlagDefaults(flagSet *pflag.FlagSet) { flagSet.String(BACKUP_DIR, "", "The absolute path of the directory to which all backup files will be written") - flagSet.String(COMPRESSION_TYPE, "gzip", "Type of compression to use during data backup. Valid values are: 'gzip'") - flagSet.Int(COMPRESSION_LEVEL, 1, "Level of compression to use during data backup. Valid values are between 1 and 9.") + flagSet.String(COMPRESSION_TYPE, "gzip", "Type of compression to use during data backup. Valid values are 'gzip', 'zstd'") + flagSet.Int(COMPRESSION_LEVEL, 1, "Level of compression to use during data backup. Range of valid values depends on compression type") flagSet.Bool(DATA_ONLY, false, "Only back up data, do not back up metadata") flagSet.String(DBNAME, "", "The database to be backed up") flagSet.Bool(DEBUG, false, "Print verbose and debug log messages") diff --git a/restore/data_test.go b/restore/data_test.go index c439731cb..b2ddffdb8 100644 --- a/restore/data_test.go +++ b/restore/data_test.go @@ -21,7 +21,7 @@ var _ = Describe("restore/data tests", func() { backup.SetPluginConfig(nil) _ = cmdFlags.Set(options.PLUGIN_CONFIG, "") }) - It("will restore a table from its own file with compression", func() { + It("will restore a table from its own file with gzip compression", func() { utils.SetPipeThroughProgram(utils.PipeThroughProgram{Name: "gzip", OutputCommand: "gzip -c -1", InputCommand: "gzip -d -c", Extension: ".gz"}) execStr := regexp.QuoteMeta("COPY public.foo(i,j) FROM PROGRAM 'cat /backups/20170101/20170101010101/gpbackup__20170101010101_3456.gz | gzip -d -c' WITH CSV DELIMITER ',' ON SEGMENT;") mock.ExpectExec(execStr).WillReturnResult(sqlmock.NewResult(10, 0)) @@ -30,6 +30,15 @@ var _ = Describe("restore/data tests", func() { Expect(err).ShouldNot(HaveOccurred()) }) + It("will restore a table from its own file with zstd compression", func() { + utils.SetPipeThroughProgram(utils.PipeThroughProgram{Name: "zstd", OutputCommand: "zstd --compress -1 -c", InputCommand: "zstd --decompress -c", Extension: ".zst"}) + execStr := regexp.QuoteMeta("COPY public.foo(i,j) FROM PROGRAM 'cat /backups/20170101/20170101010101/gpbackup__20170101010101_3456.zst | zstd --decompress -c' WITH CSV DELIMITER ',' ON SEGMENT;") + mock.ExpectExec(execStr).WillReturnResult(sqlmock.NewResult(10, 0)) + filename := "/backups/20170101/20170101010101/gpbackup__20170101010101_3456.zst" + _, err := restore.CopyTableIn(connectionPool, "public.foo", "(i,j)", filename, false, 0) + + Expect(err).ShouldNot(HaveOccurred()) + }) It("will restore a table from its own file without compression", func() { execStr := regexp.QuoteMeta("COPY public.foo(i,j) FROM PROGRAM 'cat /backups/20170101/20170101010101/gpbackup__20170101010101_3456 | cat -' WITH CSV DELIMITER ',' ON SEGMENT;") mock.ExpectExec(execStr).WillReturnResult(sqlmock.NewResult(10, 0)) @@ -46,7 +55,7 @@ var _ = Describe("restore/data tests", func() { Expect(err).ShouldNot(HaveOccurred()) }) - It("will restore a table from its own file with compression using a plugin", func() { + It("will restore a table from its own file with gzip compression using a plugin", func() { utils.SetPipeThroughProgram(utils.PipeThroughProgram{Name: "gzip", OutputCommand: "gzip -c -1", InputCommand: "gzip -d -c", Extension: ".gz"}) _ = cmdFlags.Set(options.PLUGIN_CONFIG, "/tmp/plugin_config") pluginConfig := utils.PluginConfig{ExecutablePath: "/tmp/fake-plugin.sh", ConfigPath: "/tmp/plugin_config"} @@ -59,6 +68,19 @@ var _ = Describe("restore/data tests", func() { 
Expect(err).ShouldNot(HaveOccurred()) }) + It("will restore a table from its own file with zstd compression using a plugin", func() { + utils.SetPipeThroughProgram(utils.PipeThroughProgram{Name: "zstd", OutputCommand: "zstd --compress -1 -c", InputCommand: "zstd --decompress -c", Extension: ".zst"}) + _ = cmdFlags.Set(options.PLUGIN_CONFIG, "/tmp/plugin_config") + pluginConfig := utils.PluginConfig{ExecutablePath: "/tmp/fake-plugin.sh", ConfigPath: "/tmp/plugin_config"} + restore.SetPluginConfig(&pluginConfig) + execStr := regexp.QuoteMeta("COPY public.foo(i,j) FROM PROGRAM '/tmp/fake-plugin.sh restore_data /tmp/plugin_config /backups/20170101/20170101010101/gpbackup__20170101010101_pipe_3456.zst | zstd --decompress -c' WITH CSV DELIMITER ',' ON SEGMENT;") + mock.ExpectExec(execStr).WillReturnResult(sqlmock.NewResult(10, 0)) + + filename := "/backups/20170101/20170101010101/gpbackup__20170101010101_pipe_3456.zst" + _, err := restore.CopyTableIn(connectionPool, "public.foo", "(i,j)", filename, false, 0) + + Expect(err).ShouldNot(HaveOccurred()) + }) It("will restore a table from its own file without compression using a plugin", func() { _ = cmdFlags.Set(options.PLUGIN_CONFIG, "/tmp/plugin_config") pluginConfig := utils.PluginConfig{ExecutablePath: "/tmp/fake-plugin.sh", ConfigPath: "/tmp/plugin_config"} diff --git a/utils/compression.go b/utils/compression.go index 3908bb2bf..cc9cff06b 100644 --- a/utils/compression.go +++ b/utils/compression.go @@ -28,6 +28,11 @@ func InitializePipeThroughParameters(compress bool, compressionType string, comp pipeThroughProgram = PipeThroughProgram{Name: "gzip", OutputCommand: fmt.Sprintf("gzip -c -%d", compressionLevel), InputCommand: "gzip -d -c", Extension: ".gz"} return } + + if compressionType == "zstd" { + pipeThroughProgram = PipeThroughProgram{Name: "zstd", OutputCommand: fmt.Sprintf("zstd --compress -%d -c", compressionLevel), InputCommand: "zstd --decompress -c", Extension: ".zst"} + return + } } func GetPipeThroughProgram() PipeThroughProgram { diff --git a/utils/compression_test.go b/utils/compression_test.go index 924b8e0ea..7fff44e7d 100644 --- a/utils/compression_test.go +++ b/utils/compression_test.go @@ -43,7 +43,7 @@ var _ = Describe("utils/compression tests", func() { resultProgram := utils.GetPipeThroughProgram() structmatcher.ExpectStructsToMatch(&expectedProgram, &resultProgram) }) - It("initializes to use gzip when passed compression and a level", func() { + It("initializes to use gzip when passed compression type gzip and a level", func() { originalProgram := utils.GetPipeThroughProgram() defer utils.SetPipeThroughProgram(originalProgram) expectedProgram := utils.PipeThroughProgram{ @@ -56,5 +56,18 @@ var _ = Describe("utils/compression tests", func() { resultProgram := utils.GetPipeThroughProgram() structmatcher.ExpectStructsToMatch(&expectedProgram, &resultProgram) }) + It("initializes to use zstd when passed compression type zstd and a level", func() { + originalProgram := utils.GetPipeThroughProgram() + defer utils.SetPipeThroughProgram(originalProgram) + expectedProgram := utils.PipeThroughProgram{ + Name: "zstd", + OutputCommand: "zstd --compress -7 -c", + InputCommand: "zstd --decompress -c", + Extension: ".zst", + } + utils.InitializePipeThroughParameters(true, "zstd", 7) + resultProgram := utils.GetPipeThroughProgram() + structmatcher.ExpectStructsToMatch(&expectedProgram, &resultProgram) + }) }) }) diff --git a/utils/util.go b/utils/util.go index be89f914c..c782de669 100644 --- a/utils/util.go +++ b/utils/util.go @@ 
-109,19 +109,21 @@ func ValidateFullPath(path string) error { return nil }
+// CompressionLevelsDescription describes the valid range of compression levels for a compression type
+type CompressionLevelsDescription struct {
+	Min int
+	Max int
+}
+
 func ValidateCompressionTypeAndLevel(compressionType string, compressionLevel int) error {
-	minAllowedCompressionLevels := map[string]int{
-		"gzip": 1,
+	compressionLevelsForType := map[string]CompressionLevelsDescription{
+		"gzip": {Min: 1, Max: 9},
+		"zstd": {Min: 1, Max: 19},
 	}
-	maxAllowedCompressionLevels := map[string]int{
-		"gzip": 9,
-	}
-
-	if maxAllowedLevel, ok := maxAllowedCompressionLevels[compressionType]; ok {
-		minAllowedLevel := minAllowedCompressionLevels[compressionType]
-		if compressionLevel < minAllowedLevel || compressionLevel > maxAllowedLevel {
-			return fmt.Errorf("compression type '%s' only allows compression levels between %d and %d, but the provided level is %d", compressionType, minAllowedLevel, maxAllowedLevel, compressionLevel)
+	if levelsDescription, ok := compressionLevelsForType[compressionType]; ok {
+		if compressionLevel < levelsDescription.Min || compressionLevel > levelsDescription.Max {
+			return fmt.Errorf("compression type '%s' only allows compression levels between %d and %d, but the provided level is %d", compressionType, levelsDescription.Min, levelsDescription.Max, compressionLevel)
 		}
 	} else {
 		return fmt.Errorf("unknown compression type '%s'", compressionType)
diff --git a/utils/util_test.go b/utils/util_test.go
index 0a3bb3cfd..6bcbd44a7 100644
--- a/utils/util_test.go
+++ b/utils/util_test.go
@@ -140,6 +140,24 @@ var _ = Describe("utils/util tests", func() {
 			err := utils.ValidateCompressionTypeAndLevel(compressType, compressLevel)
 			Expect(err).To(MatchError("unknown compression type ''"))
 		})
+		It("validates a compression type 'zstd' and a level between 1 and 19", func() {
+			compressType := "zstd"
+			compressLevel := 11
+			err := utils.ValidateCompressionTypeAndLevel(compressType, compressLevel)
+			Expect(err).To(Not(HaveOccurred()))
+		})
+		It("panics if given a compression type 'zstd' and a compression level < 1", func() {
+			compressType := "zstd"
+			compressLevel := 0
+			err := utils.ValidateCompressionTypeAndLevel(compressType, compressLevel)
+			Expect(err).To(MatchError("compression type 'zstd' only allows compression levels between 1 and 19, but the provided level is 0"))
+		})
+		It("panics if given a compression type 'zstd' and a compression level > 19", func() {
+			compressType := "zstd"
+			compressLevel := 20
+			err := utils.ValidateCompressionTypeAndLevel(compressType, compressLevel)
+			Expect(err).To(MatchError("compression type 'zstd' only allows compression levels between 1 and 19, but the provided level is 20"))
+		})
 	})
 	Describe("UnquoteIdent", func() {
 		It("returns unchanged ident when passed a single char", func() {
From ed228a417a7be490c51181291f5a347e5d23c8d6 Mon Sep 17 00:00:00 2001
From: Vasiliy Ivanov
Date: Fri, 20 Aug 2021 19:22:34 +1000
Subject: [PATCH 05/25] ADBDEV-1918 Implement scripts to run gpbackup tests (#6)

* scripts to run tests on CI
* ADBDEV-1953 - Handle arenadata suffix in version (#5)

Handle the arenadata suffix in the gpbackup and gprestore versions. First, compare the Arenadata patch versions. If they are compatible, strip the arenadata suffix and compare the upstream versions.
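(Illustrative sketch only, not part of this patch: the actual implementation follows below in arenadata/arenadata_helper.go and report/report.go. The helper name `compatible`, the simplified error handling, and the direct use of the blang/semver package already imported by report.go are assumptions made for the illustration.)

```go
package main

import (
	"fmt"
	"regexp"
	"strconv"

	"github.com/blang/semver"
)

// adSuffix matches the "_arenadataN" part of a version string.
var adSuffix = regexp.MustCompile(`_arenadata(\d+)`)

// compatible reports whether a backup taken with backupVersion may be restored
// with restoreVersion: first compare the arenadata patch numbers, then strip
// the suffix and compare the remaining upstream semver versions.
func compatible(backupVersion, restoreVersion string) (bool, error) {
	b := adSuffix.FindStringSubmatch(backupVersion)
	r := adSuffix.FindStringSubmatch(restoreVersion)
	if b != nil && r != nil {
		bNum, _ := strconv.Atoi(b[1])
		rNum, _ := strconv.Atoi(r[1])
		if rNum < bNum {
			return false, nil // an older arenadata patch cannot restore a newer one
		}
	}
	backupSemVer, err := semver.Make(adSuffix.ReplaceAllString(backupVersion, ""))
	if err != nil {
		return false, err
	}
	restoreSemVer, err := semver.Make(adSuffix.ReplaceAllString(restoreVersion, ""))
	if err != nil {
		return false, err
	}
	return !restoreSemVer.LT(backupSemVer), nil
}

func main() {
	ok, _ := compatible("1.20.4_arenadata1", "1.20.4_arenadata2")
	fmt.Println(ok) // true: same upstream version, newer arenadata patch on the restore side
}
```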
Co-authored-by: Polina Bungina --- arenadata/Dockerfile | 3 +++ arenadata/README.md | 6 +++++ arenadata/arenadata_helper.go | 42 +++++++++++++++++++++++++++++++ arenadata/run_gpbackup_tests.bash | 19 ++++++++++++++ report/report.go | 9 +++++++ report/report_test.go | 20 +++++++++++++++ 6 files changed, 99 insertions(+) create mode 100644 arenadata/Dockerfile create mode 100644 arenadata/README.md create mode 100644 arenadata/arenadata_helper.go create mode 100644 arenadata/run_gpbackup_tests.bash diff --git a/arenadata/Dockerfile b/arenadata/Dockerfile new file mode 100644 index 000000000..4fae6af0f --- /dev/null +++ b/arenadata/Dockerfile @@ -0,0 +1,3 @@ +FROM hub.adsw.io/library/gpdb6_regress:latest + +COPY . /home/gpadmin/go/src/github.com/greenplum-db/gpbackup diff --git a/arenadata/README.md b/arenadata/README.md new file mode 100644 index 000000000..29fb1c8fe --- /dev/null +++ b/arenadata/README.md @@ -0,0 +1,6 @@ +## How to run tests + +```bash +docker build -t gpbackup:test -f arenadata/Dockerfile . +docker run --rm -it --sysctl 'kernel.sem=500 1024000 200 4096' gpbackup:test bash -c "ssh-keygen -A && /usr/sbin/sshd && bash /home/gpadmin/go/src/github.com/greenplum-db/gpbackup/arenadata/run_gpbackup_tests.bash" +``` diff --git a/arenadata/arenadata_helper.go b/arenadata/arenadata_helper.go new file mode 100644 index 000000000..afc08f9d5 --- /dev/null +++ b/arenadata/arenadata_helper.go @@ -0,0 +1,42 @@ +package arenadata + +import ( + "regexp" + "strconv" + + "github.com/greenplum-db/gp-common-go-libs/gplog" + "github.com/pkg/errors" +) + +var ( + adPattern = regexp.MustCompile(`_arenadata(\d+)`) +) + +func EnsureAdVersionCompatibility(backupVersion string, restoreVersion string) { + var ( + adBackup, adRestore int + err error + ) + if strVersion := adPattern.FindAllStringSubmatch(backupVersion, -1); len(strVersion) > 0 { + adBackup, err = strconv.Atoi(strVersion[0][1]) + gplog.FatalOnError(err) + } else { + gplog.Fatal(errors.Errorf("Invalid arenadata version format for gpbackup: %s", backupVersion), "") + } + if strVersion := adPattern.FindAllStringSubmatch(restoreVersion, -1); len(strVersion) > 0 { + adRestore, err = strconv.Atoi(strVersion[0][1]) + gplog.FatalOnError(err) + } else { + gplog.Fatal(errors.Errorf("Invalid arenadata version format for gprestore: %s", restoreVersion), "") + } + if adRestore < adBackup { + gplog.Fatal(errors.Errorf("gprestore arenadata%d cannot restore a backup taken with gpbackup arenadata%d; please use gprestore arenadata%d or later.", + adRestore, adBackup, adBackup), "") + } +} + +// fullVersion: gpbackup version + '_' + arenadata release + ('+' + gpbackup build) +// example: 1.20.4_arenadata2+dev.1.g768b7e0 -> 1.20.4+dev.1.g768b7e0 +func GetOriginalVersion(fullVersion string) string { + return adPattern.ReplaceAllString(fullVersion, "") +} diff --git a/arenadata/run_gpbackup_tests.bash b/arenadata/run_gpbackup_tests.bash new file mode 100644 index 000000000..174ff9d18 --- /dev/null +++ b/arenadata/run_gpbackup_tests.bash @@ -0,0 +1,19 @@ +#!/bin/bash -l + +set -eox pipefail + +source gpdb_src/concourse/scripts/common.bash +install_and_configure_gpdb +make -C gpdb_src/src/test/regress/ +make -C gpdb_src/contrib/dummy_seclabel/ install +gpdb_src/concourse/scripts/setup_gpadmin_user.bash +make_cluster + +su - gpadmin -c " +source /usr/local/greenplum-db-devel/greenplum_path.sh; +source ~/gpdb_src/gpAux/gpdemo/gpdemo-env.sh; +gpconfig -c shared_preload_libraries -v dummy_seclabel; +gpstop -ar; +wget 
https://golang.org/dl/go1.14.15.linux-amd64.tar.gz; +tar -C ~/ -xzf go1.14.15.linux-amd64.tar.gz; +PATH=$PATH:~/go/bin GOPATH=~/go make depend build install test end_to_end -C go/src/github.com/greenplum-db/gpbackup/" diff --git a/report/report.go b/report/report.go index 58ca64828..9bf8d751c 100644 --- a/report/report.go +++ b/report/report.go @@ -15,6 +15,7 @@ import ( "github.com/greenplum-db/gp-common-go-libs/gplog" "github.com/greenplum-db/gp-common-go-libs/iohelper" "github.com/greenplum-db/gp-common-go-libs/operating" + "github.com/greenplum-db/gpbackup/arenadata" "github.com/greenplum-db/gpbackup/history" "github.com/greenplum-db/gpbackup/utils" "github.com/pkg/errors" @@ -289,6 +290,14 @@ func PrintObjectCounts(reportFile io.WriteCloser, objectCounts map[string]int) { * users will never use a +dev version in production. */ func EnsureBackupVersionCompatibility(backupVersion string, restoreVersion string) { + if strings.Index(restoreVersion, "_arenadata") != -1 { + // arenadata build + if strings.Index(backupVersion, "_arenadata") != -1 { + arenadata.EnsureAdVersionCompatibility(backupVersion, restoreVersion) + backupVersion = arenadata.GetOriginalVersion(backupVersion) + } + restoreVersion = arenadata.GetOriginalVersion(restoreVersion) + } backupSemVer, err := semver.Make(backupVersion) gplog.FatalOnError(err) restoreSemVer, err := semver.Make(restoreVersion) diff --git a/report/report_test.go b/report/report_test.go index 1d603ce61..a23aaf676 100644 --- a/report/report_test.go +++ b/report/report_test.go @@ -392,6 +392,26 @@ restore status: Success but non-fatal errors occurred. See log file .+ for It("Does not panic if gpbackup version equals gprestore version", func() { EnsureBackupVersionCompatibility("0.1.0", "0.1.0") }) + It("Panics if gpbackup version is greater than gprestore version (arenadata build)", func() { + defer testhelper.ShouldPanicWithMessage("gprestore arenadata9 cannot restore a backup taken with gpbackup arenadata10; please use gprestore arenadata10 or later.") + EnsureBackupVersionCompatibility("1.21.0_arenadata10", "1.21.0_arenadata9") + }) + It("Does not panic if gpbackup version is less than gprestore version (arenadata build)", func() { + EnsureBackupVersionCompatibility("1.21.0_arenadata9", "1.21.0_arenadata10") + }) + It("Does not panic if gpbackup version equals gprestore version (arenadata build)", func() { + EnsureBackupVersionCompatibility("1.20.4_arenadata1", "1.20.4_arenadata1") + }) + It("Does not panic if gpbackup version equals gprestore version (arenadata dev build)", func() { + EnsureBackupVersionCompatibility("1.20.4_arenadata2+dev.1.g768b7e0", "1.20.4_arenadata2+dev.1.g768b7e0") + }) + It("Does not panic if gpbackup has upstream version, while gprestore has arenadata version", func() { + EnsureBackupVersionCompatibility("1.20.4", "1.20.4_arenadata2") + }) + It("Panics if gpbackup has upstream version, gprestore has arenadata version and gpbackup version is greater", func() { + defer testhelper.ShouldPanicWithMessage("gprestore 1.20.4 cannot restore a backup taken with gpbackup 1.20.5; please use gprestore 1.20.5 or later.") + EnsureBackupVersionCompatibility("1.20.5", "1.20.4_arenadata2") + }) }) Describe("EnsureDatabaseVersionCompatibility", func() { var restoreVersion dbconn.GPDBVersion From 2c9c8dfe252c314caa8556a4fe8795e551047d38 Mon Sep 17 00:00:00 2001 From: Kevin Yeap Date: Wed, 13 Oct 2021 17:53:26 -0500 Subject: [PATCH 06/25] Disable incremental metadata query without leaf-partition-data As an optimization step, we can skip 
gathering information for incremental metadata when the --leaf-partition-data flag is not specified. This is because we do not support taking incremental backups on full backups without the --leaf-partition-data flag.

Co-authored-by: Kevin Yeap
Co-authored-by: Shivram Mani
--- backup/backup.go | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-)
diff --git a/backup/backup.go b/backup/backup.go
index 8c139705e..5191095f7 100644
--- a/backup/backup.go
+++ b/backup/backup.go
@@ -119,8 +119,11 @@ func DoBackup() {
 	gplog.Info("Gathering table state information")
 	metadataTables, dataTables := RetrieveAndProcessTables()
-	if !(MustGetFlagBool(options.METADATA_ONLY) || MustGetFlagBool(options.DATA_ONLY)) {
+	// This must be a full backup with --leaf-partition-data to query for incremental metadata
+	if !(MustGetFlagBool(options.METADATA_ONLY) || MustGetFlagBool(options.DATA_ONLY)) && MustGetFlagBool(options.LEAF_PARTITION_DATA) {
 		backupIncrementalMetadata()
+	} else {
+		gplog.Verbose("Skipping query for incremental metadata.")
 	}
 	CheckTablesContainData(dataTables)
 	metadataFilename := globalFPInfo.GetMetadataFilePath()
From 4b951b6e3098699c5c7c8945559cc191a16fd6c0 Mon Sep 17 00:00:00 2001
From: hughcapet
Date: Thu, 9 Sep 2021 16:46:33 +0300
Subject: [PATCH 07/25] Prohibit inclusion of leaf partitions without --leaf-partition-data

With this commit gpbackup avoids the situation where it is possible to specify the --include-table(-file) flag with a leaf partition table, and optionally with the parent table, but without the --leaf-partition-data flag. The user must be forced to use the --leaf-partition-data flag; otherwise gpbackup executes a COPY operation for both the parent and the leaf partition table, so the data in the partition table is backed up (and restored) twice.
--- backup/validate.go | 3 +++ backup/validate_test.go | 14 ++++++++++++++ 2 files changed, 17 insertions(+)
diff --git a/backup/validate.go b/backup/validate.go
index a93efaf86..e13fc6cad 100644
--- a/backup/validate.go
+++ b/backup/validate.go
@@ -88,6 +88,9 @@ func ValidateTablesExist(conn *dbconn.DBConn, tableList []string, excludeSet boo
 			if partTableMap[tableOid].Level == "i" {
 				gplog.Fatal(nil, "Cannot filter on %s, as it is an intermediate partition table.  Only parent partition tables and leaf partition tables may be specified.", table)
 			}
+			if partTableMap[tableOid].Level == "l" && !MustGetFlagBool(options.LEAF_PARTITION_DATA) {
+				gplog.Fatal(nil, "--leaf-partition-data flag must be specified to filter on %s, as it is a leaf partition table.", table)
+			}
 		}
 	}
diff --git a/backup/validate_test.go b/backup/validate_test.go
index 9d46138e8..ec91ee372 100644
--- a/backup/validate_test.go
+++ b/backup/validate_test.go
@@ -150,6 +150,7 @@ var _ = Describe("backup/validate tests", func() {
 				partitionTables.AddRow("1", "l", "root")
 				mock.ExpectQuery("SELECT (.*)").WillReturnRows(partitionTables)
 				filterList = []string{"public.table1"}
+				_ = cmdFlags.Set(options.LEAF_PARTITION_DATA, "true")
 				backup.ValidateTablesExist(connectionPool, filterList, false)
 			})
 			It("panics if given an intermediate partition table and --leaf-partition-data is set", func() {
@@ -179,6 +180,19 @@ var _ = Describe("backup/validate tests", func() {
 				defer testhelper.ShouldPanicWithMessage("Cannot filter on public.table1, as it is an intermediate partition table.
Only parent partition tables and leaf partition tables may be specified.")
 				backup.ValidateTablesExist(connectionPool, filterList, false)
 			})
+			It("panics if given a leaf partition table and --leaf-partition-data is not set", func() {
+				// Added to handle call to `quote_ident`
+				schemaAndTable.AddRow("public", "table1")
+				mock.ExpectQuery("SELECT (.*)").WillReturnRows(schemaAndTable)
+				//
+				tableRows.AddRow("1", "public.table1")
+				mock.ExpectQuery("SELECT (.*)").WillReturnRows(tableRows)
+				partitionTables.AddRow("1", "l", "root")
+				mock.ExpectQuery("SELECT (.*)").WillReturnRows(partitionTables)
+				filterList = []string{"public.table1"}
+				defer testhelper.ShouldPanicWithMessage("--leaf-partition-data flag must be specified to filter on public.table1, as it is a leaf partition table.")
+				backup.ValidateTablesExist(connectionPool, filterList, false)
+			})
 		})
 	})
 	Describe("Validate various flag combinations that are required or exclusive", func() {
From 279efb502426b05098b975cad62deabc6bce6421 Mon Sep 17 00:00:00 2001
From: hughcapet
Date: Tue, 17 Aug 2021 08:03:21 +0300
Subject: [PATCH 08/25] Order tables for backup by relpages value

Currently the output of the RetrieveAndProcessTables function is ordered by oid, which is not always optimal. This commit orders the list of tables for backup by their size (actually by the relpages value from pg_class). Note that this only makes sense when the relpages value has been updated by vacuum, analyze, or a few DDL commands (such as create index). The RetrieveAndProcessTables wrapper integration test is also fixed: it used an empty flag set and did not properly test table inclusion.
--- backup/queries_relations.go | 58 +++++++--- helper/helper.go | 2 - integration/wrappers_test.go | 211 ++++++++++++++++++++++++++++++++++- options/options.go | 77 +++++++------ 4 files changed, 291 insertions(+), 57 deletions(-)
diff --git a/backup/queries_relations.go b/backup/queries_relations.go
index 28238bcd0..4ac072902 100644
--- a/backup/queries_relations.go
+++ b/backup/queries_relations.go
@@ -91,17 +91,29 @@ func getUserTableRelations(connectionPool *dbconn.DBConn) []Relation {
 	}
 	query := fmt.Sprintf(`
-	SELECT n.oid AS schemaoid,
-		c.oid AS oid,
-		quote_ident(n.nspname) AS schema,
-		quote_ident(c.relname) AS name
-	FROM pg_class c
+	SELECT schemaoid, oid, schema, name FROM (
+		SELECT n.oid AS schemaoid,
+			c.oid AS oid,
+			quote_ident(n.nspname) AS schema,
+			quote_ident(c.relname) AS name,
+			coalesce(prt.pages, c.relpages) AS pages
+		FROM pg_class c
 		JOIN pg_namespace n ON c.relnamespace = n.oid
-	WHERE %s
-	%s
-	AND relkind = 'r'
-	AND %s
-	ORDER BY c.oid`,
+		LEFT JOIN (
+			SELECT
+				p.parrelid,
+				sum(pc.relpages) AS pages
+			FROM pg_partition_rule AS pr
+			JOIN pg_partition AS p ON pr.paroid = p.oid
+			JOIN pg_class AS pc ON pr.parchildrelid = pc.oid
+			GROUP BY p.parrelid
+		) AS prt ON prt.parrelid = c.oid
+		WHERE %s
+		%s
+		AND relkind = 'r'
+		AND %s
+	) res
+	ORDER BY pages DESC, oid`,
 		relationAndSchemaFilterClause(), childPartitionFilter, ExtensionFilterClause("c"))
 	results := make([]Relation, 0)
@@ -115,15 +127,27 @@ func getUserTableRelationsWithIncludeFiltering(connectionPool *dbconn.DBConn, in
 	includeOids := getOidsFromRelationList(connectionPool, includedRelationsQuoted)
 	oidStr := strings.Join(includeOids, ", ")
 	query := fmt.Sprintf(`
-	SELECT n.oid AS schemaoid,
-		c.oid AS oid,
-		quote_ident(n.nspname) AS schema,
-		quote_ident(c.relname) AS name
-	FROM pg_class c
+	SELECT schemaoid, oid, schema, name FROM (
+		SELECT n.oid AS schemaoid,
+			c.oid AS oid,
+ quote_ident(n.nspname) AS schema, + quote_ident(c.relname) AS name, + coalesce(prt.pages, c.relpages) AS pages + FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid - WHERE c.oid IN (%s) + LEFT JOIN ( + SELECT + p.parrelid, + sum(pc.relpages) AS pages + FROM pg_partition_rule AS pr + JOIN pg_partition AS p ON pr.paroid = p.oid + JOIN pg_class AS pc ON pr.parchildrelid = pc.oid + GROUP BY p.parrelid + ) AS prt ON prt.parrelid = c.oid + WHERE c.oid IN (%s) AND (relkind = 'r') - ORDER BY c.oid`, oidStr) + ) res + ORDER BY pages DESC, oid`, oidStr) results := make([]Relation, 0) err := connectionPool.Select(&results, query) diff --git a/helper/helper.go b/helper/helper.go index 899c69fd4..af559a4d1 100644 --- a/helper/helper.go +++ b/helper/helper.go @@ -8,7 +8,6 @@ import ( "os" "os/signal" "runtime/debug" - "sort" "strconv" "strings" "sync" @@ -146,7 +145,6 @@ func getOidListFromFile() ([]int, error) { num, _ := strconv.Atoi(oid) oidList[i] = num } - sort.Ints(oidList) return oidList, nil } diff --git a/integration/wrappers_test.go b/integration/wrappers_test.go index 63c0f58e7..c110b657d 100644 --- a/integration/wrappers_test.go +++ b/integration/wrappers_test.go @@ -12,16 +12,18 @@ import ( ) var _ = Describe("Wrappers Integration", func() { + BeforeEach(func() { + gplog.SetVerbosity(gplog.LOGERROR) // turn off progress bar in the lock-table routine + }) Describe("RetrieveAndProcessTables", func() { BeforeEach(func() { - gplog.SetVerbosity(gplog.LOGERROR) // turn off progress bar in the lock-table routine - var rootCmd = &cobra.Command{} + rootCmd := &cobra.Command{} + includes := []string{"--include-table", "public.foo", "--include-table", "public.BAR"} + rootCmd.SetArgs(options.HandleSingleDashes(includes)) backup.DoInit(rootCmd) // initialize the ObjectCount + rootCmd.Execute() }) It("returns the data tables that have names with special characters", func() { - _ = backupCmdFlags.Set(options.INCLUDE_RELATION, "public.foo") - _ = backupCmdFlags.Set(options.INCLUDE_RELATION, "public.BAR") - testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.foo(i int); INSERT INTO public.foo VALUES (1);") testhelper.AssertQueryRuns(connectionPool, `CREATE TABLE public."BAR"(i int); INSERT INTO public."BAR" VALUES (1);`) defer testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.foo;") @@ -37,4 +39,203 @@ var _ = Describe("Wrappers Integration", func() { Expect(dataTables[1].Name).To(Equal(`"BAR"`)) }) }) + Describe("Tables order when no filtering is used or tables filtering is defined", func() { + BeforeEach(func() { + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.empty(i int);") + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.ten(i int); INSERT INTO public.ten SELECT generate_series(0, 10);") + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.thousands(i int); INSERT INTO public.thousands SELECT generate_series(0, 10000);") + testhelper.AssertQueryRuns(connectionPool, "ANALYZE") + + connectionPool.MustBegin(0) + }) + AfterEach(func() { + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.empty;") + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.ten;") + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.thousands;") + + connectionPool.MustCommit(0) + }) + It("returns the data tables in descending order of their sizes (relpages)", func() { + rootCmd := &cobra.Command{} + backup.DoInit(rootCmd) // initialize the ObjectCount + + _, dataTables := backup.RetrieveAndProcessTables() + 
Expect(len(dataTables)).To(Equal(3)) + Expect(dataTables[0].Name).To(Equal("thousands")) + Expect(dataTables[1].Name).To(Equal("ten")) + Expect(dataTables[2].Name).To(Equal("empty")) + }) + It("returns the data tables in descending order of their sizes (relpages) when include-tables(-file) flag is used", func() { + rootCmd := &cobra.Command{} + includes := []string{"--include-table", "public.empty", "--include-table", "public.ten"} + rootCmd.SetArgs(options.HandleSingleDashes(includes)) + backup.DoInit(rootCmd) // initialize the ObjectCount + rootCmd.Execute() + + _, dataTables := backup.RetrieveAndProcessTables() + Expect(len(dataTables)).To(Equal(2)) + Expect(dataTables[0].Name).To(Equal("ten")) + Expect(dataTables[1].Name).To(Equal("empty")) + }) + It("returns the data tables in descending order of their sizes (relpages) when exclude-tables(s) flag is used", func() { + rootCmd := &cobra.Command{} + includes := []string{"--exclude-table", "public.thousands"} + rootCmd.SetArgs(options.HandleSingleDashes(includes)) + backup.DoInit(rootCmd) // initialize the ObjectCount + rootCmd.Execute() + + _, dataTables := backup.RetrieveAndProcessTables() + Expect(len(dataTables)).To(Equal(2)) + Expect(dataTables[0].Name).To(Equal("ten")) + Expect(dataTables[1].Name).To(Equal("empty")) + }) + }) + Describe("Tables order when schema filtering is defined", func() { + BeforeEach(func() { + testhelper.AssertQueryRuns(connectionPool, "CREATE SCHEMA filterschema;") + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE filterschema.empty(i int);") + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.ten(i int); INSERT INTO public.ten SELECT generate_series(0, 10);") + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE filterschema.thousands(i int); INSERT INTO filterschema.thousands SELECT generate_series(0, 1000);") + testhelper.AssertQueryRuns(connectionPool, "ANALYZE") + + connectionPool.MustBegin(0) + }) + AfterEach(func() { + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE filterschema.empty;") + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.ten;") + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE filterschema.thousands;") + testhelper.AssertQueryRuns(connectionPool, "DROP SCHEMA filterschema;") + + connectionPool.MustCommit(0) + }) + It("returns the data tables in descending order of their sizes (relpages) when include-schema(s) flag is used", func() { + rootCmd := &cobra.Command{} + includes := []string{"--include-schema", "filterschema"} + rootCmd.SetArgs(options.HandleSingleDashes(includes)) + backup.DoInit(rootCmd) // initialize the ObjectCount + rootCmd.Execute() + + _, dataTables := backup.RetrieveAndProcessTables() + Expect(len(dataTables)).To(Equal(2)) + Expect(dataTables[0].Name).To(Equal("thousands")) + Expect(dataTables[1].Name).To(Equal("empty")) + }) + It("returns the data tables in descending order of their sizes (relpages) when exclude-schema(s) flag is used", func() { + rootCmd := &cobra.Command{} + includes := []string{"--exclude-schema", "public"} + rootCmd.SetArgs(options.HandleSingleDashes(includes)) + backup.DoInit(rootCmd) // initialize the ObjectCount + rootCmd.Execute() + + _, dataTables := backup.RetrieveAndProcessTables() + Expect(len(dataTables)).To(Equal(2)) + Expect(dataTables[0].Name).To(Equal("thousands")) + Expect(dataTables[1].Name).To(Equal("empty")) + }) + }) + Describe("Tables order cases, when there is a partitioned table to backup", func() { + BeforeEach(func() { + testhelper.AssertQueryRuns(connectionPool, 
`CREATE TABLE public.partition_table (id int, gender char(1)) + DISTRIBUTED BY (id) + PARTITION BY LIST (gender) + ( PARTITION girls VALUES ('F'), + PARTITION boys VALUES ('M'), + DEFAULT PARTITION other );`) + testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.partition_table VALUES (generate_series(0,10000), 'F');") + testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.partition_table VALUES (generate_series(0,10), NULL);") + testhelper.AssertQueryRuns(connectionPool, "ANALYZE") + + connectionPool.MustBegin(0) + }) + AfterEach(func() { + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.partition_table") + + connectionPool.MustCommit(0) + }) + It("returns the data tables in descending order of their sizes (relpages), when there is a partitioned table to backup", func() { + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.ten(i int); INSERT INTO public.ten SELECT generate_series(0, 10);") + defer testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.ten;") + testhelper.AssertQueryRuns(connectionPool, "ANALYZE") + + rootCmd := &cobra.Command{} + backup.DoInit(rootCmd) // initialize the ObjectCount + + opts, _ := options.NewOptions(rootCmd.Flags()) + err := opts.ExpandIncludesForPartitions(connectionPool, rootCmd.Flags()) + Expect(err).ShouldNot(HaveOccurred()) + + _, dataTables := backup.RetrieveAndProcessTables() + Expect(len(dataTables)).To(Equal(2)) + Expect(dataTables[0].Name).To(Equal("partition_table")) + Expect(dataTables[1].Name).To(Equal("ten")) + }) + It("returns the data tables in descending order of their sizes (relpages), when there is a partitioned table to backup and leaf-partition-data flag is set", func() { + rootCmd := &cobra.Command{} + includes := []string{"--leaf-partition-data"} + rootCmd.SetArgs(options.HandleSingleDashes(includes)) + backup.DoInit(rootCmd) // initialize the ObjectCount + rootCmd.Execute() + + opts, _ := options.NewOptions(rootCmd.Flags()) + err := opts.ExpandIncludesForPartitions(connectionPool, rootCmd.Flags()) + Expect(err).ShouldNot(HaveOccurred()) + + _, dataTables := backup.RetrieveAndProcessTables() + + Expect(len(dataTables)).To(Equal(3)) + Expect(dataTables[0].Name).To(Equal("partition_table_1_prt_girls")) + Expect(dataTables[1].Name).To(Equal("partition_table_1_prt_other")) + Expect(dataTables[2].Name).To(Equal("partition_table_1_prt_boys")) + }) + }) + Describe("Tables order cases, when there is an AO/CO table to backup", func() { + BeforeEach(func() { + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.empty(i int);") + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.thousands(i int); INSERT INTO public.thousands SELECT generate_series(0, 10000);") + testhelper.AssertQueryRuns(connectionPool, "ANALYZE") + }) + AfterEach(func() { + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.empty;") + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.thousands;") + + connectionPool.MustCommit(0) + }) + It("returns the data tables in descending order of their sizes (relpages), when there is an AO table to backup", func() { + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.hundred(i int) WITH (appendonly=true) DISTRIBUTED RANDOMLY;") + testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.hundred SELECT generate_series(0, 100);") + defer testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.hundred;") + testhelper.AssertQueryRuns(connectionPool, "VACUUM public.hundred") // relpages of AOCO is not updated by 
ANALYZE + + connectionPool.MustBegin(0) //VACUUM cannot be run inside a transaction block + + rootCmd := &cobra.Command{} + backup.DoInit(rootCmd) // initialize the ObjectCount + + _, dataTables := backup.RetrieveAndProcessTables() + Expect(len(dataTables)).To(Equal(3)) + + Expect(dataTables[0].Name).To(Equal("thousands")) + Expect(dataTables[1].Name).To(Equal("hundred")) + Expect(dataTables[2].Name).To(Equal("empty")) + + }) + It("returns the data tables in descending order of their sizes (relpages), when there is an AOCO table to backup", func() { + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.hundred(i int) WITH (appendonly=true, orientation=column) DISTRIBUTED RANDOMLY;") + testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.hundred SELECT generate_series(0, 100);") + defer testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.hundred;") + testhelper.AssertQueryRuns(connectionPool, "VACUUM") // relpages of AOCO is not updated by ANALYZE + + connectionPool.MustBegin(0) //VACUUM cannot be run inside a transaction block + + rootCmd := &cobra.Command{} + backup.DoInit(rootCmd) // initialize the ObjectCount + + _, dataTables := backup.RetrieveAndProcessTables() + Expect(len(dataTables)).To(Equal(3)) + + Expect(dataTables[0].Name).To(Equal("thousands")) + Expect(dataTables[1].Name).To(Equal("hundred")) + Expect(dataTables[2].Name).To(Equal("empty")) + }) + }) }) diff --git a/options/options.go b/options/options.go index f3e823a5c..8ae5826e8 100644 --- a/options/options.go +++ b/options/options.go @@ -302,43 +302,54 @@ func (o Options) getUserTableRelationsWithIncludeFiltering(connectionPool *dbcon } query := fmt.Sprintf(` -SELECT - n.nspname AS schemaname, - c.relname AS tablename -FROM pg_class c -JOIN pg_namespace n - ON c.relnamespace = n.oid -WHERE %s -AND ( - -- Get tables in the include list - c.oid IN (%s) - -- Get parent partition tables whose children are in the include list - OR c.oid IN ( - SELECT - p.parrelid - FROM pg_partition p - JOIN pg_partition_rule r ON p.oid = r.paroid - WHERE p.paristemplate = false - AND r.parchildrelid IN (%s) - ) - -- Get external partition tables whose siblings are in the include list - OR c.oid IN ( +SELECT schemaname, tablename FROM ( + SELECT c.oid AS oid, + n.nspname AS schemaname, + c.relname AS tablename, + coalesce(prt.pages, c.relpages) AS pages + FROM pg_class c + JOIN pg_namespace n ON c.relnamespace = n.oid + LEFT JOIN ( SELECT - r.parchildrelid - FROM pg_partition_rule r - JOIN pg_exttable e ON r.parchildrelid = e.reloid - WHERE r.paroid IN ( + p.parrelid, + sum(pc.relpages) AS pages + FROM pg_partition_rule AS pr + JOIN pg_partition AS p ON pr.paroid = p.oid + JOIN pg_class AS pc ON pr.parchildrelid = pc.oid + GROUP BY p.parrelid + ) AS prt ON prt.parrelid = c.oid + WHERE %s + AND ( + -- Get tables in the include list + c.oid IN (%s) + -- Get parent partition tables whose children are in the include list + OR c.oid IN ( SELECT - pr.paroid - FROM pg_partition_rule pr - WHERE pr.parchildrelid IN (%s) + p.parrelid + FROM pg_partition p + JOIN pg_partition_rule r ON p.oid = r.paroid + WHERE p.paristemplate = false + AND r.parchildrelid IN (%s) ) + -- Get external partition tables whose siblings are in the include list + OR c.oid IN ( + SELECT + r.parchildrelid + FROM pg_partition_rule r + JOIN pg_exttable e ON r.parchildrelid = e.reloid + WHERE r.paroid IN ( + SELECT + pr.paroid + FROM pg_partition_rule pr + WHERE pr.parchildrelid IN (%s) + ) + ) + %s ) - %s -) -AND (relkind = 'r' OR relkind = 'f') -AND 
%s
-ORDER BY c.oid;`, o.schemaFilterClause("n"), oidStr, oidStr, oidStr, childPartitionFilter, ExtensionFilterClause("c"))
+	AND (relkind = 'r' OR relkind = 'f')
+	AND %s
+) res
+ORDER BY pages DESC, oid;`, o.schemaFilterClause("n"), oidStr, oidStr, oidStr, childPartitionFilter, ExtensionFilterClause("c"))
 	results := make([]FqnStruct, 0)
 	err = connectionPool.Select(&results, query)
From c17bbdff2f27511a72da3b7fb9f0aced708cb4a5 Mon Sep 17 00:00:00 2001
From: hughcapet
Date: Thu, 28 Oct 2021 14:49:23 +0700
Subject: [PATCH 09/25] Fix TerminateHangingCopySessions to work properly with GPDB6

The TerminateHangingCopySessions function used to query the old column names from the pg_stat_activity table. This also used to cause periodic failures of the SIGINT end-to-end tests.
--- utils/util.go | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-)
diff --git a/utils/util.go b/utils/util.go
index c782de669..283d9439e 100644
--- a/utils/util.go
+++ b/utils/util.go
@@ -148,13 +148,23 @@ func InitializeSignalHandler(cleanupFunc func(bool), procDesc string, termFlag *
 // TODO: Uniquely identify COPY commands in the multiple data file case to allow terminating sessions
 func TerminateHangingCopySessions(connectionPool *dbconn.DBConn, fpInfo filepath.FilePathInfo, appName string) {
+	var query string
 	copyFileName := fpInfo.GetSegmentPipePathForCopyCommand()
-	query := fmt.Sprintf(`SELECT
-	pg_terminate_backend(procpid)
-FROM pg_stat_activity
-WHERE application_name = '%s'
-AND current_query LIKE '%%%s%%'
-AND procpid <> pg_backend_pid()`, appName, copyFileName)
+	if connectionPool.Version.Before("6") {
+		query = fmt.Sprintf(`SELECT
+		pg_terminate_backend(procpid)
+		FROM pg_stat_activity
+		WHERE application_name = '%s'
+		AND current_query LIKE '%%%s%%'
+		AND procpid <> pg_backend_pid()`, appName, copyFileName)
+	} else {
+		query = fmt.Sprintf(`SELECT
+		pg_terminate_backend(pid)
+		FROM pg_stat_activity
+		WHERE application_name = '%s'
+		AND query LIKE '%%%s%%'
+		AND pid <> pg_backend_pid()`, appName, copyFileName)
+	}
 	// We don't check the error as the connection may have finished or been previously terminated
 	_, _ = connectionPool.Exec(query)
 }
From 76830c67075ae5e21afe0b0ce1dee7b0ee91418b Mon Sep 17 00:00:00 2001
From: hughcapet
Date: Tue, 19 Oct 2021 10:27:25 +0300
Subject: [PATCH 10/25] Handle empty data tables set properly

With this commit gpbackup avoids a situation where an 'out of range' error is raised when an incremental backup is run for a database containing only unmodified AO tables and/or external/foreign tables.
--- backup/backup.go | 12 ++++++++---- backup/data.go | 27 +++++++++++---------------- backup/data_test.go | 42 ++++++++++++++++++++++++++++-------------- 3 files changed, 47 insertions(+), 34 deletions(-)
diff --git a/backup/backup.go b/backup/backup.go
index fd1509155..15598211f 100644
--- a/backup/backup.go
+++ b/backup/backup.go
@@ -119,13 +119,18 @@ func DoBackup() {
 	gplog.Info("Gathering table state information")
 	metadataTables, dataTables := RetrieveAndProcessTables()
+	dataTables, numExtOrForeignTables := GetBackupDataSet(dataTables)
+	if len(dataTables) == 0 {
+		gplog.Warn("No tables in backup set contain data.
Performing metadata-only backup instead.")
+		backupReport.MetadataOnly = true
+	}
 	// This must be a full backup with --leaf-partition-data to query for incremental metadata
 	if !(MustGetFlagBool(options.METADATA_ONLY) || MustGetFlagBool(options.DATA_ONLY)) && MustGetFlagBool(options.LEAF_PARTITION_DATA) {
 		backupIncrementalMetadata()
 	} else {
 		gplog.Verbose("Skipping query for incremental metadata.")
 	}
-	CheckTablesContainData(dataTables)
+
 	metadataFilename := globalFPInfo.GetMetadataFilePath()
 	gplog.Info("Metadata will be written to %s", metadataFilename)
 	metadataFile := utils.NewFileWithByteCountFromFile(metadataFilename)
@@ -162,6 +167,7 @@ func DoBackup() {
 		backupReport.RestorePlan = PopulateRestorePlan(backupSetTables, targetBackupRestorePlan, dataTables)
 		backupData(backupSetTables)
 	}
+	printDataBackupWarnings(numExtOrForeignTables)
 	if MustGetFlagBool(options.WITH_STATS) {
 		backupStatistics(metadataTables)
 	}
@@ -265,9 +271,7 @@ func backupData(tables []Table) {
 		utils.VerifyHelperVersionOnSegments(version, globalCluster)
 		oidList := make([]string, 0, len(tables))
 		for _, table := range tables {
-			if !table.SkipDataBackup() {
-				oidList = append(oidList, fmt.Sprintf("%d", table.Oid))
-			}
+			oidList = append(oidList, fmt.Sprintf("%d", table.Oid))
 		}
 		utils.WriteOidListToSegments(oidList, globalCluster, globalFPInfo)
 		utils.CreateFirstSegmentPipeOnAllHosts(oidList[0], globalCluster, globalFPInfo)
diff --git a/backup/data.go b/backup/data.go
index f35f3759c..7f92cdcde 100644
--- a/backup/data.go
+++ b/backup/data.go
@@ -110,13 +110,7 @@ func BackupSingleTableData(table Table, rowsCopiedMap map[uint32]int64, counters
 }
 func backupDataForAllTables(tables []Table) []map[uint32]int64 {
-	var numExtOrForeignTables int64
-	for _, table := range tables {
-		if table.SkipDataBackup() {
-			numExtOrForeignTables++
-		}
-	}
-	counters := BackupProgressCounters{NumRegTables: 0, TotalRegTables: int64(len(tables)) - numExtOrForeignTables}
+	counters := BackupProgressCounters{NumRegTables: 0, TotalRegTables: int64(len(tables))}
 	counters.ProgressBar = utils.NewProgressBar(int(counters.TotalRegTables), "Tables backed up: ", utils.PB_INFO)
 	counters.ProgressBar.Start()
 	rowsCopiedMaps := make([]map[uint32]int64, connectionPool.NumConns)
@@ -141,10 +135,6 @@ func backupDataForAllTables(tables []Table) []map[uint32]int64 {
 				return
 			}
-			if table.SkipDataBackup() {
-				gplog.Verbose("Skipping data backup of table %s because it is either an external or foreign table.", table.FQN())
-				continue
-			}
 			// If a random external SQL command had queued an AccessExclusiveLock acquisition request
 			// against this next table, the --job worker thread would deadlock on the COPY attempt.
// To prevent gpbackup from hanging, we attempt to acquire an AccessShareLock on the @@ -225,7 +215,6 @@ func backupDataForAllTables(tables []Table) []map[uint32]int64 { } counters.ProgressBar.Finish() - printDataBackupWarnings(numExtOrForeignTables) return rowsCopiedMaps } @@ -236,16 +225,22 @@ func printDataBackupWarnings(numExtTables int64) { } } -func CheckTablesContainData(tables []Table) { +// Remove external/foreign tables from the data backup set +func GetBackupDataSet(tables []Table) ([]Table, int64) { + var backupDataSet []Table + var numExtOrForeignTables int64 + if !backupReport.MetadataOnly { for _, table := range tables { if !table.SkipDataBackup() { - return + backupDataSet = append(backupDataSet, table) + } else { + gplog.Verbose("Skipping data backup of table %s because it is either an external or foreign table.", table.FQN()) + numExtOrForeignTables++ } } - gplog.Warn("No tables in backup set contain data. Performing metadata-only backup instead.") - backupReport.MetadataOnly = true } + return backupDataSet, numExtOrForeignTables } // Acquire AccessShareLock on a table with NOWAIT option. If we are unable to acquire diff --git a/backup/data_test.go b/backup/data_test.go index d4d7db22e..ffb0b31fa 100644 --- a/backup/data_test.go +++ b/backup/data_test.go @@ -197,7 +197,7 @@ var _ = Describe("backup/data tests", func() { Expect(counters.NumRegTables).To(Equal(int64(1))) }) }) - Describe("CheckDBContainsData", func() { + Describe("GetBackupDataSet", func() { config := history.BackupConfig{} var testTable backup.Table BeforeEach(func() { @@ -208,24 +208,38 @@ var _ = Describe("backup/data tests", func() { TableDefinition: backup.TableDefinition{}, } }) - It("changes backup type to metadata if no tables in DB", func() { - backup.CheckTablesContainData([]backup.Table{}) - Expect(backup.GetReport().BackupConfig.MetadataOnly).To(BeTrue()) + It("excludes a foreign table", func() { + foreignDef := backup.ForeignTableDefinition{Oid: 23, Options: "", Server: "fs"} + testTable.ForeignDef = foreignDef + tables := []backup.Table{testTable} + + dataTables, numExtOrForeignTables := backup.GetBackupDataSet(tables) + Expect(len(dataTables)).To(Equal(0)) + Expect(numExtOrForeignTables).To(Equal(int64(1))) }) - It("changes backup type to metadata if only external or foreign tables in database", func() { + It("excludes an external table", func() { testTable.IsExternal = true - backup.CheckTablesContainData([]backup.Table{testTable}) - Expect(backup.GetReport().BackupConfig.MetadataOnly).To(BeTrue()) + tables := []backup.Table{testTable} + + dataTables, numExtOrForeignTables := backup.GetBackupDataSet(tables) + Expect(len(dataTables)).To(Equal(0)) + Expect(numExtOrForeignTables).To(Equal(int64(1))) }) - It("does not change backup type if metadata-only backup", func() { + It("doesn't exclude regular table", func() { + tables := []backup.Table{testTable} + + dataTables, numExtOrForeignTables := backup.GetBackupDataSet(tables) + Expect(len(dataTables)).To(Equal(1)) + Expect(numExtOrForeignTables).To(Equal(int64(0))) + }) + It("returns an empty set, if --metadata-only flag is set and a regular table is given", func() { config.MetadataOnly = true backup.SetReport(&report.Report{BackupConfig: config}) - backup.CheckTablesContainData([]backup.Table{}) - Expect(backup.GetReport().BackupConfig.MetadataOnly).To(BeTrue()) - }) - It("does not change backup type if tables present in database", func() { - backup.CheckTablesContainData([]backup.Table{testTable}) - 
Expect(backup.GetReport().BackupConfig.MetadataOnly).To(BeFalse())
+			tables := []backup.Table{testTable}
+
+			dataTables, numExtOrForeignTables := backup.GetBackupDataSet(tables)
+			Expect(len(dataTables)).To(Equal(0))
+			Expect(numExtOrForeignTables).To(Equal(int64(0)))
 		})
 	})
 })
From 39cc1e865d805997fc59c6698a038ebea6987623 Mon Sep 17 00:00:00 2001
From: Vasiliy Ivanov
Date: Tue, 14 Dec 2021 13:39:07 +1000
Subject: [PATCH 11/25] remove checks for external tables in queued backup

We moved all external table checks out of the backup data routines in 76830c67075ae5e21afe0b0ce1dee7b0ee91418b. A new routine that backs up into a single data file with a connection queue was implemented in 5925c1edf3be958e10044c98eff8f037ddc55227. So remove the now-useless checks from that new routine as well.

Co-authored-by: Polina Bungina
--- backup/data.go | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-)
diff --git a/backup/data.go b/backup/data.go
index cdd913ac3..04e20a7d1 100644
--- a/backup/data.go
+++ b/backup/data.go
@@ -120,13 +120,7 @@ func BackupSingleTableData(table Table, rowsCopiedMap map[uint32]int64, counters
 // workers encounter locking issues. Worker 0 already has all locks on the
 // tables so it will not run into locking issues.
 func backupDataForAllTablesCopyQueue(tables []Table) []map[uint32]int64 {
-	var numExtOrForeignTables int64
-	for _, table := range tables {
-		if table.SkipDataBackup() {
-			numExtOrForeignTables++
-		}
-	}
-	counters := BackupProgressCounters{NumRegTables: 0, TotalRegTables: int64(len(tables)) - numExtOrForeignTables}
+	counters := BackupProgressCounters{NumRegTables: 0, TotalRegTables: int64(len(tables))}
 	counters.ProgressBar = utils.NewProgressBar(int(counters.TotalRegTables), "Tables backed up: ", utils.PB_INFO)
 	counters.ProgressBar.Start()
 	rowsCopiedMaps := make([]map[uint32]int64, connectionPool.NumConns)
@@ -158,11 +152,6 @@ func backupDataForAllTablesCopyQueue(tables []Table) []map[uint32]int64 {
 				return
 			}
-			if table.SkipDataBackup() {
-				gplog.Verbose("Skipping data backup of table %s because it is either an external or foreign table.", table.FQN())
-				oidMap.Store(table.Oid, Complete)
-				continue
-			}
 			// If a random external SQL command had queued an AccessExclusiveLock acquisition request
 			// against this next table, the --job worker thread would deadlock on the COPY attempt.
// To prevent gpbackup from hanging, we attempt to acquire an AccessShareLock on the @@ -261,7 +250,6 @@ func backupDataForAllTablesCopyQueue(tables []Table) []map[uint32]int64 { } counters.ProgressBar.Finish() - printDataBackupWarnings(numExtOrForeignTables) return rowsCopiedMaps } From 7f90bf41106ca7e5f20f0c296eaaade95ac52a40 Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Sat, 5 Mar 2022 10:46:22 +0000 Subject: [PATCH 12/25] change golang version to 1.17.6 according to eb19837 --- arenadata/run_gpbackup_tests.bash | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arenadata/run_gpbackup_tests.bash b/arenadata/run_gpbackup_tests.bash index 174ff9d18..ccea91d1d 100644 --- a/arenadata/run_gpbackup_tests.bash +++ b/arenadata/run_gpbackup_tests.bash @@ -14,6 +14,6 @@ source /usr/local/greenplum-db-devel/greenplum_path.sh; source ~/gpdb_src/gpAux/gpdemo/gpdemo-env.sh; gpconfig -c shared_preload_libraries -v dummy_seclabel; gpstop -ar; -wget https://golang.org/dl/go1.14.15.linux-amd64.tar.gz; -tar -C ~/ -xzf go1.14.15.linux-amd64.tar.gz; +wget https://golang.org/dl/go1.17.6.linux-amd64.tar.gz; +tar -C ~/ -xzf go1.17.6.linux-amd64.tar.gz; PATH=$PATH:~/go/bin GOPATH=~/go make depend build install test end_to_end -C go/src/github.com/greenplum-db/gpbackup/" From cd3e730cff06bc6b7c8cba3c3ff32371a9a65d32 Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Wed, 6 Jul 2022 11:30:43 +1000 Subject: [PATCH 13/25] fix own tests after upstream refactoring --- report/report_test.go | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/report/report_test.go b/report/report_test.go index bb04056e1..3f63eeb5d 100644 --- a/report/report_test.go +++ b/report/report_test.go @@ -394,23 +394,23 @@ restore status: Success but non-fatal errors occurred. 
See log file .+ for
 		})
 		It("Panics if gpbackup version is greater than gprestore version (arenadata build)", func() {
 			defer testhelper.ShouldPanicWithMessage("gprestore arenadata9 cannot restore a backup taken with gpbackup arenadata10; please use gprestore arenadata10 or later.")
-			EnsureBackupVersionCompatibility("1.21.0_arenadata10", "1.21.0_arenadata9")
+			report.EnsureBackupVersionCompatibility("1.21.0_arenadata10", "1.21.0_arenadata9")
 		})
 		It("Does not panic if gpbackup version is less than gprestore version (arenadata build)", func() {
-			EnsureBackupVersionCompatibility("1.21.0_arenadata9", "1.21.0_arenadata10")
+			report.EnsureBackupVersionCompatibility("1.21.0_arenadata9", "1.21.0_arenadata10")
 		})
 		It("Does not panic if gpbackup version equals gprestore version (arenadata build)", func() {
-			EnsureBackupVersionCompatibility("1.20.4_arenadata1", "1.20.4_arenadata1")
+			report.EnsureBackupVersionCompatibility("1.20.4_arenadata1", "1.20.4_arenadata1")
 		})
 		It("Does not panic if gpbackup version equals gprestore version (arenadata dev build)", func() {
-			EnsureBackupVersionCompatibility("1.20.4_arenadata2+dev.1.g768b7e0", "1.20.4_arenadata2+dev.1.g768b7e0")
+			report.EnsureBackupVersionCompatibility("1.20.4_arenadata2+dev.1.g768b7e0", "1.20.4_arenadata2+dev.1.g768b7e0")
 		})
 		It("Does not panic if gpbackup has upstream version, while gprestore has arenadata version", func() {
-			EnsureBackupVersionCompatibility("1.20.4", "1.20.4_arenadata2")
+			report.EnsureBackupVersionCompatibility("1.20.4", "1.20.4_arenadata2")
 		})
 		It("Panics if gpbackup has upstream version, gprestore has arenadata version and gpbackup version is greater", func() {
 			defer testhelper.ShouldPanicWithMessage("gprestore 1.20.4 cannot restore a backup taken with gpbackup 1.20.5; please use gprestore 1.20.5 or later.")
-			EnsureBackupVersionCompatibility("1.20.5", "1.20.4_arenadata2")
+			report.EnsureBackupVersionCompatibility("1.20.5", "1.20.4_arenadata2")
 		})
 	})
 	Describe("EnsureDatabaseVersionCompatibility", func() {
 		var restoreVersion dbconn.GPDBVersion
From cdbf50f1b69379e306077fcdb6e35f8398edb6e2 Mon Sep 17 00:00:00 2001
From: Vasiliy Ivanov
Date: Wed, 6 Jul 2022 13:11:20 +1000
Subject: [PATCH 14/25] fix locks test affected by backup table order

Our fork orders tables by relpages, not by oid. As a result, other tables were backed up first.
--- end_to_end/locks_test.go | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-)
diff --git a/end_to_end/locks_test.go b/end_to_end/locks_test.go
index 1e93c3b1e..a45cb911f 100644
--- a/end_to_end/locks_test.go
+++ b/end_to_end/locks_test.go
@@ -200,8 +200,8 @@ var _ = Describe("Deadlock handling", func() {
 		// Concurrently wait for gpbackup to block on the trigger metadata dump section. Once we
 		// see gpbackup blocked, request AccessExclusiveLock (to imitate a TRUNCATE or VACUUM
 		// FULL) on all the test tables.
-		dataTables := []string{`public."FOObar"`, "public.foo", "public.holds", "public.sales", "public.bigtable",
-			"schema2.ao1", "schema2.ao2", "schema2.foo2", "schema2.foo3", "schema2.returns"}
+		dataTables := []string{"public.bigtable", "public.holds", "public.foo", "schema2.foo3", "schema2.ao1",
+			"schema2.ao2", `public."FOObar"`, "public.sales", "schema2.foo2", "schema2.returns"}
 		for _, dataTable := range dataTables {
 			go func(dataTable string) {
 				accessExclusiveLockConn := testutils.SetupTestDbConn("testdb")
@@ -260,7 +260,7 @@ var _ = Describe("Deadlock handling", func() {
 		// Sanity check that 10 deadlock traps were placed during the test
 		Expect(accessExclBlockedLockCount).To(Equal(10))
 		// No non-main worker should have been able to run COPY due to deadlock detection
-		for i := 1; i < 2; i++ {
+		for i := 1; i <= 2; i++ {
 			expectedLockString := fmt.Sprintf("[DEBUG]:-Worker %d: LOCK TABLE ", i)
 			Expect(stdout).To(ContainSubstring(expectedLockString))
@@ -270,7 +270,7 @@ var _ = Describe("Deadlock handling", func() {
 			unexpectedCopyString := fmt.Sprintf("[DEBUG]:-Worker %d: COPY ", i)
 			Expect(stdout).ToNot(ContainSubstring(unexpectedCopyString))
-			expectedLockString = fmt.Sprintf(`Locks held on table %s`, dataTables[i])
+			expectedLockString = fmt.Sprintf(`Locks held on table %s`, dataTables[i-1])
 			Expect(stdout).To(ContainSubstring(expectedLockString))
 			Expect(stdout).To(ContainSubstring(`"Mode":"AccessExclusiveLock"`))
From 11da7e0a9f26c83dd04bed6ec067345e6f30a4fa Mon Sep 17 00:00:00 2001
From: Dennis Kovalenko <84862840+dnskvlnk@users.noreply.github.com>
Date: Wed, 24 Aug 2022 08:03:41 +0300
Subject: [PATCH 15/25] ADBDEV-2901 Fix order of restoring metadata (#23)

Initially, gpbackup created consistent copies of databases, ordering all objects by oid, which produced a correct sequence of objects. But in ADBDEV-1973 (#9) the backup order was changed and objects started to be sorted by their size, which broke the logic of the backup process. This patch restores ordering by oid without breaking the size-based order, because gpbackup stores two different lists of metadata and data objects. It reorders the metadata only, so all objects can be correctly restored, but does not alter the order of the data objects, so the data can still be recovered in any desired way.
--- backup/backup.go | 2 +- backup/dependencies.go | 19 +++++++++++++++++++ backup/wrappers.go | 1 + end_to_end/end_to_end_suite_test.go | 25 +++++++++++++++++++++++++ 4 files changed, 46 insertions(+), 1 deletion(-)
diff --git a/backup/backup.go b/backup/backup.go
index 675192c20..137fe6fc9 100644
--- a/backup/backup.go
+++ b/backup/backup.go
@@ -226,10 +226,10 @@ func backupPredata(metadataFile *utils.FileWithByteCount, tables []Table, tableO
 	objects := make([]Sortable, 0)
 	metadataMap := make(MetadataMap)
+	objects = append(objects, convertToSortableSlice(tables)...)
 	if !tableOnly {
 		functions, funcInfoMap = retrieveFunctions(&objects, metadataMap)
 	}
-	objects = append(objects, convertToSortableSlice(tables)...)
relationMetadata := GetMetadataForObjectType(connectionPool, TYPE_RELATION) addToMetadataMap(relationMetadata, metadataMap) diff --git a/backup/dependencies.go b/backup/dependencies.go index 6f3f80274..8ecd28fbf 100644 --- a/backup/dependencies.go +++ b/backup/dependencies.go @@ -2,6 +2,7 @@ package backup import ( "fmt" + "sort" "strings" "github.com/greenplum-db/gp-common-go-libs/dbconn" @@ -85,6 +86,24 @@ type Sortable interface { GetUniqueID() UniqueID } +type Sortables []Sortable + +func (s Sortables) Len() int { + return len(s) +} +func (s Sortables) Swap(i, j int) { + s[i], s[j] = s[j], s[i] +} +func (s Sortables) Less(i, j int) bool { + return s[i].GetUniqueID().Oid < s[j].GetUniqueID().Oid +} + +func SortByOid(sortables []Sortable) []Sortable { + s := Sortables(sortables) + sort.Sort(s) + return []Sortable(s) +} + func TopologicalSort(slice []Sortable, dependencies DependencyMap) []Sortable { inDegrees := make(map[UniqueID]int) dependencyIndexes := make(map[UniqueID]int) diff --git a/backup/wrappers.go b/backup/wrappers.go index 90ff95fe7..295af9a39 100644 --- a/backup/wrappers.go +++ b/backup/wrappers.go @@ -566,6 +566,7 @@ func backupDependentObjects(metadataFile *utils.FileWithByteCount, tables []Tabl gplog.Verbose("Writing CREATE statements for dependent objects to metadata file") + sortables = SortByOid(sortables) backupSet := createBackupSet(sortables) relevantDeps := GetDependencies(connectionPool, backupSet) if connectionPool.Version.Is("4") && !tableOnly { diff --git a/end_to_end/end_to_end_suite_test.go b/end_to_end/end_to_end_suite_test.go index 5d83baf45..d4ba253e9 100644 --- a/end_to_end/end_to_end_suite_test.go +++ b/end_to_end/end_to_end_suite_test.go @@ -1445,6 +1445,31 @@ var _ = Describe("backup and restore end to end tests", func() { assertArtifactsCleaned(restoreConn, timestamp) }) + It("runs gpbackup and gprestore to backup functions depending on table row's type", func() { + skipIfOldBackupVersionBefore("1.19.0") + + testhelper.AssertQueryRuns(backupConn, "CREATE TABLE table_provides_type (n int);") + defer testhelper.AssertQueryRuns(backupConn, "DROP TABLE table_provides_type;") + + testhelper.AssertQueryRuns(backupConn, "INSERT INTO table_provides_type values (1);") + testhelper.AssertQueryRuns(backupConn, "CREATE OR REPLACE FUNCTION func_depends_on_row_type(arg table_provides_type[]) RETURNS void AS $$ BEGIN; SELECT NULL; END; $$ LANGUAGE SQL;") + + defer testhelper.AssertQueryRuns(backupConn, "DROP FUNCTION func_depends_on_row_type(arg table_provides_type[]);") + + timestamp := gpbackup(gpbackupPath, backupHelperPath) + gprestore(gprestorePath, restoreHelperPath, timestamp, + "--redirect-db", "restoredb") + + assertRelationsCreated(restoreConn, TOTAL_RELATIONS+1) // for 1 new table + assertDataRestored(restoreConn, schema2TupleCounts) + assertDataRestored(restoreConn, map[string]int{ + "public.foo": 40000, + "public.holds": 50000, + "public.sales": 13, + "public.table_provides_type": 1}) + + assertArtifactsCleaned(restoreConn, timestamp) + }) It("Can restore xml with xmloption set to document", func() { testutils.SkipIfBefore6(backupConn) // Set up the XML table that contains XML content From 8069da7a7e57a3f893d99be93bef8eddd8e59b04 Mon Sep 17 00:00:00 2001 From: Dmitry Kamovsky Date: Thu, 10 Nov 2022 07:40:15 +0000 Subject: [PATCH 16/25] Add gitlab pipeline file --- .gitlab-ci.yml | 4 ++++ 1 file changed, 4 insertions(+) create mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml new file mode 100644 index 000000000..c18467b7f --- 
/dev/null +++ b/.gitlab-ci.yml @@ -0,0 +1,4 @@ +include: + - project: 'arenadata/infrastructure/code/ci/gitlab_ci_files' + ref: master + file: '/development/gpbackup.yaml' From d2a8b6a94fb9551db3dbf17bd22c873056dfc872 Mon Sep 17 00:00:00 2001 From: Dmitry Kamovsky Date: Wed, 18 Jan 2023 13:01:08 +1000 Subject: [PATCH 17/25] Delete gitlab pipeline --- .gitlab-ci.yml | 4 ---- 1 file changed, 4 deletions(-) delete mode 100644 .gitlab-ci.yml diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml deleted file mode 100644 index c18467b7f..000000000 --- a/.gitlab-ci.yml +++ /dev/null @@ -1,4 +0,0 @@ -include: - - project: 'arenadata/infrastructure/code/ci/gitlab_ci_files' - ref: master - file: '/development/gpbackup.yaml' From 39a62d4e50beff8cb4c33d84e96a8764577bdc62 Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Wed, 25 Jan 2023 19:37:50 +1000 Subject: [PATCH 18/25] Revert "Order tables for backup by relpages value" This reverts commit 279efb502426b05098b975cad62deabc6bce6421. --- backup/queries_relations.go | 58 +++------- helper/helper.go | 2 + integration/wrappers_test.go | 211 +---------------------------------- options/options.go | 77 ++++++------- 4 files changed, 57 insertions(+), 291 deletions(-) diff --git a/backup/queries_relations.go b/backup/queries_relations.go index c61a546f8..bb15fa46a 100644 --- a/backup/queries_relations.go +++ b/backup/queries_relations.go @@ -91,29 +91,17 @@ func getUserTableRelations(connectionPool *dbconn.DBConn) []Relation { } query := fmt.Sprintf(` - SELECT schemaoid, oid, schema, name FROM ( - SELECT n.oid AS schemaoid, - c.oid AS oid, - quote_ident(n.nspname) AS schema, - quote_ident(c.relname) AS name, - coalesce(prt.pages, c.relpages) AS pages - FROM pg_class c + SELECT n.oid AS schemaoid, + c.oid AS oid, + quote_ident(n.nspname) AS schema, + quote_ident(c.relname) AS name + FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid - LEFT JOIN ( - SELECT - p.parrelid, - sum(pc.relpages) AS pages - FROM pg_partition_rule AS pr - JOIN pg_partition AS p ON pr.paroid = p.oid - JOIN pg_class AS pc ON pr.parchildrelid = pc.oid - GROUP BY p.parrelid - ) AS prt ON prt.parrelid = c.oid - WHERE %s - %s - AND relkind = 'r' - AND %s - ) res - ORDER BY pages DESC, oid`, + WHERE %s + %s + AND relkind = 'r' + AND %s + ORDER BY c.oid`, relationAndSchemaFilterClause(), childPartitionFilter, ExtensionFilterClause("c")) results := make([]Relation, 0) @@ -127,27 +115,15 @@ func getUserTableRelationsWithIncludeFiltering(connectionPool *dbconn.DBConn, in includeOids := getOidsFromRelationList(connectionPool, includedRelationsQuoted) oidStr := strings.Join(includeOids, ", ") query := fmt.Sprintf(` - SELECT schemaoid, oid, schema, name FROM ( - SELECT n.oid AS schemaoid, - c.oid AS oid, - quote_ident(n.nspname) AS schema, - quote_ident(c.relname) AS name, - coalesce(prt.pages, c.relpages) AS pages - FROM pg_class c + SELECT n.oid AS schemaoid, + c.oid AS oid, + quote_ident(n.nspname) AS schema, + quote_ident(c.relname) AS name + FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid - LEFT JOIN ( - SELECT - p.parrelid, - sum(pc.relpages) AS pages - FROM pg_partition_rule AS pr - JOIN pg_partition AS p ON pr.paroid = p.oid - JOIN pg_class AS pc ON pr.parchildrelid = pc.oid - GROUP BY p.parrelid - ) AS prt ON prt.parrelid = c.oid - WHERE c.oid IN (%s) + WHERE c.oid IN (%s) AND (relkind = 'r') - ) res - ORDER BY pages DESC, oid`, oidStr) + ORDER BY c.oid`, oidStr) results := make([]Relation, 0) err := connectionPool.Select(&results, query) diff --git a/helper/helper.go 
b/helper/helper.go index 69fb3cd6a..5d88673d3 100644 --- a/helper/helper.go +++ b/helper/helper.go @@ -8,6 +8,7 @@ import ( "os" "os/signal" "path/filepath" + "sort" "strconv" "strings" "sync" @@ -205,6 +206,7 @@ func getOidListFromFile() ([]int, error) { num, _ := strconv.Atoi(oid) oidList[i] = num } + sort.Ints(oidList) return oidList, nil } diff --git a/integration/wrappers_test.go b/integration/wrappers_test.go index 6c4d6e835..db3a08e16 100644 --- a/integration/wrappers_test.go +++ b/integration/wrappers_test.go @@ -12,18 +12,16 @@ import ( ) var _ = Describe("Wrappers Integration", func() { - BeforeEach(func() { - gplog.SetVerbosity(gplog.LOGERROR) // turn off progress bar in the lock-table routine - }) Describe("RetrieveAndProcessTables", func() { BeforeEach(func() { - rootCmd := &cobra.Command{} - includes := []string{"--include-table", "public.foo", "--include-table", "public.BAR"} - rootCmd.SetArgs(options.HandleSingleDashes(includes)) + gplog.SetVerbosity(gplog.LOGERROR) // turn off progress bar in the lock-table routine + var rootCmd = &cobra.Command{} backup.DoInit(rootCmd) // initialize the ObjectCount - rootCmd.Execute() }) It("returns the data tables that have names with special characters", func() { + _ = backupCmdFlags.Set(options.INCLUDE_RELATION, "public.foo") + _ = backupCmdFlags.Set(options.INCLUDE_RELATION, "public.BAR") + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.foo(i int); INSERT INTO public.foo VALUES (1);") testhelper.AssertQueryRuns(connectionPool, `CREATE TABLE public."BAR"(i int); INSERT INTO public."BAR" VALUES (1);`) defer testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.foo;") @@ -39,203 +37,4 @@ var _ = Describe("Wrappers Integration", func() { Expect(dataTables[1].Name).To(Equal(`"BAR"`)) }) }) - Describe("Tables order when no filtering is used or tables filtering is defined", func() { - BeforeEach(func() { - testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.empty(i int);") - testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.ten(i int); INSERT INTO public.ten SELECT generate_series(0, 10);") - testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.thousands(i int); INSERT INTO public.thousands SELECT generate_series(0, 10000);") - testhelper.AssertQueryRuns(connectionPool, "ANALYZE") - - connectionPool.MustBegin(0) - }) - AfterEach(func() { - testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.empty;") - testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.ten;") - testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.thousands;") - - connectionPool.MustCommit(0) - }) - It("returns the data tables in descending order of their sizes (relpages)", func() { - rootCmd := &cobra.Command{} - backup.DoInit(rootCmd) // initialize the ObjectCount - - _, dataTables := backup.RetrieveAndProcessTables() - Expect(len(dataTables)).To(Equal(3)) - Expect(dataTables[0].Name).To(Equal("thousands")) - Expect(dataTables[1].Name).To(Equal("ten")) - Expect(dataTables[2].Name).To(Equal("empty")) - }) - It("returns the data tables in descending order of their sizes (relpages) when include-tables(-file) flag is used", func() { - rootCmd := &cobra.Command{} - includes := []string{"--include-table", "public.empty", "--include-table", "public.ten"} - rootCmd.SetArgs(options.HandleSingleDashes(includes)) - backup.DoInit(rootCmd) // initialize the ObjectCount - rootCmd.Execute() - - _, dataTables := backup.RetrieveAndProcessTables() - Expect(len(dataTables)).To(Equal(2)) - 
Expect(dataTables[0].Name).To(Equal("ten")) - Expect(dataTables[1].Name).To(Equal("empty")) - }) - It("returns the data tables in descending order of their sizes (relpages) when exclude-tables(s) flag is used", func() { - rootCmd := &cobra.Command{} - includes := []string{"--exclude-table", "public.thousands"} - rootCmd.SetArgs(options.HandleSingleDashes(includes)) - backup.DoInit(rootCmd) // initialize the ObjectCount - rootCmd.Execute() - - _, dataTables := backup.RetrieveAndProcessTables() - Expect(len(dataTables)).To(Equal(2)) - Expect(dataTables[0].Name).To(Equal("ten")) - Expect(dataTables[1].Name).To(Equal("empty")) - }) - }) - Describe("Tables order when schema filtering is defined", func() { - BeforeEach(func() { - testhelper.AssertQueryRuns(connectionPool, "CREATE SCHEMA filterschema;") - testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE filterschema.empty(i int);") - testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.ten(i int); INSERT INTO public.ten SELECT generate_series(0, 10);") - testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE filterschema.thousands(i int); INSERT INTO filterschema.thousands SELECT generate_series(0, 1000);") - testhelper.AssertQueryRuns(connectionPool, "ANALYZE") - - connectionPool.MustBegin(0) - }) - AfterEach(func() { - testhelper.AssertQueryRuns(connectionPool, "DROP TABLE filterschema.empty;") - testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.ten;") - testhelper.AssertQueryRuns(connectionPool, "DROP TABLE filterschema.thousands;") - testhelper.AssertQueryRuns(connectionPool, "DROP SCHEMA filterschema;") - - connectionPool.MustCommit(0) - }) - It("returns the data tables in descending order of their sizes (relpages) when include-schema(s) flag is used", func() { - rootCmd := &cobra.Command{} - includes := []string{"--include-schema", "filterschema"} - rootCmd.SetArgs(options.HandleSingleDashes(includes)) - backup.DoInit(rootCmd) // initialize the ObjectCount - rootCmd.Execute() - - _, dataTables := backup.RetrieveAndProcessTables() - Expect(len(dataTables)).To(Equal(2)) - Expect(dataTables[0].Name).To(Equal("thousands")) - Expect(dataTables[1].Name).To(Equal("empty")) - }) - It("returns the data tables in descending order of their sizes (relpages) when exclude-schema(s) flag is used", func() { - rootCmd := &cobra.Command{} - includes := []string{"--exclude-schema", "public"} - rootCmd.SetArgs(options.HandleSingleDashes(includes)) - backup.DoInit(rootCmd) // initialize the ObjectCount - rootCmd.Execute() - - _, dataTables := backup.RetrieveAndProcessTables() - Expect(len(dataTables)).To(Equal(2)) - Expect(dataTables[0].Name).To(Equal("thousands")) - Expect(dataTables[1].Name).To(Equal("empty")) - }) - }) - Describe("Tables order cases, when there is a partitioned table to backup", func() { - BeforeEach(func() { - testhelper.AssertQueryRuns(connectionPool, `CREATE TABLE public.partition_table (id int, gender char(1)) - DISTRIBUTED BY (id) - PARTITION BY LIST (gender) - ( PARTITION girls VALUES ('F'), - PARTITION boys VALUES ('M'), - DEFAULT PARTITION other );`) - testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.partition_table VALUES (generate_series(0,10000), 'F');") - testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.partition_table VALUES (generate_series(0,10), NULL);") - testhelper.AssertQueryRuns(connectionPool, "ANALYZE") - - connectionPool.MustBegin(0) - }) - AfterEach(func() { - testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.partition_table") - - 
connectionPool.MustCommit(0) - }) - It("returns the data tables in descending order of their sizes (relpages), when there is a partitioned table to backup", func() { - testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.ten(i int); INSERT INTO public.ten SELECT generate_series(0, 10);") - defer testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.ten;") - testhelper.AssertQueryRuns(connectionPool, "ANALYZE") - - rootCmd := &cobra.Command{} - backup.DoInit(rootCmd) // initialize the ObjectCount - - opts, _ := options.NewOptions(rootCmd.Flags()) - err := opts.ExpandIncludesForPartitions(connectionPool, rootCmd.Flags()) - Expect(err).ShouldNot(HaveOccurred()) - - _, dataTables := backup.RetrieveAndProcessTables() - Expect(len(dataTables)).To(Equal(2)) - Expect(dataTables[0].Name).To(Equal("partition_table")) - Expect(dataTables[1].Name).To(Equal("ten")) - }) - It("returns the data tables in descending order of their sizes (relpages), when there is a partitioned table to backup and leaf-partition-data flag is set", func() { - rootCmd := &cobra.Command{} - includes := []string{"--leaf-partition-data"} - rootCmd.SetArgs(options.HandleSingleDashes(includes)) - backup.DoInit(rootCmd) // initialize the ObjectCount - rootCmd.Execute() - - opts, _ := options.NewOptions(rootCmd.Flags()) - err := opts.ExpandIncludesForPartitions(connectionPool, rootCmd.Flags()) - Expect(err).ShouldNot(HaveOccurred()) - - _, dataTables := backup.RetrieveAndProcessTables() - - Expect(len(dataTables)).To(Equal(3)) - Expect(dataTables[0].Name).To(Equal("partition_table_1_prt_girls")) - Expect(dataTables[1].Name).To(Equal("partition_table_1_prt_other")) - Expect(dataTables[2].Name).To(Equal("partition_table_1_prt_boys")) - }) - }) - Describe("Tables order cases, when there is an AO/CO table to backup", func() { - BeforeEach(func() { - testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.empty(i int);") - testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.thousands(i int); INSERT INTO public.thousands SELECT generate_series(0, 10000);") - testhelper.AssertQueryRuns(connectionPool, "ANALYZE") - }) - AfterEach(func() { - testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.empty;") - testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.thousands;") - - connectionPool.MustCommit(0) - }) - It("returns the data tables in descending order of their sizes (relpages), when there is an AO table to backup", func() { - testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.hundred(i int) WITH (appendonly=true) DISTRIBUTED RANDOMLY;") - testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.hundred SELECT generate_series(0, 100);") - defer testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.hundred;") - testhelper.AssertQueryRuns(connectionPool, "VACUUM public.hundred") // relpages of AOCO is not updated by ANALYZE - - connectionPool.MustBegin(0) //VACUUM cannot be run inside a transaction block - - rootCmd := &cobra.Command{} - backup.DoInit(rootCmd) // initialize the ObjectCount - - _, dataTables := backup.RetrieveAndProcessTables() - Expect(len(dataTables)).To(Equal(3)) - - Expect(dataTables[0].Name).To(Equal("thousands")) - Expect(dataTables[1].Name).To(Equal("hundred")) - Expect(dataTables[2].Name).To(Equal("empty")) - - }) - It("returns the data tables in descending order of their sizes (relpages), when there is an AOCO table to backup", func() { - testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.hundred(i int) WITH 
(appendonly=true, orientation=column) DISTRIBUTED RANDOMLY;") - testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.hundred SELECT generate_series(0, 100);") - defer testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.hundred;") - testhelper.AssertQueryRuns(connectionPool, "VACUUM") // relpages of AOCO is not updated by ANALYZE - - connectionPool.MustBegin(0) //VACUUM cannot be run inside a transaction block - - rootCmd := &cobra.Command{} - backup.DoInit(rootCmd) // initialize the ObjectCount - - _, dataTables := backup.RetrieveAndProcessTables() - Expect(len(dataTables)).To(Equal(3)) - - Expect(dataTables[0].Name).To(Equal("thousands")) - Expect(dataTables[1].Name).To(Equal("hundred")) - Expect(dataTables[2].Name).To(Equal("empty")) - }) - }) }) diff --git a/options/options.go b/options/options.go index 8ae5826e8..f3e823a5c 100644 --- a/options/options.go +++ b/options/options.go @@ -302,54 +302,43 @@ func (o Options) getUserTableRelationsWithIncludeFiltering(connectionPool *dbcon } query := fmt.Sprintf(` -SELECT schemaname, tablename FROM ( - SELECT c.oid AS oid, - n.nspname AS schemaname, - c.relname AS tablename, - coalesce(prt.pages, c.relpages) AS pages - FROM pg_class c - JOIN pg_namespace n ON c.relnamespace = n.oid - LEFT JOIN ( +SELECT + n.nspname AS schemaname, + c.relname AS tablename +FROM pg_class c +JOIN pg_namespace n + ON c.relnamespace = n.oid +WHERE %s +AND ( + -- Get tables in the include list + c.oid IN (%s) + -- Get parent partition tables whose children are in the include list + OR c.oid IN ( SELECT - p.parrelid, - sum(pc.relpages) AS pages - FROM pg_partition_rule AS pr - JOIN pg_partition AS p ON pr.paroid = p.oid - JOIN pg_class AS pc ON pr.parchildrelid = pc.oid - GROUP BY p.parrelid - ) AS prt ON prt.parrelid = c.oid - WHERE %s - AND ( - -- Get tables in the include list - c.oid IN (%s) - -- Get parent partition tables whose children are in the include list - OR c.oid IN ( - SELECT - p.parrelid - FROM pg_partition p - JOIN pg_partition_rule r ON p.oid = r.paroid - WHERE p.paristemplate = false - AND r.parchildrelid IN (%s) - ) - -- Get external partition tables whose siblings are in the include list - OR c.oid IN ( + p.parrelid + FROM pg_partition p + JOIN pg_partition_rule r ON p.oid = r.paroid + WHERE p.paristemplate = false + AND r.parchildrelid IN (%s) + ) + -- Get external partition tables whose siblings are in the include list + OR c.oid IN ( + SELECT + r.parchildrelid + FROM pg_partition_rule r + JOIN pg_exttable e ON r.parchildrelid = e.reloid + WHERE r.paroid IN ( SELECT - r.parchildrelid - FROM pg_partition_rule r - JOIN pg_exttable e ON r.parchildrelid = e.reloid - WHERE r.paroid IN ( - SELECT - pr.paroid - FROM pg_partition_rule pr - WHERE pr.parchildrelid IN (%s) - ) + pr.paroid + FROM pg_partition_rule pr + WHERE pr.parchildrelid IN (%s) ) - %s ) - AND (relkind = 'r' OR relkind = 'f') - AND %s -) res -ORDER BY pages DESC, oid;`, o.schemaFilterClause("n"), oidStr, oidStr, oidStr, childPartitionFilter, ExtensionFilterClause("c")) + %s +) +AND (relkind = 'r' OR relkind = 'f') +AND %s +ORDER BY c.oid;`, o.schemaFilterClause("n"), oidStr, oidStr, oidStr, childPartitionFilter, ExtensionFilterClause("c")) results := make([]FqnStruct, 0) err = connectionPool.Select(&results, query) From 1ddc17ed0f2a702cf32fec7d0810b4d7521b473b Mon Sep 17 00:00:00 2001 From: hughcapet Date: Tue, 17 Aug 2021 08:03:21 +0300 Subject: [PATCH 19/25] Order tables for backup by relpages value Currently the output of RetrieveAndProcessTables function is 
ordered by oid, which is not always optimal. This commit orders the list of tables for backup by their size (by the relpages value from pg_class, to be precise). It is important to remember that this only makes sense when the relpages value has been updated by vacuum, analyze, or a few DDL commands (such as create index). A wrapper integration test (RetrieveAndProcessTables) is also fixed: it used an empty flag set and didn't properly test table inclusion. Cherry-picked from: 279efb502426b05098b975cad62deabc6bce6421 Removed changes from options.go as irrelevant and useless --- backup/queries_relations.go | 58 +++++++--- helper/helper.go | 2 - integration/wrappers_test.go | 211 ++++++++++++++++++++++++++++++++++- 3 files changed, 247 insertions(+), 24 deletions(-) diff --git a/backup/queries_relations.go b/backup/queries_relations.go index fe21cc933..d47169c1d 100644 --- a/backup/queries_relations.go +++ b/backup/queries_relations.go @@ -98,17 +98,29 @@ func getUserTableRelations(connectionPool *dbconn.DBConn) []Relation { } query := fmt.Sprintf(` - SELECT n.oid AS schemaoid, - c.oid AS oid, - quote_ident(n.nspname) AS schema, - quote_ident(c.relname) AS name - FROM pg_class c + SELECT schemaoid, oid, schema, name FROM ( + SELECT n.oid AS schemaoid, + c.oid AS oid, + quote_ident(n.nspname) AS schema, + quote_ident(c.relname) AS name, + coalesce(prt.pages, c.relpages) AS pages + FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid - WHERE %s - %s - AND relkind IN (%s) - AND %s - ORDER BY c.oid`, + LEFT JOIN ( + SELECT + p.parrelid, + sum(pc.relpages) AS pages + FROM pg_partition_rule AS pr + JOIN pg_partition AS p ON pr.paroid = p.oid + JOIN pg_class AS pc ON pr.parchildrelid = pc.oid + GROUP BY p.parrelid + ) AS prt ON prt.parrelid = c.oid + WHERE %s + %s + AND relkind IN (%s) + AND %s + ) res + ORDER BY pages DESC, oid`, relationAndSchemaFilterClause(), childPartitionFilter, relkindFilter, ExtensionFilterClause("c")) results := make([]Relation, 0) @@ -128,15 +140,27 @@ func getUserTableRelationsWithIncludeFiltering(connectionPool *dbconn.DBConn, in includeOids := getOidsFromRelationList(connectionPool, includedRelationsQuoted) oidStr := strings.Join(includeOids, ", ") query := fmt.Sprintf(` - SELECT n.oid AS schemaoid, - c.oid AS oid, - quote_ident(n.nspname) AS schema, - quote_ident(c.relname) AS name - FROM pg_class c + SELECT schemaoid, oid, schema, name FROM ( + SELECT n.oid AS schemaoid, + c.oid AS oid, + quote_ident(n.nspname) AS schema, + quote_ident(c.relname) AS name, + coalesce(prt.pages, c.relpages) AS pages + FROM pg_class c JOIN pg_namespace n ON c.relnamespace = n.oid - WHERE c.oid IN (%s) + LEFT JOIN ( + SELECT + p.parrelid, + sum(pc.relpages) AS pages + FROM pg_partition_rule AS pr + JOIN pg_partition AS p ON pr.paroid = p.oid + JOIN pg_class AS pc ON pr.parchildrelid = pc.oid + GROUP BY p.parrelid + ) AS prt ON prt.parrelid = c.oid + WHERE c.oid IN (%s) AND relkind IN (%s) - ORDER BY c.oid`, oidStr, relkindFilter) + ) res + ORDER BY pages DESC, oid`, oidStr, relkindFilter) results := make([]Relation, 0) err := connectionPool.Select(&results, query) diff --git a/helper/helper.go b/helper/helper.go index 1efedefc1..3d0dd14b3 100644 --- a/helper/helper.go +++ b/helper/helper.go @@ -8,7 +8,6 @@ import ( "os" "os/signal" "path/filepath" - "sort" "strconv" "strings" "sync" @@ -210,7 +209,6 @@ func getOidListFromFile(oidFileName string) ([]int, error) { num, _ := strconv.Atoi(oid) oidList[i] = num } - sort.Ints(oidList) return oidList, nil } diff --git a/integration/wrappers_test.go 
b/integration/wrappers_test.go index db3a08e16..6c4d6e835 100644 --- a/integration/wrappers_test.go +++ b/integration/wrappers_test.go @@ -12,16 +12,18 @@ import ( ) var _ = Describe("Wrappers Integration", func() { + BeforeEach(func() { + gplog.SetVerbosity(gplog.LOGERROR) // turn off progress bar in the lock-table routine + }) Describe("RetrieveAndProcessTables", func() { BeforeEach(func() { - gplog.SetVerbosity(gplog.LOGERROR) // turn off progress bar in the lock-table routine - var rootCmd = &cobra.Command{} + rootCmd := &cobra.Command{} + includes := []string{"--include-table", "public.foo", "--include-table", "public.BAR"} + rootCmd.SetArgs(options.HandleSingleDashes(includes)) backup.DoInit(rootCmd) // initialize the ObjectCount + rootCmd.Execute() }) It("returns the data tables that have names with special characters", func() { - _ = backupCmdFlags.Set(options.INCLUDE_RELATION, "public.foo") - _ = backupCmdFlags.Set(options.INCLUDE_RELATION, "public.BAR") - testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.foo(i int); INSERT INTO public.foo VALUES (1);") testhelper.AssertQueryRuns(connectionPool, `CREATE TABLE public."BAR"(i int); INSERT INTO public."BAR" VALUES (1);`) defer testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.foo;") @@ -37,4 +39,203 @@ var _ = Describe("Wrappers Integration", func() { Expect(dataTables[1].Name).To(Equal(`"BAR"`)) }) }) + Describe("Tables order when no filtering is used or tables filtering is defined", func() { + BeforeEach(func() { + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.empty(i int);") + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.ten(i int); INSERT INTO public.ten SELECT generate_series(0, 10);") + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.thousands(i int); INSERT INTO public.thousands SELECT generate_series(0, 10000);") + testhelper.AssertQueryRuns(connectionPool, "ANALYZE") + + connectionPool.MustBegin(0) + }) + AfterEach(func() { + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.empty;") + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.ten;") + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.thousands;") + + connectionPool.MustCommit(0) + }) + It("returns the data tables in descending order of their sizes (relpages)", func() { + rootCmd := &cobra.Command{} + backup.DoInit(rootCmd) // initialize the ObjectCount + + _, dataTables := backup.RetrieveAndProcessTables() + Expect(len(dataTables)).To(Equal(3)) + Expect(dataTables[0].Name).To(Equal("thousands")) + Expect(dataTables[1].Name).To(Equal("ten")) + Expect(dataTables[2].Name).To(Equal("empty")) + }) + It("returns the data tables in descending order of their sizes (relpages) when include-tables(-file) flag is used", func() { + rootCmd := &cobra.Command{} + includes := []string{"--include-table", "public.empty", "--include-table", "public.ten"} + rootCmd.SetArgs(options.HandleSingleDashes(includes)) + backup.DoInit(rootCmd) // initialize the ObjectCount + rootCmd.Execute() + + _, dataTables := backup.RetrieveAndProcessTables() + Expect(len(dataTables)).To(Equal(2)) + Expect(dataTables[0].Name).To(Equal("ten")) + Expect(dataTables[1].Name).To(Equal("empty")) + }) + It("returns the data tables in descending order of their sizes (relpages) when exclude-tables(s) flag is used", func() { + rootCmd := &cobra.Command{} + includes := []string{"--exclude-table", "public.thousands"} + rootCmd.SetArgs(options.HandleSingleDashes(includes)) + backup.DoInit(rootCmd) // 
initialize the ObjectCount + rootCmd.Execute() + + _, dataTables := backup.RetrieveAndProcessTables() + Expect(len(dataTables)).To(Equal(2)) + Expect(dataTables[0].Name).To(Equal("ten")) + Expect(dataTables[1].Name).To(Equal("empty")) + }) + }) + Describe("Tables order when schema filtering is defined", func() { + BeforeEach(func() { + testhelper.AssertQueryRuns(connectionPool, "CREATE SCHEMA filterschema;") + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE filterschema.empty(i int);") + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.ten(i int); INSERT INTO public.ten SELECT generate_series(0, 10);") + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE filterschema.thousands(i int); INSERT INTO filterschema.thousands SELECT generate_series(0, 1000);") + testhelper.AssertQueryRuns(connectionPool, "ANALYZE") + + connectionPool.MustBegin(0) + }) + AfterEach(func() { + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE filterschema.empty;") + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.ten;") + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE filterschema.thousands;") + testhelper.AssertQueryRuns(connectionPool, "DROP SCHEMA filterschema;") + + connectionPool.MustCommit(0) + }) + It("returns the data tables in descending order of their sizes (relpages) when include-schema(s) flag is used", func() { + rootCmd := &cobra.Command{} + includes := []string{"--include-schema", "filterschema"} + rootCmd.SetArgs(options.HandleSingleDashes(includes)) + backup.DoInit(rootCmd) // initialize the ObjectCount + rootCmd.Execute() + + _, dataTables := backup.RetrieveAndProcessTables() + Expect(len(dataTables)).To(Equal(2)) + Expect(dataTables[0].Name).To(Equal("thousands")) + Expect(dataTables[1].Name).To(Equal("empty")) + }) + It("returns the data tables in descending order of their sizes (relpages) when exclude-schema(s) flag is used", func() { + rootCmd := &cobra.Command{} + includes := []string{"--exclude-schema", "public"} + rootCmd.SetArgs(options.HandleSingleDashes(includes)) + backup.DoInit(rootCmd) // initialize the ObjectCount + rootCmd.Execute() + + _, dataTables := backup.RetrieveAndProcessTables() + Expect(len(dataTables)).To(Equal(2)) + Expect(dataTables[0].Name).To(Equal("thousands")) + Expect(dataTables[1].Name).To(Equal("empty")) + }) + }) + Describe("Tables order cases, when there is a partitioned table to backup", func() { + BeforeEach(func() { + testhelper.AssertQueryRuns(connectionPool, `CREATE TABLE public.partition_table (id int, gender char(1)) + DISTRIBUTED BY (id) + PARTITION BY LIST (gender) + ( PARTITION girls VALUES ('F'), + PARTITION boys VALUES ('M'), + DEFAULT PARTITION other );`) + testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.partition_table VALUES (generate_series(0,10000), 'F');") + testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.partition_table VALUES (generate_series(0,10), NULL);") + testhelper.AssertQueryRuns(connectionPool, "ANALYZE") + + connectionPool.MustBegin(0) + }) + AfterEach(func() { + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.partition_table") + + connectionPool.MustCommit(0) + }) + It("returns the data tables in descending order of their sizes (relpages), when there is a partitioned table to backup", func() { + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.ten(i int); INSERT INTO public.ten SELECT generate_series(0, 10);") + defer testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.ten;") + 
testhelper.AssertQueryRuns(connectionPool, "ANALYZE") + + rootCmd := &cobra.Command{} + backup.DoInit(rootCmd) // initialize the ObjectCount + + opts, _ := options.NewOptions(rootCmd.Flags()) + err := opts.ExpandIncludesForPartitions(connectionPool, rootCmd.Flags()) + Expect(err).ShouldNot(HaveOccurred()) + + _, dataTables := backup.RetrieveAndProcessTables() + Expect(len(dataTables)).To(Equal(2)) + Expect(dataTables[0].Name).To(Equal("partition_table")) + Expect(dataTables[1].Name).To(Equal("ten")) + }) + It("returns the data tables in descending order of their sizes (relpages), when there is a partitioned table to backup and leaf-partition-data flag is set", func() { + rootCmd := &cobra.Command{} + includes := []string{"--leaf-partition-data"} + rootCmd.SetArgs(options.HandleSingleDashes(includes)) + backup.DoInit(rootCmd) // initialize the ObjectCount + rootCmd.Execute() + + opts, _ := options.NewOptions(rootCmd.Flags()) + err := opts.ExpandIncludesForPartitions(connectionPool, rootCmd.Flags()) + Expect(err).ShouldNot(HaveOccurred()) + + _, dataTables := backup.RetrieveAndProcessTables() + + Expect(len(dataTables)).To(Equal(3)) + Expect(dataTables[0].Name).To(Equal("partition_table_1_prt_girls")) + Expect(dataTables[1].Name).To(Equal("partition_table_1_prt_other")) + Expect(dataTables[2].Name).To(Equal("partition_table_1_prt_boys")) + }) + }) + Describe("Tables order cases, when there is an AO/CO table to backup", func() { + BeforeEach(func() { + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.empty(i int);") + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.thousands(i int); INSERT INTO public.thousands SELECT generate_series(0, 10000);") + testhelper.AssertQueryRuns(connectionPool, "ANALYZE") + }) + AfterEach(func() { + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.empty;") + testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.thousands;") + + connectionPool.MustCommit(0) + }) + It("returns the data tables in descending order of their sizes (relpages), when there is an AO table to backup", func() { + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.hundred(i int) WITH (appendonly=true) DISTRIBUTED RANDOMLY;") + testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.hundred SELECT generate_series(0, 100);") + defer testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.hundred;") + testhelper.AssertQueryRuns(connectionPool, "VACUUM public.hundred") // relpages of AOCO is not updated by ANALYZE + + connectionPool.MustBegin(0) //VACUUM cannot be run inside a transaction block + + rootCmd := &cobra.Command{} + backup.DoInit(rootCmd) // initialize the ObjectCount + + _, dataTables := backup.RetrieveAndProcessTables() + Expect(len(dataTables)).To(Equal(3)) + + Expect(dataTables[0].Name).To(Equal("thousands")) + Expect(dataTables[1].Name).To(Equal("hundred")) + Expect(dataTables[2].Name).To(Equal("empty")) + + }) + It("returns the data tables in descending order of their sizes (relpages), when there is an AOCO table to backup", func() { + testhelper.AssertQueryRuns(connectionPool, "CREATE TABLE public.hundred(i int) WITH (appendonly=true, orientation=column) DISTRIBUTED RANDOMLY;") + testhelper.AssertQueryRuns(connectionPool, "INSERT INTO public.hundred SELECT generate_series(0, 100);") + defer testhelper.AssertQueryRuns(connectionPool, "DROP TABLE public.hundred;") + testhelper.AssertQueryRuns(connectionPool, "VACUUM") // relpages of AOCO is not updated by ANALYZE + + connectionPool.MustBegin(0) //VACUUM 
cannot be run inside a transaction block + + rootCmd := &cobra.Command{} + backup.DoInit(rootCmd) // initialize the ObjectCount + + _, dataTables := backup.RetrieveAndProcessTables() + Expect(len(dataTables)).To(Equal(3)) + + Expect(dataTables[0].Name).To(Equal("thousands")) + Expect(dataTables[1].Name).To(Equal("hundred")) + Expect(dataTables[2].Name).To(Equal("empty")) + }) + }) }) From 0df04ba8ba0e444063a80eefd9241bb3cc855462 Mon Sep 17 00:00:00 2001 From: Alexandr Barulev Date: Fri, 16 Jun 2023 08:57:38 +0300 Subject: [PATCH 20/25] arenadata: modify build scripts (#28) - the go binary was bumped to 1.20.5 - the directory used to store go binaries was changed: the go archive is now untarred into /opt/go instead of ~/go (GOPATH). This was done because of compilation errors (`unkeyed fields at composite literals`, see https://stackoverflow.com/questions/54548441/composite-literal-uses-unkeyed-fields) that occurred at the unit-test stage (restore tests) after merging with upstream code, like: ```bash #> PATH=$PATH:~/go/bin GOPATH=~/go ginkgo restore Failed to compile restore: # github.com/greenplum-db/gpbackup/restore_test ./wrappers_test.go:168:54: github.com/greenplum-db/gpbackup/history.RestorePlanEntry struct literal uses unkeyed fields ./wrappers_test.go:194:54: github.com/greenplum-db/gpbackup/history.RestorePlanEntry struct literal uses unkeyed fields ``` The error occurs under the following (strange) condition: when the go binary is stored in the GOPATH directory (i.e. the archive was untarred into GOPATH). It appears to be a golang issue (further research is needed). --- arenadata/run_gpbackup_tests.bash | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/arenadata/run_gpbackup_tests.bash b/arenadata/run_gpbackup_tests.bash index ccea91d1d..94c23daea 100644 --- a/arenadata/run_gpbackup_tests.bash +++ b/arenadata/run_gpbackup_tests.bash @@ -9,11 +9,11 @@ make -C gpdb_src/contrib/dummy_seclabel/ install gpdb_src/concourse/scripts/setup_gpadmin_user.bash make_cluster +wget https://golang.org/dl/go1.20.5.linux-amd64.tar.gz -O - | tar -C /opt -xz; + su - gpadmin -c " source /usr/local/greenplum-db-devel/greenplum_path.sh; source ~/gpdb_src/gpAux/gpdemo/gpdemo-env.sh; gpconfig -c shared_preload_libraries -v dummy_seclabel; gpstop -ar; -wget https://golang.org/dl/go1.17.6.linux-amd64.tar.gz; -tar -C ~/ -xzf go1.17.6.linux-amd64.tar.gz; -PATH=$PATH:~/go/bin GOPATH=~/go make depend build install test end_to_end -C go/src/github.com/greenplum-db/gpbackup/" +PATH=$PATH:/opt/go/bin:~/go/bin GOPATH=~/go make depend build install test end_to_end -C /home/gpadmin/go/src/github.com/greenplum-db/gpbackup" From ac8731e943801980f8d3b48552f51a86235a38fd Mon Sep 17 00:00:00 2001 From: Alexey Gordeev Date: Tue, 18 Jul 2023 11:12:34 +0500 Subject: [PATCH 21/25] Implement --report-dir option for gprestore (#30) Implement --report-dir option for gprestore The new option allows report files to be created in a directory different from --backup-dir. The main code changes were made in the filepath class: it now takes the new option as the default path for reports, with a fallback to the content dir. Unit and end_to_end tests were updated to demonstrate the described behavior.
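For clarity, here is a minimal standalone sketch of the path resolution this change introduces. It is illustrative only and not part of the patch; resolveReportDir and its parameters are hypothetical stand-ins for the new FilePathInfo.GetDirForReport logic shown in the diff below.

```go
package main

import (
	"fmt"
	"path"
)

// resolveReportDir mirrors the fallback behavior added by this patch:
// restore reports land under --report-dir when it is set, otherwise in the
// usual per-segment backup directory (the content dir).
func resolveReportDir(reportDir, segPrefix, contentDir, timestamp string, contentID int) string {
	if reportDir == "" {
		return contentDir // fallback: same directory as the backup metadata
	}
	segDir := fmt.Sprintf("%s%d", segPrefix, contentID)
	return path.Join(reportDir, segDir, "backups", timestamp[0:8], timestamp)
}

func main() {
	contentDir := "/data/gpseg-1/backups/20170101/20170101010101"
	// --report-dir unset: the report path falls back to the content dir.
	fmt.Println(resolveReportDir("", "gpseg", contentDir, "20170101010101", -1))
	// --report-dir set: the report path is built under the user-specified dir.
	fmt.Println(resolveReportDir("/bar/foo", "gpseg", contentDir, "20170101010101", -1))
}
```

The gprestore report, error_tables_metadata, and error_tables_data files for a given restore all share this resolved directory, as exercised by the end_to_end tests in the diff.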
--- end_to_end/end_to_end_suite_test.go | 81 +++++++++++++++++++++++++++++ filepath/filepath.go | 24 ++++++++- filepath/filepath_test.go | 14 +++++ options/flag.go | 2 + restore/restore.go | 5 ++ 5 files changed, 125 insertions(+), 1 deletion(-) diff --git a/end_to_end/end_to_end_suite_test.go b/end_to_end/end_to_end_suite_test.go index b6ba4232e..c24f9061d 100644 --- a/end_to_end/end_to_end_suite_test.go +++ b/end_to_end/end_to_end_suite_test.go @@ -331,6 +331,18 @@ func getMetdataFileContents(backupDir string, timestamp string, fileSuffix strin return fileContentBytes } +// check restore file exist and has right permissions +func checkRestoreMetdataFile(backupDir string, timestamp string, fileSuffix string) { + file, err := path.Glob(path.Join(backupDir, "*-1/backups", timestamp[:8], timestamp, fmt.Sprintf("gprestore_%s_*_%s", timestamp, fileSuffix))) + Expect(err).ToNot(HaveOccurred()) + Expect(file).To(HaveLen(1)) + info, err := os.Stat(file[0]) + Expect(err).ToNot(HaveOccurred()) + if info.Mode() != 0444 { + Fail(fmt.Sprintf("File %s is not read-only (mode is %v).", file[0], info.Mode())) + } +} + func saveHistory(myCluster *cluster.Cluster) { // move history file out of the way, and replace in "after". This avoids adding junk to an existing gpackup_history.db @@ -1153,6 +1165,75 @@ var _ = Describe("backup and restore end to end tests", func() { Expect(actualStatisticCount).To(Equal("3")) }) }) + Describe("Restore with --report-dir", func() { + It("runs gprestore without --report-dir", func() { + timestamp := gpbackup(gpbackupPath, backupHelperPath, + "--include-table", "public.sales") + gprestore(gprestorePath, restoreHelperPath, timestamp, + "--redirect-db", "restoredb") + + // Since --report-dir and --backup-dir were not used, restore report should be in default dir + checkRestoreMetdataFile(path.Dir(backupCluster.GetDirForContent(-1)), timestamp, "report") + }) + It("runs gprestore without --report-dir, but with --backup-dir", func() { + timestamp := gpbackup(gpbackupPath, backupHelperPath, + "--backup-dir", backupDir, + "--include-table", "public.sales") + gprestore(gprestorePath, restoreHelperPath, timestamp, + "--backup-dir", backupDir, + "--redirect-db", "restoredb") + + // Since --backup-dir was used, restore report should be in backup dir + checkRestoreMetdataFile(backupDir, timestamp, "report") + }) + It("runs gprestore with --report-dir and same --backup-dir", func() { + timestamp := gpbackup(gpbackupPath, backupHelperPath, + "--backup-dir", backupDir, + "--include-table", "public.sales") + gprestore(gprestorePath, restoreHelperPath, timestamp, + "--backup-dir", backupDir, + "--report-dir", backupDir, + "--redirect-db", "restoredb") + + // Since --report-dir and --backup-dir are the same, restore report should be in backup dir + checkRestoreMetdataFile(backupDir, timestamp, "report") + }) + It("runs gprestore with --report-dir and different --backup-dir", func() { + reportDir := path.Join(backupDir, "restore") + timestamp := gpbackup(gpbackupPath, backupHelperPath, + "--backup-dir", backupDir, + "--include-table", "public.sales") + gprestore(gprestorePath, restoreHelperPath, timestamp, + "--backup-dir", backupDir, + "--report-dir", reportDir, + "--redirect-db", "restoredb") + + // Since --report-dir differs from --backup-dir, restore report should be in report dir + checkRestoreMetdataFile(reportDir, timestamp, "report") + }) + It("runs gprestore with --report-dir and check error_tables* files are present", func() { + if segmentCount != 3 { + Skip("Restoring from a tarred 
backup currently requires a 3-segment cluster to test.") + } + extractDirectory := extractSavedTarFile(backupDir, "corrupt-db") + reportDir := path.Join(backupDir, "restore") + + // Restore command with data error + // Metadata errors due to invalid alter ownership + gprestoreCmd := exec.Command(gprestorePath, + "--timestamp", "20190809230424", + "--redirect-db", "restoredb", + "--backup-dir", extractDirectory, + "--report-dir", reportDir, + "--on-error-continue") + _, _ = gprestoreCmd.CombinedOutput() + + // All report files should be placed in the same dir + checkRestoreMetdataFile(reportDir, "20190809230424", "report") + checkRestoreMetdataFile(reportDir, "20190809230424", "error_tables_metadata") + checkRestoreMetdataFile(reportDir, "20190809230424", "error_tables_data") + }) + }) Describe("Flag combinations", func() { It("runs gpbackup and gprestore without redirecting restore to another db", func() { err := exec.Command("createdb", "recreateme").Run() diff --git a/filepath/filepath.go b/filepath/filepath.go index e3e77ea92..f1b391d3a 100644 --- a/filepath/filepath.go +++ b/filepath/filepath.go @@ -26,6 +26,7 @@ type FilePathInfo struct { Timestamp string UserSpecifiedBackupDir string UserSpecifiedSegPrefix string + UserSpecifiedReportDir string } func NewFilePathInfo(c *cluster.Cluster, userSpecifiedBackupDir string, timestamp string, userSegPrefix string) FilePathInfo { @@ -33,6 +34,7 @@ func NewFilePathInfo(c *cluster.Cluster, userSpecifiedBackupDir string, timestam backupFPInfo.PID = os.Getpid() backupFPInfo.UserSpecifiedBackupDir = userSpecifiedBackupDir backupFPInfo.UserSpecifiedSegPrefix = userSegPrefix + backupFPInfo.UserSpecifiedReportDir = "" backupFPInfo.Timestamp = timestamp backupFPInfo.SegDirMap = make(map[int]string) for _, segment := range c.Segments { @@ -41,6 +43,14 @@ func NewFilePathInfo(c *cluster.Cluster, userSpecifiedBackupDir string, timestam return backupFPInfo } +/* + * Set user specified dir for report. + * Currently used for restore only. + */ +func (backupFPInfo *FilePathInfo) SetReportDir(userSpecifiedReportDir string) { + backupFPInfo.UserSpecifiedReportDir = userSpecifiedReportDir +} + /* * Restoring a future-dated backup is allowed (e.g. 
the backup was taken in a * different time zone that is ahead of the restore time zone), so only check @@ -63,6 +73,18 @@ func (backupFPInfo *FilePathInfo) GetDirForContent(contentID int) string { return path.Join(backupFPInfo.SegDirMap[contentID], "backups", backupFPInfo.Timestamp[0:8], backupFPInfo.Timestamp) } +func (backupFPInfo *FilePathInfo) IsUserSpecifiedReportDir() bool { + return backupFPInfo.UserSpecifiedReportDir != "" +} + +func (backupFPInfo *FilePathInfo) GetDirForReport(contentID int) string { + if backupFPInfo.IsUserSpecifiedReportDir() { + segDir := fmt.Sprintf("%s%d", backupFPInfo.UserSpecifiedSegPrefix, contentID) + return path.Join(backupFPInfo.UserSpecifiedReportDir, segDir, "backups", backupFPInfo.Timestamp[0:8], backupFPInfo.Timestamp) + } + return backupFPInfo.GetDirForContent(contentID); +} + func (backupFPInfo *FilePathInfo) replaceCopyFormatStringsInPath(templateFilePath string, contentID int) string { filePath := strings.Replace(templateFilePath, "", backupFPInfo.SegDirMap[contentID], -1) return strings.Replace(filePath, "", strconv.Itoa(contentID), -1) @@ -138,7 +160,7 @@ func (backupFPInfo *FilePathInfo) GetBackupReportFilePath() string { } func (backupFPInfo *FilePathInfo) GetRestoreFilePath(restoreTimestamp string, filetype string) string { - return path.Join(backupFPInfo.GetDirForContent(-1), fmt.Sprintf("gprestore_%s_%s_%s", backupFPInfo.Timestamp, restoreTimestamp, metadataFilenameMap[filetype])) + return path.Join(backupFPInfo.GetDirForReport(-1), fmt.Sprintf("gprestore_%s_%s_%s", backupFPInfo.Timestamp, restoreTimestamp, metadataFilenameMap[filetype])) } func (backupFPInfo *FilePathInfo) GetRestoreReportFilePath(restoreTimestamp string) string { diff --git a/filepath/filepath_test.go b/filepath/filepath_test.go index 7a3debed0..ff4e3d92e 100644 --- a/filepath/filepath_test.go +++ b/filepath/filepath_test.go @@ -106,6 +106,20 @@ var _ = Describe("filepath tests", func() { fpInfo := NewFilePathInfo(c, "/foo/bar", "20170101010101", "gpseg") Expect(fpInfo.GetBackupReportFilePath()).To(Equal("/foo/bar/gpseg-1/backups/20170101/20170101010101/gpbackup_20170101010101_report")) }) + It("returns report file path for restore command", func() { + fpInfo := NewFilePathInfo(c, "", "20170101010101", "gpseg") + Expect(fpInfo.GetRestoreReportFilePath("20200101010101")).To(Equal("/data/gpseg-1/backups/20170101/20170101010101/gprestore_20170101010101_20200101010101_report")) + }) + It("returns report file path based on user specified path for restore command", func() { + fpInfo := NewFilePathInfo(c, "/foo/bar", "20170101010101", "gpseg") + Expect(fpInfo.GetRestoreReportFilePath("20200101010101")).To(Equal("/foo/bar/gpseg-1/backups/20170101/20170101010101/gprestore_20170101010101_20200101010101_report")) + }) + It("returns different report file paths based on user specified report path for backup and restore command", func() { + fpInfo := NewFilePathInfo(c, "/foo/bar", "20170101010101", "gpseg") + fpInfo.SetReportDir("/bar/foo") + Expect(fpInfo.GetBackupReportFilePath()).To(Equal("/foo/bar/gpseg-1/backups/20170101/20170101010101/gpbackup_20170101010101_report")) + Expect(fpInfo.GetRestoreReportFilePath("20200101010101")).To(Equal("/bar/foo/gpseg-1/backups/20170101/20170101010101/gprestore_20170101010101_20200101010101_report")) + }) }) Describe("GetTableBackupFilePath", func() { It("returns table file path", func() { diff --git a/options/flag.go b/options/flag.go index 8dc86ae73..49a76e101 100644 --- a/options/flag.go +++ b/options/flag.go @@ -51,6 +51,7 @@ const ( 
TRUNCATE_TABLE = "truncate-table" WITHOUT_GLOBALS = "without-globals" RESIZE_CLUSTER = "resize-cluster" + REPORT_DIR = "report-dir" ) func SetBackupFlagDefaults(flagSet *pflag.FlagSet) { @@ -118,6 +119,7 @@ func SetRestoreFlagDefaults(flagSet *pflag.FlagSet) { flagSet.Bool(LEAF_PARTITION_DATA, false, "For partition tables, create one data file per leaf partition instead of one data file for the whole table") flagSet.Bool(RUN_ANALYZE, false, "Run ANALYZE on restored tables") flagSet.Bool(RESIZE_CLUSTER, false, "Restore a backup taken on a cluster with more or fewer segments than the cluster to which it will be restored") + flagSet.String(REPORT_DIR, "", "The absolute path of the directory to which all report files will be written") _ = flagSet.MarkHidden(LEAF_PARTITION_DATA) } diff --git a/restore/restore.go b/restore/restore.go index 7a0ea3913..2e4dcb988 100644 --- a/restore/restore.go +++ b/restore/restore.go @@ -75,6 +75,11 @@ func DoSetup() { segPrefix, err = filepath.ParseSegPrefix(MustGetFlagString(options.BACKUP_DIR)) gplog.FatalOnError(err) globalFPInfo = filepath.NewFilePathInfo(globalCluster, MustGetFlagString(options.BACKUP_DIR), backupTimestamp, segPrefix) + if reportDir := MustGetFlagString(options.REPORT_DIR); reportDir != "" { + globalFPInfo.SetReportDir(reportDir) + info, err := globalCluster.ExecuteLocalCommand(fmt.Sprintf("mkdir -p %s", globalFPInfo.GetDirForReport(-1))) + gplog.FatalOnError(err, info) + } // Get restore metadata from plugin if MustGetFlagString(options.PLUGIN_CONFIG) != "" { From 4ea2f68ebd32dae3555b29b1bcbdf34ea5a5963f Mon Sep 17 00:00:00 2001 From: Vasiliy Ivanov Date: Tue, 25 Jul 2023 16:04:28 +0200 Subject: [PATCH 22/25] Revert "Implement --report-dir option for gprestore (#30)" to reapply above 8bdcb48 and 7346404 This reverts commit ac8731e943801980f8d3b48552f51a86235a38fd. --- end_to_end/end_to_end_suite_test.go | 81 ----------------------------- filepath/filepath.go | 24 +-------- filepath/filepath_test.go | 14 ----- options/flag.go | 2 - restore/restore.go | 5 -- 5 files changed, 1 insertion(+), 125 deletions(-) diff --git a/end_to_end/end_to_end_suite_test.go b/end_to_end/end_to_end_suite_test.go index c24f9061d..b6ba4232e 100644 --- a/end_to_end/end_to_end_suite_test.go +++ b/end_to_end/end_to_end_suite_test.go @@ -331,18 +331,6 @@ func getMetdataFileContents(backupDir string, timestamp string, fileSuffix strin return fileContentBytes } -// check restore file exist and has right permissions -func checkRestoreMetdataFile(backupDir string, timestamp string, fileSuffix string) { - file, err := path.Glob(path.Join(backupDir, "*-1/backups", timestamp[:8], timestamp, fmt.Sprintf("gprestore_%s_*_%s", timestamp, fileSuffix))) - Expect(err).ToNot(HaveOccurred()) - Expect(file).To(HaveLen(1)) - info, err := os.Stat(file[0]) - Expect(err).ToNot(HaveOccurred()) - if info.Mode() != 0444 { - Fail(fmt.Sprintf("File %s is not read-only (mode is %v).", file[0], info.Mode())) - } -} - func saveHistory(myCluster *cluster.Cluster) { // move history file out of the way, and replace in "after". 
This avoids adding junk to an existing gpackup_history.db @@ -1165,75 +1153,6 @@ var _ = Describe("backup and restore end to end tests", func() { Expect(actualStatisticCount).To(Equal("3")) }) }) - Describe("Restore with --report-dir", func() { - It("runs gprestore without --report-dir", func() { - timestamp := gpbackup(gpbackupPath, backupHelperPath, - "--include-table", "public.sales") - gprestore(gprestorePath, restoreHelperPath, timestamp, - "--redirect-db", "restoredb") - - // Since --report-dir and --backup-dir were not used, restore report should be in default dir - checkRestoreMetdataFile(path.Dir(backupCluster.GetDirForContent(-1)), timestamp, "report") - }) - It("runs gprestore without --report-dir, but with --backup-dir", func() { - timestamp := gpbackup(gpbackupPath, backupHelperPath, - "--backup-dir", backupDir, - "--include-table", "public.sales") - gprestore(gprestorePath, restoreHelperPath, timestamp, - "--backup-dir", backupDir, - "--redirect-db", "restoredb") - - // Since --backup-dir was used, restore report should be in backup dir - checkRestoreMetdataFile(backupDir, timestamp, "report") - }) - It("runs gprestore with --report-dir and same --backup-dir", func() { - timestamp := gpbackup(gpbackupPath, backupHelperPath, - "--backup-dir", backupDir, - "--include-table", "public.sales") - gprestore(gprestorePath, restoreHelperPath, timestamp, - "--backup-dir", backupDir, - "--report-dir", backupDir, - "--redirect-db", "restoredb") - - // Since --report-dir and --backup-dir are the same, restore report should be in backup dir - checkRestoreMetdataFile(backupDir, timestamp, "report") - }) - It("runs gprestore with --report-dir and different --backup-dir", func() { - reportDir := path.Join(backupDir, "restore") - timestamp := gpbackup(gpbackupPath, backupHelperPath, - "--backup-dir", backupDir, - "--include-table", "public.sales") - gprestore(gprestorePath, restoreHelperPath, timestamp, - "--backup-dir", backupDir, - "--report-dir", reportDir, - "--redirect-db", "restoredb") - - // Since --report-dir differs from --backup-dir, restore report should be in report dir - checkRestoreMetdataFile(reportDir, timestamp, "report") - }) - It("runs gprestore with --report-dir and check error_tables* files are present", func() { - if segmentCount != 3 { - Skip("Restoring from a tarred backup currently requires a 3-segment cluster to test.") - } - extractDirectory := extractSavedTarFile(backupDir, "corrupt-db") - reportDir := path.Join(backupDir, "restore") - - // Restore command with data error - // Metadata errors due to invalid alter ownership - gprestoreCmd := exec.Command(gprestorePath, - "--timestamp", "20190809230424", - "--redirect-db", "restoredb", - "--backup-dir", extractDirectory, - "--report-dir", reportDir, - "--on-error-continue") - _, _ = gprestoreCmd.CombinedOutput() - - // All report files should be placed in the same dir - checkRestoreMetdataFile(reportDir, "20190809230424", "report") - checkRestoreMetdataFile(reportDir, "20190809230424", "error_tables_metadata") - checkRestoreMetdataFile(reportDir, "20190809230424", "error_tables_data") - }) - }) Describe("Flag combinations", func() { It("runs gpbackup and gprestore without redirecting restore to another db", func() { err := exec.Command("createdb", "recreateme").Run() diff --git a/filepath/filepath.go b/filepath/filepath.go index 0f6471d8f..239b5c2a5 100644 --- a/filepath/filepath.go +++ b/filepath/filepath.go @@ -26,7 +26,6 @@ type FilePathInfo struct { Timestamp string UserSpecifiedBackupDir string 
UserSpecifiedSegPrefix string - UserSpecifiedReportDir string } func NewFilePathInfo(c *cluster.Cluster, userSpecifiedBackupDir string, timestamp string, userSegPrefix string, useMirrors ...bool) FilePathInfo { @@ -34,7 +33,6 @@ func NewFilePathInfo(c *cluster.Cluster, userSpecifiedBackupDir string, timestam backupFPInfo.PID = os.Getpid() backupFPInfo.UserSpecifiedBackupDir = userSpecifiedBackupDir backupFPInfo.UserSpecifiedSegPrefix = userSegPrefix - backupFPInfo.UserSpecifiedReportDir = "" backupFPInfo.Timestamp = timestamp backupFPInfo.SegDirMap = make(map[int]string) @@ -51,14 +49,6 @@ func NewFilePathInfo(c *cluster.Cluster, userSpecifiedBackupDir string, timestam return backupFPInfo } -/* - * Set user specified dir for report. - * Currently used for restore only. - */ -func (backupFPInfo *FilePathInfo) SetReportDir(userSpecifiedReportDir string) { - backupFPInfo.UserSpecifiedReportDir = userSpecifiedReportDir -} - /* * Restoring a future-dated backup is allowed (e.g. the backup was taken in a * different time zone that is ahead of the restore time zone), so only check @@ -81,18 +71,6 @@ func (backupFPInfo *FilePathInfo) GetDirForContent(contentID int) string { return path.Join(backupFPInfo.SegDirMap[contentID], "backups", backupFPInfo.Timestamp[0:8], backupFPInfo.Timestamp) } -func (backupFPInfo *FilePathInfo) IsUserSpecifiedReportDir() bool { - return backupFPInfo.UserSpecifiedReportDir != "" -} - -func (backupFPInfo *FilePathInfo) GetDirForReport(contentID int) string { - if backupFPInfo.IsUserSpecifiedReportDir() { - segDir := fmt.Sprintf("%s%d", backupFPInfo.UserSpecifiedSegPrefix, contentID) - return path.Join(backupFPInfo.UserSpecifiedReportDir, segDir, "backups", backupFPInfo.Timestamp[0:8], backupFPInfo.Timestamp) - } - return backupFPInfo.GetDirForContent(contentID); -} - func (backupFPInfo *FilePathInfo) replaceCopyFormatStringsInPath(templateFilePath string, contentID int) string { filePath := strings.Replace(templateFilePath, "", backupFPInfo.SegDirMap[contentID], -1) return strings.Replace(filePath, "", strconv.Itoa(contentID), -1) @@ -168,7 +146,7 @@ func (backupFPInfo *FilePathInfo) GetBackupReportFilePath() string { } func (backupFPInfo *FilePathInfo) GetRestoreFilePath(restoreTimestamp string, filetype string) string { - return path.Join(backupFPInfo.GetDirForReport(-1), fmt.Sprintf("gprestore_%s_%s_%s", backupFPInfo.Timestamp, restoreTimestamp, metadataFilenameMap[filetype])) + return path.Join(backupFPInfo.GetDirForContent(-1), fmt.Sprintf("gprestore_%s_%s_%s", backupFPInfo.Timestamp, restoreTimestamp, metadataFilenameMap[filetype])) } func (backupFPInfo *FilePathInfo) GetRestoreReportFilePath(restoreTimestamp string) string { diff --git a/filepath/filepath_test.go b/filepath/filepath_test.go index 9d845f02c..9b58fd15d 100644 --- a/filepath/filepath_test.go +++ b/filepath/filepath_test.go @@ -140,20 +140,6 @@ var _ = Describe("filepath tests", func() { fpInfo := NewFilePathInfo(c, "/foo/bar", "20170101010101", "gpseg") Expect(fpInfo.GetBackupReportFilePath()).To(Equal("/foo/bar/gpseg-1/backups/20170101/20170101010101/gpbackup_20170101010101_report")) }) - It("returns report file path for restore command", func() { - fpInfo := NewFilePathInfo(c, "", "20170101010101", "gpseg") - Expect(fpInfo.GetRestoreReportFilePath("20200101010101")).To(Equal("/data/gpseg-1/backups/20170101/20170101010101/gprestore_20170101010101_20200101010101_report")) - }) - It("returns report file path based on user specified path for restore command", func() { - fpInfo := NewFilePathInfo(c, 
"/foo/bar", "20170101010101", "gpseg") - Expect(fpInfo.GetRestoreReportFilePath("20200101010101")).To(Equal("/foo/bar/gpseg-1/backups/20170101/20170101010101/gprestore_20170101010101_20200101010101_report")) - }) - It("returns different report file paths based on user specified report path for backup and restore command", func() { - fpInfo := NewFilePathInfo(c, "/foo/bar", "20170101010101", "gpseg") - fpInfo.SetReportDir("/bar/foo") - Expect(fpInfo.GetBackupReportFilePath()).To(Equal("/foo/bar/gpseg-1/backups/20170101/20170101010101/gpbackup_20170101010101_report")) - Expect(fpInfo.GetRestoreReportFilePath("20200101010101")).To(Equal("/bar/foo/gpseg-1/backups/20170101/20170101010101/gprestore_20170101010101_20200101010101_report")) - }) }) Describe("GetTableBackupFilePath", func() { It("returns table file path", func() { diff --git a/options/flag.go b/options/flag.go index 49a76e101..8dc86ae73 100644 --- a/options/flag.go +++ b/options/flag.go @@ -51,7 +51,6 @@ const ( TRUNCATE_TABLE = "truncate-table" WITHOUT_GLOBALS = "without-globals" RESIZE_CLUSTER = "resize-cluster" - REPORT_DIR = "report-dir" ) func SetBackupFlagDefaults(flagSet *pflag.FlagSet) { @@ -119,7 +118,6 @@ func SetRestoreFlagDefaults(flagSet *pflag.FlagSet) { flagSet.Bool(LEAF_PARTITION_DATA, false, "For partition tables, create one data file per leaf partition instead of one data file for the whole table") flagSet.Bool(RUN_ANALYZE, false, "Run ANALYZE on restored tables") flagSet.Bool(RESIZE_CLUSTER, false, "Restore a backup taken on a cluster with more or fewer segments than the cluster to which it will be restored") - flagSet.String(REPORT_DIR, "", "The absolute path of the directory to which all report files will be written") _ = flagSet.MarkHidden(LEAF_PARTITION_DATA) } diff --git a/restore/restore.go b/restore/restore.go index 2e4dcb988..7a0ea3913 100644 --- a/restore/restore.go +++ b/restore/restore.go @@ -75,11 +75,6 @@ func DoSetup() { segPrefix, err = filepath.ParseSegPrefix(MustGetFlagString(options.BACKUP_DIR)) gplog.FatalOnError(err) globalFPInfo = filepath.NewFilePathInfo(globalCluster, MustGetFlagString(options.BACKUP_DIR), backupTimestamp, segPrefix) - if reportDir := MustGetFlagString(options.REPORT_DIR); reportDir != "" { - globalFPInfo.SetReportDir(reportDir) - info, err := globalCluster.ExecuteLocalCommand(fmt.Sprintf("mkdir -p %s", globalFPInfo.GetDirForReport(-1))) - gplog.FatalOnError(err, info) - } // Get restore metadata from plugin if MustGetFlagString(options.PLUGIN_CONFIG) != "" { From 718cd8ba92edeb9fb679449a17ec55ef22cbd860 Mon Sep 17 00:00:00 2001 From: Alexey Gordeev Date: Tue, 18 Jul 2023 11:12:34 +0500 Subject: [PATCH 23/25] Implement --report-dir option for gprestore (#30) Implement --report-dir option for gprestore New option allows to create report files in directory different from --backup-dir. Main code changes were made for filepath class. Now it takes new option as deafult path for reports with fallback to content dir. Unit and end_to_end tests were updated to show mentioned behavior. 
Cherry-picked-from: ac8731e to reapply above 8bdcb48 and 7346404 --- end_to_end/end_to_end_suite_test.go | 81 +++++++++++++++++++++++++++++ filepath/filepath.go | 24 ++++++++- filepath/filepath_test.go | 14 +++++ options/flag.go | 2 + restore/restore.go | 5 ++ 5 files changed, 125 insertions(+), 1 deletion(-) diff --git a/end_to_end/end_to_end_suite_test.go b/end_to_end/end_to_end_suite_test.go index 3a0fe2412..557e179e7 100644 --- a/end_to_end/end_to_end_suite_test.go +++ b/end_to_end/end_to_end_suite_test.go @@ -363,6 +363,18 @@ func getMetdataFileContents(backupDir string, timestamp string, fileSuffix strin return fileContentBytes } +// check restore file exist and has right permissions +func checkRestoreMetdataFile(backupDir string, timestamp string, fileSuffix string) { + file, err := path.Glob(path.Join(backupDir, "*-1/backups", timestamp[:8], timestamp, fmt.Sprintf("gprestore_%s_*_%s", timestamp, fileSuffix))) + Expect(err).ToNot(HaveOccurred()) + Expect(file).To(HaveLen(1)) + info, err := os.Stat(file[0]) + Expect(err).ToNot(HaveOccurred()) + if info.Mode() != 0444 { + Fail(fmt.Sprintf("File %s is not read-only (mode is %v).", file[0], info.Mode())) + } +} + func saveHistory(myCluster *cluster.Cluster) { // move history file out of the way, and replace in "after". This avoids adding junk to an existing gpackup_history.db @@ -1202,6 +1214,75 @@ var _ = Describe("backup and restore end to end tests", func() { Expect(actualStatisticCount).To(Equal("3")) }) }) + Describe("Restore with --report-dir", func() { + It("runs gprestore without --report-dir", func() { + timestamp := gpbackup(gpbackupPath, backupHelperPath, + "--include-table", "public.sales") + gprestore(gprestorePath, restoreHelperPath, timestamp, + "--redirect-db", "restoredb") + + // Since --report-dir and --backup-dir were not used, restore report should be in default dir + checkRestoreMetdataFile(path.Dir(backupCluster.GetDirForContent(-1)), timestamp, "report") + }) + It("runs gprestore without --report-dir, but with --backup-dir", func() { + timestamp := gpbackup(gpbackupPath, backupHelperPath, + "--backup-dir", backupDir, + "--include-table", "public.sales") + gprestore(gprestorePath, restoreHelperPath, timestamp, + "--backup-dir", backupDir, + "--redirect-db", "restoredb") + + // Since --backup-dir was used, restore report should be in backup dir + checkRestoreMetdataFile(backupDir, timestamp, "report") + }) + It("runs gprestore with --report-dir and same --backup-dir", func() { + timestamp := gpbackup(gpbackupPath, backupHelperPath, + "--backup-dir", backupDir, + "--include-table", "public.sales") + gprestore(gprestorePath, restoreHelperPath, timestamp, + "--backup-dir", backupDir, + "--report-dir", backupDir, + "--redirect-db", "restoredb") + + // Since --report-dir and --backup-dir are the same, restore report should be in backup dir + checkRestoreMetdataFile(backupDir, timestamp, "report") + }) + It("runs gprestore with --report-dir and different --backup-dir", func() { + reportDir := path.Join(backupDir, "restore") + timestamp := gpbackup(gpbackupPath, backupHelperPath, + "--backup-dir", backupDir, + "--include-table", "public.sales") + gprestore(gprestorePath, restoreHelperPath, timestamp, + "--backup-dir", backupDir, + "--report-dir", reportDir, + "--redirect-db", "restoredb") + + // Since --report-dir differs from --backup-dir, restore report should be in report dir + checkRestoreMetdataFile(reportDir, timestamp, "report") + }) + It("runs gprestore with --report-dir and check error_tables* files are present", 
func() { + if segmentCount != 3 { + Skip("Restoring from a tarred backup currently requires a 3-segment cluster to test.") + } + extractDirectory := extractSavedTarFile(backupDir, "corrupt-db") + reportDir := path.Join(backupDir, "restore") + + // Restore command with data error + // Metadata errors due to invalid alter ownership + gprestoreCmd := exec.Command(gprestorePath, + "--timestamp", "20190809230424", + "--redirect-db", "restoredb", + "--backup-dir", extractDirectory, + "--report-dir", reportDir, + "--on-error-continue") + _, _ = gprestoreCmd.CombinedOutput() + + // All report files should be placed in the same dir + checkRestoreMetdataFile(reportDir, "20190809230424", "report") + checkRestoreMetdataFile(reportDir, "20190809230424", "error_tables_metadata") + checkRestoreMetdataFile(reportDir, "20190809230424", "error_tables_data") + }) + }) Describe("Flag combinations", func() { It("runs gpbackup and gprestore without redirecting restore to another db", func() { err := exec.Command("createdb", "recreateme").Run() diff --git a/filepath/filepath.go b/filepath/filepath.go index a5f0fcf7e..5367497d9 100644 --- a/filepath/filepath.go +++ b/filepath/filepath.go @@ -27,6 +27,7 @@ type FilePathInfo struct { UserSpecifiedBackupDir string UserSpecifiedSegPrefix string BaseDataDir string + UserSpecifiedReportDir string } func NewFilePathInfo(c *cluster.Cluster, userSpecifiedBackupDir string, timestamp string, userSegPrefix string, useMirrors ...bool) FilePathInfo { @@ -34,6 +35,7 @@ func NewFilePathInfo(c *cluster.Cluster, userSpecifiedBackupDir string, timestam backupFPInfo.PID = os.Getpid() backupFPInfo.UserSpecifiedBackupDir = userSpecifiedBackupDir backupFPInfo.UserSpecifiedSegPrefix = userSegPrefix + backupFPInfo.UserSpecifiedReportDir = "" backupFPInfo.Timestamp = timestamp backupFPInfo.SegDirMap = make(map[int]string) backupFPInfo.BaseDataDir = "" @@ -51,6 +53,14 @@ func NewFilePathInfo(c *cluster.Cluster, userSpecifiedBackupDir string, timestam return backupFPInfo } +/* + * Set user specified dir for report. + * Currently used for restore only. + */ +func (backupFPInfo *FilePathInfo) SetReportDir(userSpecifiedReportDir string) { + backupFPInfo.UserSpecifiedReportDir = userSpecifiedReportDir +} + /* * Restoring a future-dated backup is allowed (e.g. 
the backup was taken in a * different time zone that is ahead of the restore time zone), so only check @@ -73,6 +83,18 @@ func (backupFPInfo *FilePathInfo) GetDirForContent(contentID int) string { return path.Join(backupFPInfo.SegDirMap[contentID], "backups", backupFPInfo.Timestamp[0:8], backupFPInfo.Timestamp) } +func (backupFPInfo *FilePathInfo) IsUserSpecifiedReportDir() bool { + return backupFPInfo.UserSpecifiedReportDir != "" +} + +func (backupFPInfo *FilePathInfo) GetDirForReport(contentID int) string { + if backupFPInfo.IsUserSpecifiedReportDir() { + segDir := fmt.Sprintf("%s%d", backupFPInfo.UserSpecifiedSegPrefix, contentID) + return path.Join(backupFPInfo.UserSpecifiedReportDir, segDir, "backups", backupFPInfo.Timestamp[0:8], backupFPInfo.Timestamp) + } + return backupFPInfo.GetDirForContent(contentID); +} + func (backupFPInfo *FilePathInfo) replaceCopyFormatStringsInPath(templateFilePath string, contentID int) string { filePath := strings.Replace(templateFilePath, "", backupFPInfo.SegDirMap[contentID], -1) return strings.Replace(filePath, "", strconv.Itoa(contentID), -1) @@ -148,7 +170,7 @@ func (backupFPInfo *FilePathInfo) GetBackupReportFilePath() string { } func (backupFPInfo *FilePathInfo) GetRestoreFilePath(restoreTimestamp string, filetype string) string { - return path.Join(backupFPInfo.GetDirForContent(-1), fmt.Sprintf("gprestore_%s_%s_%s", backupFPInfo.Timestamp, restoreTimestamp, metadataFilenameMap[filetype])) + return path.Join(backupFPInfo.GetDirForReport(-1), fmt.Sprintf("gprestore_%s_%s_%s", backupFPInfo.Timestamp, restoreTimestamp, metadataFilenameMap[filetype])) } func (backupFPInfo *FilePathInfo) GetRestoreReportFilePath(restoreTimestamp string) string { diff --git a/filepath/filepath_test.go b/filepath/filepath_test.go index 9b58fd15d..9d845f02c 100644 --- a/filepath/filepath_test.go +++ b/filepath/filepath_test.go @@ -140,6 +140,20 @@ var _ = Describe("filepath tests", func() { fpInfo := NewFilePathInfo(c, "/foo/bar", "20170101010101", "gpseg") Expect(fpInfo.GetBackupReportFilePath()).To(Equal("/foo/bar/gpseg-1/backups/20170101/20170101010101/gpbackup_20170101010101_report")) }) + It("returns report file path for restore command", func() { + fpInfo := NewFilePathInfo(c, "", "20170101010101", "gpseg") + Expect(fpInfo.GetRestoreReportFilePath("20200101010101")).To(Equal("/data/gpseg-1/backups/20170101/20170101010101/gprestore_20170101010101_20200101010101_report")) + }) + It("returns report file path based on user specified path for restore command", func() { + fpInfo := NewFilePathInfo(c, "/foo/bar", "20170101010101", "gpseg") + Expect(fpInfo.GetRestoreReportFilePath("20200101010101")).To(Equal("/foo/bar/gpseg-1/backups/20170101/20170101010101/gprestore_20170101010101_20200101010101_report")) + }) + It("returns different report file paths based on user specified report path for backup and restore command", func() { + fpInfo := NewFilePathInfo(c, "/foo/bar", "20170101010101", "gpseg") + fpInfo.SetReportDir("/bar/foo") + Expect(fpInfo.GetBackupReportFilePath()).To(Equal("/foo/bar/gpseg-1/backups/20170101/20170101010101/gpbackup_20170101010101_report")) + Expect(fpInfo.GetRestoreReportFilePath("20200101010101")).To(Equal("/bar/foo/gpseg-1/backups/20170101/20170101010101/gprestore_20170101010101_20200101010101_report")) + }) }) Describe("GetTableBackupFilePath", func() { It("returns table file path", func() { diff --git a/options/flag.go b/options/flag.go index de2004fb6..c550fd663 100644 --- a/options/flag.go +++ b/options/flag.go @@ -52,6 +52,7 @@ const ( 
WITHOUT_GLOBALS = "without-globals" RESIZE_CLUSTER = "resize-cluster" NO_INHERITS = "no-inherits" + REPORT_DIR = "report-dir" ) func SetBackupFlagDefaults(flagSet *pflag.FlagSet) { @@ -120,6 +121,7 @@ func SetRestoreFlagDefaults(flagSet *pflag.FlagSet) { flagSet.Bool(LEAF_PARTITION_DATA, false, "For partition tables, create one data file per leaf partition instead of one data file for the whole table") flagSet.Bool(RUN_ANALYZE, false, "Run ANALYZE on restored tables") flagSet.Bool(RESIZE_CLUSTER, false, "Restore a backup taken on a cluster with more or fewer segments than the cluster to which it will be restored") + flagSet.String(REPORT_DIR, "", "The absolute path of the directory to which all report files will be written") _ = flagSet.MarkHidden(LEAF_PARTITION_DATA) } diff --git a/restore/restore.go index 120bf855e..39a4ade5d 100644 --- a/restore/restore.go +++ b/restore/restore.go @@ -75,6 +75,11 @@ func DoSetup() { segPrefix, err = filepath.ParseSegPrefix(MustGetFlagString(options.BACKUP_DIR)) gplog.FatalOnError(err) globalFPInfo = filepath.NewFilePathInfo(globalCluster, MustGetFlagString(options.BACKUP_DIR), backupTimestamp, segPrefix) + if reportDir := MustGetFlagString(options.REPORT_DIR); reportDir != "" { + globalFPInfo.SetReportDir(reportDir) + info, err := globalCluster.ExecuteLocalCommand(fmt.Sprintf("mkdir -p %s", globalFPInfo.GetDirForReport(-1))) + gplog.FatalOnError(err, info) + } // Get restore metadata from plugin if MustGetFlagString(options.PLUGIN_CONFIG) != "" { From faba58c9d6476f4e839f8c00fca66cae2cc817ee Mon Sep 17 00:00:00 2001 From: Alexey Gordeev Date: Mon, 31 Jul 2023 17:28:48 +0500 Subject: [PATCH 24/25] Backup privilege statements as separate entries (#31) Before this patch, all privilege statements were placed in the metadata file as a single pre-data section entry. `editStatementsRedirectSchema()`, used by schema redirection, replaces only the first occurrence of the schema name in an entry. This caused a bug: only the first privilege statement was modified, which in turn caused various errors. `gprestore`, run against the same database, applied most of the privilege statements to the original schema; when run against a different database, it failed with a `schema does not exist` error. Now `GetPrivilegesStatements()` returns a set of separate statements, which are later used as separate entries. `strings.TrimSpace()` was removed as all statements are already trimmed. Existing tests were modified because separate entries have a different format. A new test shows that privileges are applied correctly.
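
A minimal, self-contained Go sketch of the failure mode described above (this is not gpbackup code; the redirectFirstOccurrence helper and the schema/table names are illustrative). With one combined entry, only the first statement is redirected; with separate entries, each statement is rewritten independently, which is what the []string return value of GetPrivilegesStatements achieves.

package main

import (
    "fmt"
    "strings"
)

// Mimics rewriting only the first occurrence of the old schema name in an
// entry, which is the behavior that broke combined privilege entries.
func redirectFirstOccurrence(entry, oldSchema, newSchema string) string {
    return strings.Replace(entry, oldSchema+".", newSchema+".", 1)
}

func main() {
    combined := "REVOKE ALL ON TABLE schema_from.t FROM PUBLIC;\n" +
        "GRANT SELECT ON TABLE schema_from.t TO test_user;"

    // Combined entry: only the REVOKE is moved to schema_to, the GRANT
    // still references schema_from.
    fmt.Println(redirectFirstOccurrence(combined, "schema_from", "schema_to"))
    fmt.Println("---")

    // Separate entries: every statement is redirected on its own.
    for _, stmt := range strings.Split(combined, "\n") {
        fmt.Println(redirectFirstOccurrence(stmt, "schema_from", "schema_to"))
    }
}
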
--- backup/metadata_globals_test.go | 12 +++---- backup/predata_acl.go | 15 ++++----- backup/predata_acl_test.go | 44 +++++++++++++++++++++++++ backup/predata_externals_test.go | 12 +++---- backup/predata_functions_test.go | 18 +++++----- backup/predata_relations.go | 2 +- backup/predata_relations_other_test.go | 16 ++++----- backup/predata_relations_tables_test.go | 12 +++++++ backup/predata_shared_test.go | 6 ++-- end_to_end/end_to_end_suite_test.go | 38 +++++++++++++++++++++ 10 files changed, 133 insertions(+), 42 deletions(-) diff --git a/backup/metadata_globals_test.go b/backup/metadata_globals_test.go index 2bba402a4..d7b5fd0e3 100644 --- a/backup/metadata_globals_test.go +++ b/backup/metadata_globals_test.go @@ -50,9 +50,9 @@ var _ = Describe("backup/metadata_globals tests", func() { `CREATE DATABASE testdb TEMPLATE template0;`, `COMMENT ON DATABASE testdb IS 'This is a database comment.';`, `ALTER DATABASE testdb OWNER TO testrole;`, - `REVOKE ALL ON DATABASE testdb FROM PUBLIC; -REVOKE ALL ON DATABASE testdb FROM testrole; -GRANT TEMPORARY,CONNECT ON DATABASE testdb TO testrole;`, + `REVOKE ALL ON DATABASE testdb FROM PUBLIC;`, + `REVOKE ALL ON DATABASE testdb FROM testrole;`, + `GRANT TEMPORARY,CONNECT ON DATABASE testdb TO testrole;`, `SECURITY LABEL FOR dummy ON DATABASE testdb IS 'unclassified';`} testutils.AssertBufferContents(tocfile.GlobalEntries, buffer, expectedStatements...) }) @@ -428,9 +428,9 @@ ALTER ROLE "testRole2" WITH SUPERUSER INHERIT CREATEROLE CREATEDB LOGIN REPLICAT `CREATE TABLESPACE test_tablespace FILESPACE test_filespace;`, `COMMENT ON TABLESPACE test_tablespace IS 'This is a tablespace comment.';`, `ALTER TABLESPACE test_tablespace OWNER TO testrole;`, - `REVOKE ALL ON TABLESPACE test_tablespace FROM PUBLIC; -REVOKE ALL ON TABLESPACE test_tablespace FROM testrole; -GRANT ALL ON TABLESPACE test_tablespace TO testrole;`, + `REVOKE ALL ON TABLESPACE test_tablespace FROM PUBLIC;`, + `REVOKE ALL ON TABLESPACE test_tablespace FROM testrole;`, + `GRANT ALL ON TABLESPACE test_tablespace TO testrole;`, `SECURITY LABEL FOR dummy ON TABLESPACE test_tablespace IS 'unclassified';`} testutils.AssertBufferContents(tocfile.GlobalEntries, buffer, expectedStatements...) diff --git a/backup/predata_acl.go b/backup/predata_acl.go index 757695f60..3b0be7253 100644 --- a/backup/predata_acl.go +++ b/backup/predata_acl.go @@ -108,8 +108,8 @@ func PrintObjectMetadata(metadataFile *utils.FileWithByteCount, toc *toc.TOC, statements = append(statements, strings.TrimSpace(owner)) } } - if privileges := metadata.GetPrivilegesStatements(obj.FQN(), entry.ObjectType); privileges != "" { - statements = append(statements, strings.TrimSpace(privileges)) + if privileges := metadata.GetPrivilegesStatements(obj.FQN(), entry.ObjectType); len(privileges) > 0 { + statements = append(statements, privileges...); } if securityLabel := metadata.GetSecurityLabelStatement(obj.FQN(), entry.ObjectType); securityLabel != "" { statements = append(statements, strings.TrimSpace(securityLabel)) @@ -138,8 +138,8 @@ func printExtensionFunctionACLs(metadataFile *utils.FileWithByteCount, toc *toc. 
}) statements := make([]string, 0) for _, obj := range objects { - if privileges := obj.GetPrivilegesStatements(obj.FQN(), "FUNCTION"); privileges != "" { - statements = append(statements, strings.TrimSpace(privileges)) + if privileges := obj.GetPrivilegesStatements(obj.FQN(), "FUNCTION"); len(privileges) > 0 { + statements = append(statements, privileges...); PrintStatements(metadataFile, toc, obj, statements) } } @@ -290,7 +290,7 @@ func ParseACL(aclStr string) *ACL { return nil } -func (obj ObjectMetadata) GetPrivilegesStatements(objectName string, objectType string, columnName ...string) string { +func (obj ObjectMetadata) GetPrivilegesStatements(objectName string, objectType string, columnName ...string) []string { statements := make([]string, 0) typeStr := fmt.Sprintf("%s ", objectType) if objectType == "VIEW" || objectType == "FOREIGN TABLE" || objectType == "MATERIALIZED VIEW" { @@ -325,10 +325,7 @@ func (obj ObjectMetadata) GetPrivilegesStatements(objectName string, objectType } } } - if len(statements) > 0 { - return "\n\n" + strings.Join(statements, "\n") - } - return "" + return statements } func createPrivilegeStrings(acl ACL, objectType string) (string, string) { diff --git a/backup/predata_acl_test.go b/backup/predata_acl_test.go index 888ce083f..7f173602a 100644 --- a/backup/predata_acl_test.go +++ b/backup/predata_acl_test.go @@ -57,8 +57,14 @@ ALTER TABLE public.tablename OWNER TO testrole;`) testhelper.ExpectRegexp(buffer, ` REVOKE ALL ON TABLE public.tablename FROM PUBLIC; + + GRANT ALL ON TABLE public.tablename TO anothertestrole; + + GRANT SELECT,INSERT,UPDATE,DELETE,TRUNCATE,REFERENCES ON TABLE public.tablename TO testrole; + + GRANT TRIGGER ON TABLE public.tablename TO PUBLIC;`) }) It("prints a block of REVOKE and GRANT statements WITH GRANT OPTION", func() { @@ -67,8 +73,14 @@ GRANT TRIGGER ON TABLE public.tablename TO PUBLIC;`) testhelper.ExpectRegexp(buffer, ` REVOKE ALL ON TABLE public.tablename FROM PUBLIC; + + GRANT ALL ON TABLE public.tablename TO anothertestrole WITH GRANT OPTION; + + GRANT SELECT,INSERT,UPDATE,DELETE,TRUNCATE,REFERENCES ON TABLE public.tablename TO testrole WITH GRANT OPTION; + + GRANT TRIGGER ON TABLE public.tablename TO PUBLIC WITH GRANT OPTION;`) }) It("prints a block of REVOKE and GRANT statements, some with WITH GRANT OPTION, some without", func() { @@ -77,7 +89,11 @@ GRANT TRIGGER ON TABLE public.tablename TO PUBLIC WITH GRANT OPTION;`) testhelper.ExpectRegexp(buffer, ` REVOKE ALL ON TABLE public.tablename FROM PUBLIC; + + GRANT ALL ON TABLE public.tablename TO anothertestrole; + + GRANT SELECT,INSERT,UPDATE,DELETE,TRUNCATE,REFERENCES ON TABLE public.tablename TO testrole WITH GRANT OPTION;`) }) It("prints both an ALTER TABLE ... 
OWNER TO statement and a table comment", func() { @@ -99,9 +115,17 @@ ALTER TABLE public.tablename OWNER TO testrole; REVOKE ALL ON TABLE public.tablename FROM PUBLIC; + + REVOKE ALL ON TABLE public.tablename FROM testrole; + + GRANT ALL ON TABLE public.tablename TO anothertestrole; + + GRANT SELECT,INSERT,UPDATE,DELETE,TRUNCATE,REFERENCES ON TABLE public.tablename TO testrole; + + GRANT TRIGGER ON TABLE public.tablename TO PUBLIC;`) }) It("prints both a block of REVOKE and GRANT statements and a table comment", func() { @@ -113,8 +137,14 @@ COMMENT ON TABLE public.tablename IS 'This is a table comment.'; REVOKE ALL ON TABLE public.tablename FROM PUBLIC; + + GRANT ALL ON TABLE public.tablename TO anothertestrole; + + GRANT SELECT,INSERT,UPDATE,DELETE,TRUNCATE,REFERENCES ON TABLE public.tablename TO testrole; + + GRANT TRIGGER ON TABLE public.tablename TO PUBLIC;`) }) It("prints REVOKE and GRANT statements, an ALTER TABLE ... OWNER TO statement, and comments", func() { @@ -129,9 +159,17 @@ ALTER TABLE public.tablename OWNER TO testrole; REVOKE ALL ON TABLE public.tablename FROM PUBLIC; + + REVOKE ALL ON TABLE public.tablename FROM testrole; + + GRANT ALL ON TABLE public.tablename TO anothertestrole; + + GRANT SELECT,INSERT,UPDATE,DELETE,TRUNCATE,REFERENCES ON TABLE public.tablename TO testrole; + + GRANT TRIGGER ON TABLE public.tablename TO PUBLIC;`) }) It("prints SERVER for ALTER and FOREIGN SERVER for GRANT/REVOKE for a foreign server", func() { @@ -145,7 +183,11 @@ ALTER SERVER foreignserver OWNER TO testrole; REVOKE ALL ON FOREIGN SERVER foreignserver FROM PUBLIC; + + REVOKE ALL ON FOREIGN SERVER foreignserver FROM testrole; + + GRANT ALL ON FOREIGN SERVER foreignserver TO testrole;`) }) It("prints FUNCTION for REVOKE and AGGREGATE for ALTER for an aggregate function", func() { @@ -159,6 +201,8 @@ ALTER AGGREGATE public.testagg(*) OWNER TO testrole; REVOKE ALL ON FUNCTION public.testagg(*) FROM PUBLIC; + + REVOKE ALL ON FUNCTION public.testagg(*) FROM testrole;`) }) Context("Views and sequences have owners", func() { diff --git a/backup/predata_externals_test.go b/backup/predata_externals_test.go index 9dc0f19fa..dd4c0de5b 100644 --- a/backup/predata_externals_test.go +++ b/backup/predata_externals_test.go @@ -491,9 +491,9 @@ SEGMENT REJECT LIMIT 2 ROWS`) expectedStatements := []string{ "CREATE PROTOCOL s3 (readfunc = public.read_fn_s3, writefunc = public.write_fn_s3);", "ALTER PROTOCOL s3 OWNER TO testrole;", - `REVOKE ALL ON PROTOCOL s3 FROM PUBLIC; -REVOKE ALL ON PROTOCOL s3 FROM testrole; -GRANT ALL ON PROTOCOL s3 TO testrole;`} + `REVOKE ALL ON PROTOCOL s3 FROM PUBLIC;`, + `REVOKE ALL ON PROTOCOL s3 FROM testrole;`, + `GRANT ALL ON PROTOCOL s3 TO testrole;`} testutils.AssertBufferContents(tocfile.PredataEntries, buffer, expectedStatements...) }) It("prints a protocol ACL even when the protocol's CREATE statement is skipped", func() { @@ -511,9 +511,9 @@ GRANT ALL ON PROTOCOL s3 TO testrole;`} backup.PrintCreateExternalProtocolStatement(backupfile, tocfile, protocolUntrustedReadWrite, pgCatalogFuncInfoMap, protoMetadata) expectedStatements := []string{ "ALTER PROTOCOL s3 OWNER TO testrole;", - `REVOKE ALL ON PROTOCOL s3 FROM PUBLIC; -REVOKE ALL ON PROTOCOL s3 FROM testrole; -GRANT ALL ON PROTOCOL s3 TO testrole;`} + `REVOKE ALL ON PROTOCOL s3 FROM PUBLIC;`, + `REVOKE ALL ON PROTOCOL s3 FROM testrole;`, + `GRANT ALL ON PROTOCOL s3 TO testrole;`} testutils.AssertBufferContents(tocfile.PredataEntries, buffer, expectedStatements...) 
}) }) diff --git a/backup/predata_functions_test.go b/backup/predata_functions_test.go index 375d3223c..dbfeee39e 100644 --- a/backup/predata_functions_test.go +++ b/backup/predata_functions_test.go @@ -61,9 +61,9 @@ $$add_two_ints$$ LANGUAGE internal%s;`, DEFAULT_PARALLEL), "COMMENT ON FUNCTION public.func_name(integer, integer) IS 'This is a function comment.';", "ALTER FUNCTION public.func_name(integer, integer) OWNER TO testrole;", - `REVOKE ALL ON FUNCTION public.func_name(integer, integer) FROM PUBLIC; -REVOKE ALL ON FUNCTION public.func_name(integer, integer) FROM testrole; -GRANT ALL ON FUNCTION public.func_name(integer, integer) TO testrole;`, + `REVOKE ALL ON FUNCTION public.func_name(integer, integer) FROM PUBLIC;`, + `REVOKE ALL ON FUNCTION public.func_name(integer, integer) FROM testrole;`, + `GRANT ALL ON FUNCTION public.func_name(integer, integer) TO testrole;`, "SECURITY LABEL FOR dummy ON FUNCTION public.func_name(integer, integer) IS 'unclassified';"} testutils.AssertBufferContents(tocfile.PredataEntries, buffer, expectedStatements...) @@ -825,9 +825,9 @@ ALTER FUNCTION pg_catalog.plperl_validator(oid) OWNER TO testrole;`, // Languages have implicit owners in 4.3, but do not support ALTER OWNER expectedStatements = append(expectedStatements, "ALTER LANGUAGE plpythonu OWNER TO testrole;") } - expectedStatements = append(expectedStatements, `REVOKE ALL ON LANGUAGE plpythonu FROM PUBLIC; -REVOKE ALL ON LANGUAGE plpythonu FROM testrole; -GRANT ALL ON LANGUAGE plpythonu TO testrole;`, + expectedStatements = append(expectedStatements, `REVOKE ALL ON LANGUAGE plpythonu FROM PUBLIC;`, + `REVOKE ALL ON LANGUAGE plpythonu FROM testrole;`, + `GRANT ALL ON LANGUAGE plpythonu TO testrole;`, "SECURITY LABEL FOR dummy ON LANGUAGE plpythonu IS 'unclassified';") testutils.AssertBufferContents(tocfile.PredataEntries, buffer, expectedStatements...) @@ -853,9 +853,9 @@ GRANT ALL ON LANGUAGE plpythonu TO testrole;`, // Languages have implicit owners in 4.3, but do not support ALTER OWNER expectedStatements = append(expectedStatements, `ALTER LANGUAGE plperl OWNER TO testrole;`) } - expectedStatements = append(expectedStatements, `REVOKE ALL ON LANGUAGE plperl FROM PUBLIC; -REVOKE ALL ON LANGUAGE plperl FROM testrole; -GRANT ALL ON LANGUAGE plperl TO testrole;`, + expectedStatements = append(expectedStatements, `REVOKE ALL ON LANGUAGE plperl FROM PUBLIC;`, + `REVOKE ALL ON LANGUAGE plperl FROM testrole;`, + `GRANT ALL ON LANGUAGE plperl TO testrole;`, "SECURITY LABEL FOR dummy ON LANGUAGE plperl IS 'unclassified';") testutils.AssertBufferContents(tocfile.PredataEntries, buffer, expectedStatements...) diff --git a/backup/predata_relations.go b/backup/predata_relations.go index 75beadb87..c296f8765 100644 --- a/backup/predata_relations.go +++ b/backup/predata_relations.go @@ -254,7 +254,7 @@ func PrintPostCreateTableStatements(metadataFile *utils.FileWithByteCount, toc * if att.Privileges.Valid { columnMetadata := ObjectMetadata{Privileges: getColumnACL(att.Privileges, att.Kind), Owner: tableMetadata.Owner} columnPrivileges := columnMetadata.GetPrivilegesStatements(table.FQN(), "COLUMN", att.Name) - statements = append(statements, strings.TrimSpace(columnPrivileges)) + statements = append(statements, columnPrivileges...) 
} if att.SecurityLabel != "" { escapedLabel := utils.EscapeSingleQuotes(att.SecurityLabel) diff --git a/backup/predata_relations_other_test.go b/backup/predata_relations_other_test.go index 133c407c4..71df28afe 100644 --- a/backup/predata_relations_other_test.go +++ b/backup/predata_relations_other_test.go @@ -217,9 +217,9 @@ SELECT pg_catalog.setval('public.seq_''name', 7, true);`, getSeqDefReplace())) SELECT pg_catalog.setval('public.seq_name', 7, true);`, getSeqDefReplace()), "COMMENT ON SEQUENCE public.seq_name IS 'This is a sequence comment.';", fmt.Sprintf("ALTER %s public.seq_name OWNER TO testrole;", keywordReplace), - `REVOKE ALL ON SEQUENCE public.seq_name FROM PUBLIC; -REVOKE ALL ON SEQUENCE public.seq_name FROM testrole; -GRANT SELECT,USAGE ON SEQUENCE public.seq_name TO testrole;`} + `REVOKE ALL ON SEQUENCE public.seq_name FROM PUBLIC;`, + `REVOKE ALL ON SEQUENCE public.seq_name FROM testrole;`, + `GRANT SELECT,USAGE ON SEQUENCE public.seq_name TO testrole;`} testutils.AssertBufferContents(tocfile.PredataEntries, buffer, expectedEntries...) }) It("can print a sequence with privileges WITH GRANT OPTION", func() { @@ -235,8 +235,8 @@ GRANT SELECT,USAGE ON SEQUENCE public.seq_name TO testrole;`} CACHE 5; SELECT pg_catalog.setval('public.seq_name', 7, true);`, getSeqDefReplace()), - `REVOKE ALL ON SEQUENCE public.seq_name FROM PUBLIC; -GRANT SELECT,USAGE ON SEQUENCE public.seq_name TO testrole WITH GRANT OPTION;`) + `REVOKE ALL ON SEQUENCE public.seq_name FROM PUBLIC;`, + `GRANT SELECT,USAGE ON SEQUENCE public.seq_name TO testrole WITH GRANT OPTION;`) }) It("prints data_type of the sequence", func() { if connectionPool.Version.Before("7") { @@ -311,9 +311,9 @@ SELECT pg_catalog.setval('public.seq_name', 10, true);`, getSeqDefReplace()), expectedEntries := []string{"CREATE VIEW shamwow.shazam AS SELECT count(*) FROM pg_tables;", "COMMENT ON VIEW shamwow.shazam IS 'This is a view comment.';", fmt.Sprintf("ALTER %s shamwow.shazam OWNER TO testrole;", keywordReplace), - `REVOKE ALL ON shamwow.shazam FROM PUBLIC; -REVOKE ALL ON shamwow.shazam FROM testrole; -GRANT ALL ON shamwow.shazam TO testrole;`} + `REVOKE ALL ON shamwow.shazam FROM PUBLIC;`, + `REVOKE ALL ON shamwow.shazam FROM testrole;`, + `GRANT ALL ON shamwow.shazam TO testrole;`} if connectionPool.Version.AtLeast("6") { expectedEntries = append(expectedEntries, "SECURITY LABEL FOR dummy ON VIEW shamwow.shazam IS 'unclassified';") diff --git a/backup/predata_relations_tables_test.go b/backup/predata_relations_tables_test.go index 0bed7f11d..9d1f111f3 100644 --- a/backup/predata_relations_tables_test.go +++ b/backup/predata_relations_tables_test.go @@ -617,7 +617,11 @@ ALTER FOREIGN TABLE public.tablename OWNER TO testrole; REVOKE ALL ON public.tablename FROM PUBLIC; + + REVOKE ALL ON public.tablename FROM testrole; + + GRANT ALL ON public.tablename TO testrole; @@ -654,12 +658,20 @@ ALTER TABLE public.tablename OWNER TO testrole; REVOKE ALL (i) ON TABLE public.tablename FROM PUBLIC; + + REVOKE ALL (i) ON TABLE public.tablename FROM testrole; + + GRANT SELECT (i) ON TABLE public.tablename TO testrole; REVOKE ALL (j) ON TABLE public.tablename FROM PUBLIC; + + REVOKE ALL (j) ON TABLE public.tablename FROM testrole; + + GRANT ALL (j) ON TABLE public.tablename TO testrole2;`) }) It("prints a security group statement on a table column", func() { diff --git a/backup/predata_shared_test.go b/backup/predata_shared_test.go index 91df019f8..75dff3b96 100644 --- a/backup/predata_shared_test.go +++ b/backup/predata_shared_test.go @@ -156,9 
+156,9 @@ var _ = Describe("backup/predata_shared tests", func() { expectedStatements := []string{"CREATE SCHEMA schemaname;", "COMMENT ON SCHEMA schemaname IS 'This is a schema comment.';", "ALTER SCHEMA schemaname OWNER TO testrole;", - `REVOKE ALL ON SCHEMA schemaname FROM PUBLIC; -REVOKE ALL ON SCHEMA schemaname FROM testrole; -GRANT ALL ON SCHEMA schemaname TO testrole;`, + `REVOKE ALL ON SCHEMA schemaname FROM PUBLIC;`, + `REVOKE ALL ON SCHEMA schemaname FROM testrole;`, + `GRANT ALL ON SCHEMA schemaname TO testrole;`, "SECURITY LABEL FOR dummy ON SCHEMA schemaname IS 'unclassified';"} testutils.AssertBufferContents(tocfile.PredataEntries, buffer, expectedStatements...) }) diff --git a/end_to_end/end_to_end_suite_test.go b/end_to_end/end_to_end_suite_test.go index 557e179e7..dc07acdda 100644 --- a/end_to_end/end_to_end_suite_test.go +++ b/end_to_end/end_to_end_suite_test.go @@ -1046,6 +1046,44 @@ var _ = Describe("backup and restore end to end tests", func() { assertRelationsCreatedInSchema(restoreConn, "schema2", 0) assertRelationsCreatedInSchema(restoreConn, "fooschema", 0) }) + It("runs gprestore with --redirect-schema restoring all privileges to the new schema", func() { + skipIfOldBackupVersionBefore("1.17.0") + testhelper.AssertQueryRuns(backupConn, + "DROP ROLE IF EXISTS test_user; CREATE ROLE test_user;") + defer testhelper.AssertQueryRuns(backupConn, + "DROP ROLE test_user") + testhelper.AssertQueryRuns(restoreConn, + "DROP SCHEMA IF EXISTS schema_to CASCADE; CREATE SCHEMA schema_to;") + defer testhelper.AssertQueryRuns(restoreConn, + "DROP SCHEMA schema_to CASCADE") + testhelper.AssertQueryRuns(backupConn, + "DROP SCHEMA IF EXISTS schema_from CASCADE; CREATE SCHEMA schema_from;") + defer testhelper.AssertQueryRuns(backupConn, + "DROP SCHEMA schema_from CASCADE") + testhelper.AssertQueryRuns(backupConn, + "CREATE TABLE schema_from.test_table(i int)") + testhelper.AssertQueryRuns(backupConn, + "GRANT SELECT ON schema_from.test_table TO test_user") + + timestamp := gpbackup(gpbackupPath, backupHelperPath, + "--include-schema", "schema_from") + + gprestore(gprestorePath, restoreHelperPath, timestamp, + "--redirect-db", "restoredb", + "--include-table", "schema_from.test_table", + "--redirect-schema", "schema_to") + + assertRelationsCreatedInSchema(restoreConn, "schema_to", 1) + + // The resulting grantee count is 2 (owner and 'test_user'). + // '--redirect-schema' should cause all privilege statements to be + // edited with new schema name. Restoring should not cause an error + // of not existed schema. + privCount := dbconn.MustSelectString(restoreConn, + `select count(distinct grantee) from information_schema.table_privileges` + + ` where table_schema = 'schema_to' and table_name = 'test_table'`) + Expect(privCount).To(Equal("2")) + }) }) Describe("ACLs for extensions", func() { It("runs gpbackup and gprestores any user defined ACLs on extensions", func() { From 1be2727a9ddf221e12b711031fcb09fa6b04ec57 Mon Sep 17 00:00:00 2001 From: Alexandr Barulev Date: Fri, 22 Sep 2023 12:10:31 +0300 Subject: [PATCH 25/25] ADBDEV-4198: Exclude subpartitions for given root partition in leaf-partition-data mode (#38) gpbackup with the --leaf-partition-data and --exclude options adds leaf partitions to the recovery plan, but the root partition is not included in the schema creation script and gprestore fails with an error. The solution is to exclude all leaf partitions for a given root partition in leaf-partition-data mode. 
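
A minimal Go sketch of that idea (not the gpbackup implementation; the OIDs and the inheritance tree are hypothetical). Excluding a root partition under --leaf-partition-data must also exclude every descendant found by walking the inheritance tree (pg_inherits on GPDB 6 and later, pg_partition/pg_partition_rule on earlier versions), so that no leaf partition is left in the backup without its root.

package main

import "fmt"

// descendants returns the given relation and, recursively, all of its
// children, mirroring the transitive walk performed by the catalog query.
func descendants(children map[uint32][]uint32, root uint32) []uint32 {
    oids := []uint32{root}
    for _, child := range children[root] {
        oids = append(oids, descendants(children, child)...)
    }
    return oids
}

func main() {
    // Hypothetical inheritance tree for a two-level partitioned table:
    // root 100 -> range partitions 101, 102 -> list subpartitions.
    children := map[uint32][]uint32{
        100: {101, 102},
        101: {111, 112},
        102: {121, 122},
    }
    // Excluding the root must exclude all seven relations.
    fmt.Println(descendants(children, 100)) // [100 101 111 112 102 121 122]
}
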
The first test checks that if a root partition exclusion and the leaf-partition-data option are specified, then no leaf partitions are included in the restore. The second test checks that if a leaf partition exclusion and the leaf-partition-data option are specified, then the patch does not change the behavior: only that leaf partition is excluded from the backup. The third test checks that if only a root partition exclusion is specified (without leaf-partition-data), then the patch does not change the behavior: the table is not included in the backup. Co-authored-by: Georgy Shelkovy --- backup/queries_relations.go | 37 +++++++++- end_to_end/filtered_test.go | 138 ++++++++++++++++++++++++++++++++++++ 2 files changed, 174 insertions(+), 1 deletion(-) diff --git a/backup/queries_relations.go index ce0db2efa..45050b35c 100644 --- a/backup/queries_relations.go +++ b/backup/queries_relations.go @@ -27,7 +27,7 @@ func relationAndSchemaFilterClause() string { quotedExcludeRelations, err := options.QuoteTableNames(connectionPool, MustGetFlagStringArray(options.EXCLUDE_RELATION)) gplog.FatalOnError(err) - excludeOids := getOidsFromRelationList(connectionPool, quotedExcludeRelations) + excludeOids := getExcludedRelationOidsList(connectionPool, quotedExcludeRelations) if len(excludeOids) > 0 { filterRelationClause += fmt.Sprintf("\nAND c.oid NOT IN (%s)", strings.Join(excludeOids, ", ")) } @@ -42,6 +42,41 @@ func relationAndSchemaFilterClause() string { return filterRelationClause } +func getExcludedRelationOidsList(connectionPool *dbconn.DBConn, quotedIncludeRelations []string) []string { + relList := utils.SliceToQuotedString(quotedIncludeRelations) + + query := "" + if connectionPool.Version.Before("6") { + query = fmt.Sprintf(` + WITH root_oids AS ( + SELECT c.oid AS string + FROM pg_class c + JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE quote_ident(n.nspname) || '.' || quote_ident(c.relname) IN (%s) + ) + SELECT string FROM root_oids + UNION + SELECT r.parchildrelid AS string + FROM pg_partition p JOIN pg_partition_rule r ON p.oid = r.paroid + JOIN root_oids oids ON p.parrelid = oids.string WHERE r.parchildrelid IS NOT NULL + `, relList) + } else { + query = fmt.Sprintf(` + WITH recursive cte AS ( + SELECT c.oid AS string + FROM pg_class c + JOIN pg_namespace n ON c.relnamespace = n.oid + WHERE quote_ident(n.nspname) || '.'
|| quote_ident(c.relname) IN (%s) + UNION ALL + SELECT inhrelid AS strings + FROM cte + LEFT JOIN pg_inherits ON inhparent = string + WHERE inhrelid IS NOT NULL + ) SELECT * FROM cte`, relList) + } + return dbconn.MustSelectStringSlice(connectionPool, query) +} + func getOidsFromRelationList(connectionPool *dbconn.DBConn, quotedIncludeRelations []string) []string { relList := utils.SliceToQuotedString(quotedIncludeRelations) query := fmt.Sprintf(` diff --git a/end_to_end/filtered_test.go b/end_to_end/filtered_test.go index 5d8bdaf4b..9192f6f3e 100644 --- a/end_to_end/filtered_test.go +++ b/end_to_end/filtered_test.go @@ -435,4 +435,142 @@ PARTITION BY LIST (gender) _ = os.Remove("/tmp/exclude-tables.txt") }) }) + Describe("Exclude subpartitions for given root partition in leaf-partition-data mode", func() { + It("runs gpbackup and gprestore with leaf-partition-data and exclude-table root partition backup flags", func() { + testhelper.AssertQueryRuns(backupConn, + `CREATE TABLE public.p1_sales (id int, a int, b int, region text) + WITH (appendoptimized=true) + DISTRIBUTED BY (id) + PARTITION BY RANGE (a) + SUBPARTITION BY RANGE (b) + SUBPARTITION TEMPLATE ( + START (1) END (3) EVERY (1)) + SUBPARTITION BY LIST (region) + SUBPARTITION TEMPLATE ( + SUBPARTITION usa VALUES ('usa'), + SUBPARTITION europe VALUES ('europe')) + ( START (1) END (3) EVERY (1))`) + defer testhelper.AssertQueryRuns(backupConn, `DROP TABLE public.p1_sales CASCADE`) + timestamp := gpbackup(gpbackupPath, backupHelperPath, "--leaf-partition-data", "--exclude-table", "public.p1_sales") + defer assertArtifactsCleaned(restoreConn, timestamp) + gprestore(gprestorePath, restoreHelperPath, timestamp, "--redirect-db", "restoredb") + assertTablesNotRestored(restoreConn, []string{ + "public.p1_sales", + "public.p1_sales_1_prt_1", + "public.p1_sales_1_prt_1_2_prt_1", + "public.p1_sales_1_prt_1_2_prt_1_3_prt_europe", + "public.p1_sales_1_prt_1_2_prt_1_3_prt_usa", + "public.p1_sales_1_prt_1_2_prt_2", + "public.p1_sales_1_prt_1_2_prt_2_3_prt_europe", + "public.p1_sales_1_prt_1_2_prt_2_3_prt_usa", + "public.p1_sales_1_prt_2", + "public.p1_sales_1_prt_2_2_prt_1", + "public.p1_sales_1_prt_2_2_prt_1_3_prt_europe", + "public.p1_sales_1_prt_2_2_prt_1_3_prt_usa", + "public.p1_sales_1_prt_2_2_prt_2", + "public.p1_sales_1_prt_2_2_prt_2_3_prt_europe", + "public.p1_sales_1_prt_2_2_prt_2_3_prt_usa", + }) + }) + It("runs gpbackup and gprestore with leaf-partition-data and exclude-table leaf partition backup flags", func() { + testhelper.AssertQueryRuns(backupConn, + `CREATE TABLE public.p2_sales (id int, a int, b int, region text) + WITH (appendoptimized=true) + DISTRIBUTED BY (id) + PARTITION BY RANGE (a) + SUBPARTITION BY RANGE (b) + SUBPARTITION TEMPLATE ( + START (1) END (3) EVERY (1)) + SUBPARTITION BY LIST (region) + SUBPARTITION TEMPLATE ( + SUBPARTITION usa VALUES ('usa'), + SUBPARTITION europe VALUES ('europe')) + ( START (1) END (3) EVERY (1))`) + testhelper.AssertQueryRuns(backupConn, + `INSERT INTO public.p2_sales VALUES + (1, 1, 1, 'usa'), + (1, 1, 1, 'europe'), + (1, 1, 2, 'usa'), + (1, 1, 2, 'europe'), + (1, 2, 1, 'usa'), + (1, 2, 1, 'europe'), + (1, 2, 2, 'usa'), + (1, 2, 2, 'europe') + `) + defer testhelper.AssertQueryRuns(backupConn, `DROP TABLE public.p2_sales CASCADE`) + timestamp := gpbackup(gpbackupPath, backupHelperPath, "--leaf-partition-data", "--exclude-table", "public.p2_sales_1_prt_1_2_prt_1_3_prt_usa") + defer assertArtifactsCleaned(restoreConn, timestamp) + gprestore(gprestorePath, restoreHelperPath, timestamp, 
"--redirect-db", "restoredb") + assertTablesRestored(restoreConn, []string{ + "public.p2_sales", + "public.p2_sales_1_prt_1", + "public.p2_sales_1_prt_1_2_prt_1", + "public.p2_sales_1_prt_1_2_prt_1_3_prt_europe", + "public.p2_sales_1_prt_1_2_prt_1_3_prt_usa", + "public.p2_sales_1_prt_1_2_prt_2", + "public.p2_sales_1_prt_1_2_prt_2_3_prt_europe", + "public.p2_sales_1_prt_1_2_prt_2_3_prt_usa", + "public.p2_sales_1_prt_2", + "public.p2_sales_1_prt_2_2_prt_1", + "public.p2_sales_1_prt_2_2_prt_1_3_prt_europe", + "public.p2_sales_1_prt_2_2_prt_1_3_prt_usa", + "public.p2_sales_1_prt_2_2_prt_2", + "public.p2_sales_1_prt_2_2_prt_2_3_prt_europe", + "public.p2_sales_1_prt_2_2_prt_2_3_prt_usa", + }) + assertDataRestored(restoreConn, map[string]int{ + "public.p2_sales": 7, + "public.p2_sales_1_prt_1": 3, + "public.p2_sales_1_prt_1_2_prt_1": 1, + "public.p2_sales_1_prt_1_2_prt_1_3_prt_europe": 1, + "public.p2_sales_1_prt_1_2_prt_1_3_prt_usa": 0, + "public.p2_sales_1_prt_1_2_prt_2": 2, + "public.p2_sales_1_prt_1_2_prt_2_3_prt_europe": 1, + "public.p2_sales_1_prt_1_2_prt_2_3_prt_usa": 1, + "public.p2_sales_1_prt_2": 4, + "public.p2_sales_1_prt_2_2_prt_1": 2, + "public.p2_sales_1_prt_2_2_prt_1_3_prt_europe": 1, + "public.p2_sales_1_prt_2_2_prt_1_3_prt_usa": 1, + "public.p2_sales_1_prt_2_2_prt_2": 2, + "public.p2_sales_1_prt_2_2_prt_2_3_prt_europe": 1, + "public.p2_sales_1_prt_2_2_prt_2_3_prt_usa": 1, + }) + }) + It("runs gpbackup and gprestore without leaf-partition-data and with exclude-table root partition backup flags", func() { + testhelper.AssertQueryRuns(backupConn, + `CREATE TABLE public.p3_sales (id int, a int, b int, region text) + WITH (appendoptimized=true) + DISTRIBUTED BY (id) + PARTITION BY RANGE (a) + SUBPARTITION BY RANGE (b) + SUBPARTITION TEMPLATE ( + START (1) END (3) EVERY (1)) + SUBPARTITION BY LIST (region) + SUBPARTITION TEMPLATE ( + SUBPARTITION usa VALUES ('usa'), + SUBPARTITION europe VALUES ('europe')) + ( START (1) END (3) EVERY (1))`) + defer testhelper.AssertQueryRuns(backupConn, `DROP TABLE public.p3_sales CASCADE`) + timestamp := gpbackup(gpbackupPath, backupHelperPath, "--exclude-table", "public.p3_sales") + defer assertArtifactsCleaned(restoreConn, timestamp) + gprestore(gprestorePath, restoreHelperPath, timestamp, "--redirect-db", "restoredb") + assertTablesNotRestored(restoreConn, []string{ + "public.p3_sales", + "public.p3_sales_1_prt_1", + "public.p3_sales_1_prt_1_2_prt_1", + "public.p3_sales_1_prt_1_2_prt_1_3_prt_europe", + "public.p3_sales_1_prt_1_2_prt_1_3_prt_usa", + "public.p3_sales_1_prt_1_2_prt_2", + "public.p3_sales_1_prt_1_2_prt_2_3_prt_europe", + "public.p3_sales_1_prt_1_2_prt_2_3_prt_usa", + "public.p3_sales_1_prt_2", + "public.p3_sales_1_prt_2_2_prt_1", + "public.p3_sales_1_prt_2_2_prt_1_3_prt_europe", + "public.p3_sales_1_prt_2_2_prt_1_3_prt_usa", + "public.p3_sales_1_prt_2_2_prt_2", + "public.p3_sales_1_prt_2_2_prt_2_3_prt_europe", + "public.p3_sales_1_prt_2_2_prt_2_3_prt_usa", + }) + }) + }) })