From 6e922158336fd8b95736e924a625c905a233df2b Mon Sep 17 00:00:00 2001
From: Brent Doil
Date: Mon, 22 Nov 2021 14:02:45 -0500
Subject: [PATCH 01/16] Log AccessExclusiveLock held on relation when worker
 cannot obtain AccessShareLock

In case we aren't able to grab an AccessShareLock on the table being backed
up, we terminate the worker thread and its corresponding connection and let
the main worker thread (Worker 0) handle them. This can happen when there is
another session with an AccessExclusiveLock on the same table being handled
by the worker thread.

When this occurs, log the AccessExclusiveLock being held on the table.

Authored-by: Brent Doil
---
 backup/backup.go                    | 75 +++++++++++++++++++++++++++++
 backup/data.go                      | 11 ++---
 end_to_end/end_to_end_suite_test.go |  5 ++
 3 files changed, 84 insertions(+), 7 deletions(-)

diff --git a/backup/backup.go b/backup/backup.go
index 19c3f98ca..f620a39f8 100644
--- a/backup/backup.go
+++ b/backup/backup.go
@@ -1,6 +1,7 @@
 package backup
 
 import (
+	"encoding/json"
 	"errors"
 	"fmt"
 	"os"
@@ -518,3 +519,77 @@ func CreateInitialSegmentPipes(oidList []string, c *cluster.Cluster, connectionP
 	}
 	return maxPipes
 }
+
+type TableLocks struct {
+	Oid         uint32
+	Database    string
+	Relation    string
+	Mode        string
+	Application string
+	Granted     string
+	User        string
+	Pid         string
+}
+
+func getTableLocks(table Table) []TableLocks {
+	conn := dbconn.NewDBConnFromEnvironment(MustGetFlagString(options.DBNAME))
+	conn.MustConnect(1)
+	var query string
+	defer conn.Close()
+	if conn.Version.Before("6") {
+		query = fmt.Sprintf(`
+		SELECT c.oid as oid,
+			coalesce(a.datname, '') as database,
+			n.nspname || '.' || l.relation::regclass as relation,
+			l.mode,
+			l.GRANTED as granted,
+			coalesce(a.application_name, '') as application,
+			coalesce(a.usename, '') as user,
+			a.procpid as pid
+		FROM pg_stat_activity a
+		JOIN pg_locks l ON l.pid = a.procpid
+		JOIN pg_class c on c.oid = l.relation
+		JOIN pg_namespace n on n.oid=c.relnamespace
+		WHERE (a.datname = '%s' OR a.datname IS NULL)
+		AND NOT a.procpid = pg_backend_pid()
+		AND relation = '%s'::regclass
+		AND mode = 'AccessExclusiveLock'
+		ORDER BY a.query_start;
+		`, conn.DBName, table.FQN())
+	} else {
+		query = fmt.Sprintf(`
+		SELECT c.oid as oid,
+			coalesce(a.datname, '') as database,
+			n.nspname || '.' || l.relation::regclass relation,
+			l.mode,
+			l.GRANTED as granted,
+			coalesce(a.application_name, '') as application,
+			coalesce(a.usename, '') as user,
+			a.pid
+		FROM pg_stat_activity a
+		JOIN pg_locks l ON l.pid = a.pid
+		JOIN pg_class c on c.oid = l.relation
+		JOIN pg_namespace n on n.oid=c.relnamespace
+		WHERE (a.datname = '%s' OR a.datname IS NULL)
+		AND NOT a.pid = pg_backend_pid()
+		AND relation = '%s'::regclass
+		AND mode = 'AccessExclusiveLock'
+		ORDER BY a.query_start;
+		`, conn.DBName, table.FQN())
+	}
+
+	locksResults := make([]TableLocks, 0)
+	err := conn.Select(&locksResults, query)
+	if err != nil {
+		gplog.FatalOnError(err)
+	}
+
+	return locksResults
+}
+
+func logTableLocks(table Table, whichConn int) {
+	locks := getTableLocks(table)
+	jsonData, _ := json.Marshal(&locks)
+	gplog.Warn("Worker %d could not acquire AccessShareLock for table %s. 
Terminating worker and deferring table to main worker thread.", whichConn,table.FQN()) + gplog.Warn("Locks held on table %s: %s", table.FQN(), jsonData) +} diff --git a/backup/data.go b/backup/data.go index 48bef59c2..b8befc9df 100644 --- a/backup/data.go +++ b/backup/data.go @@ -183,9 +183,8 @@ func backupDataForAllTablesCopyQueue(tables []Table) []map[uint32]int64 { // the following WARN message is nicely outputted. fmt.Printf("\n") } - gplog.Warn("Worker %d could not acquire AccessShareLock for table %s. Terminating worker and deferring table to main worker thread.", - whichConn, table.FQN()) - + // Log locks held on the table + logTableLocks(table, whichConn) oidMap.Store(table.Oid, Deferred) // Rollback transaction since it's in an aborted state connectionPool.MustRollback(whichConn) @@ -322,9 +321,8 @@ func backupDataForAllTables(tables []Table) []map[uint32]int64 { // the following WARN message is nicely outputted. fmt.Printf("\n") } - gplog.Warn("Worker %d could not acquire AccessShareLock for table %s. Terminating worker and deferring table to main worker thread.", - whichConn, table.FQN()) - + // Log locks held on the table + logTableLocks(table, whichConn) // Defer table to main worker thread deferredTablesMutex.Lock() deferredTables = append(deferredTables, table) @@ -412,6 +410,5 @@ func LockTableNoWait(dataTable Table, connNum int) error { if err != nil { return err } - return nil } diff --git a/end_to_end/end_to_end_suite_test.go b/end_to_end/end_to_end_suite_test.go index 366b67cf7..30884d8c4 100644 --- a/end_to_end/end_to_end_suite_test.go +++ b/end_to_end/end_to_end_suite_test.go @@ -1656,6 +1656,11 @@ var _ = Describe("backup and restore end to end tests", func() { unexpectedCopyString := fmt.Sprintf("[DEBUG]:-Worker %d: COPY ", i) Expect(stdout).ToNot(ContainSubstring(unexpectedCopyString)) + + expectedLockString = fmt.Sprintf(`Locks held on table %s`, dataTables[i]) + Expect(stdout).To(ContainSubstring(expectedLockString)) + + Expect(stdout).To(ContainSubstring(`"Mode":"AccessExclusiveLock"`)) } // Only the main worker thread, worker 0, will run COPY on all the test tables From 2533ab46972fedb86c4c9036ed18c67cd67b79ee Mon Sep 17 00:00:00 2001 From: Brent Doil Date: Tue, 30 Nov 2021 18:46:34 -0500 Subject: [PATCH 02/16] Fix getTableLocks to not use text || regclass operator Resolves ERROR: operator does not exist: text || regclass on GPDB4.3 Authored-by: Brent Doil --- backup/backup.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/backup/backup.go b/backup/backup.go index f620a39f8..f280fcea5 100644 --- a/backup/backup.go +++ b/backup/backup.go @@ -540,7 +540,7 @@ func getTableLocks(table Table) []TableLocks { query = fmt.Sprintf(` SELECT c.oid as oid, coalesce(a.datname, '') as database, - n.nspname || '.' || l.relation::regclass as relation, + n.nspname || '.' || l.relation as relation, l.mode, l.GRANTED as granted, coalesce(a.application_name, '') as application, @@ -560,7 +560,7 @@ func getTableLocks(table Table) []TableLocks { query = fmt.Sprintf(` SELECT c.oid as oid, coalesce(a.datname, '') as database, - n.nspname || '.' || l.relation::regclass relation, + n.nspname || '.' 
|| l.relation relation, l.mode, l.GRANTED as granted, coalesce(a.application_name, '') as application, From bd8862d300273d203a97454dad4c8fd0e38f2867 Mon Sep 17 00:00:00 2001 From: Brent Doil Date: Tue, 30 Nov 2021 19:19:16 -0500 Subject: [PATCH 03/16] Increase ccp_reap_minutes to 360 Authored-by: Brent Doil --- ci/gpbackup-generated.yml | 6 +++--- ci/gpbackup-release-generated.yml | 6 +++--- ci/templates/gpbackup-tpl.yml | 4 ++-- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/ci/gpbackup-generated.yml b/ci/gpbackup-generated.yml index bf0063c15..95dab75f9 100644 --- a/ci/gpbackup-generated.yml +++ b/ci/gpbackup-generated.yml @@ -12,7 +12,7 @@ ## file (example: templates/gpbackup-tpl.yml) and regenerate the pipeline ## using appropriate tool (example: gen_pipeline.py -p gpbackup-release). ## ---------------------------------------------------------------------- -## Generated by gen_pipeline.py at: 2021-07-27 15:59:55.830445 +## Generated by gen_pipeline.py at: 2021-11-30 19:18:54.855194 ## Template file: gpbackup-tpl.yml ## Pipeline Name: gpbackup ## Nightly Trigger: True @@ -875,7 +875,7 @@ jobs: number_of_nodes: 4 segments_per_host: 4 instance_type: n1-standard-8 - ccp_reap_minutes: 240 + ccp_reap_minutes: 360 - task: gen_cluster file: ccp_src/ci/tasks/gen_cluster.yml params: @@ -971,7 +971,7 @@ jobs: number_of_nodes: 4 segments_per_host: 4 aws_instance-node-instance_type: i3.xlarge - ccp_reap_minutes: 240 + ccp_reap_minutes: 360 - task: gen_cluster tags: ["ddboost"] params: diff --git a/ci/gpbackup-release-generated.yml b/ci/gpbackup-release-generated.yml index c1edcfd7a..ab5769b83 100644 --- a/ci/gpbackup-release-generated.yml +++ b/ci/gpbackup-release-generated.yml @@ -12,7 +12,7 @@ ## file (example: templates/gpbackup-tpl.yml) and regenerate the pipeline ## using appropriate tool (example: gen_pipeline.py -p gpbackup-release). 
## ---------------------------------------------------------------------- -## Generated by gen_pipeline.py at: 2021-07-27 15:59:55.866024 +## Generated by gen_pipeline.py at: 2021-11-30 19:18:54.874625 ## Template file: gpbackup-tpl.yml ## Pipeline Name: gpbackup-release ## Nightly Trigger: True @@ -878,7 +878,7 @@ jobs: number_of_nodes: 4 segments_per_host: 4 instance_type: n1-standard-8 - ccp_reap_minutes: 240 + ccp_reap_minutes: 360 - task: gen_cluster file: ccp_src/ci/tasks/gen_cluster.yml params: @@ -974,7 +974,7 @@ jobs: number_of_nodes: 4 segments_per_host: 4 aws_instance-node-instance_type: i3.xlarge - ccp_reap_minutes: 240 + ccp_reap_minutes: 360 - task: gen_cluster tags: ["ddboost"] params: diff --git a/ci/templates/gpbackup-tpl.yml b/ci/templates/gpbackup-tpl.yml index 7ffc9c20e..eb434a7a1 100644 --- a/ci/templates/gpbackup-tpl.yml +++ b/ci/templates/gpbackup-tpl.yml @@ -1004,7 +1004,7 @@ jobs: number_of_nodes: 4 segments_per_host: 4 instance_type: n1-standard-8 - ccp_reap_minutes: 240 + ccp_reap_minutes: 360 - task: gen_cluster file: ccp_src/ci/tasks/gen_cluster.yml params: @@ -1102,7 +1102,7 @@ jobs: number_of_nodes: 4 segments_per_host: 4 aws_instance-node-instance_type: i3.xlarge - ccp_reap_minutes: 240 + ccp_reap_minutes: 360 - task: gen_cluster tags: ["ddboost"] params: From 3a9a4f58b42fc73ff1c35596cdb6d17e1294fa2e Mon Sep 17 00:00:00 2001 From: Brent Doil Date: Tue, 30 Nov 2021 19:20:05 -0500 Subject: [PATCH 04/16] Remove `--copy-queue-size 4 restore` scale test To reduce overall time of scale tests, only restore using --copy-queue-size 2 and --copy-queue-size 8 Authored-by: Brent Doil --- ci/scripts/scale-tests.bash | 3 --- 1 file changed, 3 deletions(-) diff --git a/ci/scripts/scale-tests.bash b/ci/scripts/scale-tests.bash index 09370a66a..cc91eacc0 100755 --- a/ci/scripts/scale-tests.bash +++ b/ci/scripts/scale-tests.bash @@ -59,9 +59,6 @@ gpbackup_manager display-report \$timestamp echo "## Performing single-data-file, --no-compression, --copy-queue-size 2 restore for copy queue test ##" time gprestore --timestamp "\$timestamp" --backup-dir /data/gpdata/ --create-db --redirect-db copyqueuerestore2 --copy-queue-size 2 -echo "## Performing single-data-file, --no-compression, --copy-queue-size 4 restore for copy queue test ##" -time gprestore --timestamp "\$timestamp" --backup-dir /data/gpdata/ --create-db --redirect-db copyqueuerestore4 --copy-queue-size 4 - echo "## Performing single-data-file, --no-compression, --copy-queue-size 8 restore for copy queue test ##" time gprestore --timestamp "\$timestamp" --backup-dir /data/gpdata/ --create-db --redirect-db copyqueuerestore8 --copy-queue-size 8 From 99504926b0d3cd426ec6b466ea91e0b578ab0ad8 Mon Sep 17 00:00:00 2001 From: Kevin Yeap Date: Tue, 30 Nov 2021 15:24:36 -0600 Subject: [PATCH 05/16] Local s3-plugin testing using Minio Minio is an s3 compliant object store. A Minio server can be setup locally and used to test our s3 plugin. This make local testing of our s3 plugin possible and easy without going through the trouble of needing to setup Amazon S3 just for a testing endpoint. Created a new makefile rule to setup a Minio server using docker, run our plugin_test against the s3 plugin, and tear it down. 
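A note for anyone running the pieces by hand rather than through the new
Makefile rule: the rule waits for the server with a fixed `sleep 2` after
`docker run`. A rough, hypothetical readiness check, not part of this patch
and assuming the localhost:9000 endpoint used in the `docker run` flags above,
could poll the port instead:

    // Hypothetical helper (illustration only): wait until the local Minio
    // container accepts TCP connections before running plugin_test.sh.
    package main

    import (
    	"fmt"
    	"net"
    	"time"
    )

    // waitForMinio polls addr until a TCP connection succeeds or the timeout
    // expires. The address and timeout values are assumptions for the example.
    func waitForMinio(addr string, timeout time.Duration) error {
    	deadline := time.Now().Add(timeout)
    	for time.Now().Before(deadline) {
    		conn, err := net.DialTimeout("tcp", addr, time.Second)
    		if err == nil {
    			conn.Close()
    			return nil
    		}
    		time.Sleep(250 * time.Millisecond)
    	}
    	return fmt.Errorf("minio at %s not reachable within %s", addr, timeout)
    }

    func main() {
    	if err := waitForMinio("localhost:9000", 30*time.Second); err != nil {
    		panic(err)
    	}
    	fmt.Println("minio is up; safe to run plugin_test.sh")
    }
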
Co-authored-by: Kevin Yeap Co-authored-by: Brent Doil --- Makefile | 13 +++++++++++++ plugins/generate_minio_config.sh | 11 +++++++++++ 2 files changed, 24 insertions(+) create mode 100755 plugins/generate_minio_config.sh diff --git a/Makefile b/Makefile index 456cbee0c..eaea58288 100644 --- a/Makefile +++ b/Makefile @@ -108,6 +108,10 @@ clean : rm -f $(BIN_DIR)/$(BACKUP) $(BACKUP) $(BIN_DIR)/$(RESTORE) $(RESTORE) $(BIN_DIR)/$(HELPER) $(HELPER) # Test artifacts rm -rf /tmp/go-build* /tmp/gexec_artifacts* /tmp/ginkgo* + docker stop s3-minio # stop minio before removing its data directories + docker rm s3-minio + rm -rf /tmp/minio + rm -f /tmp/minio_config.yaml # Code coverage files rm -rf /tmp/cover* /tmp/unit* go clean -i -r -x -testcache -modcache @@ -126,3 +130,12 @@ info-report: @echo "Info and verbose messaging:" @echo "" @ag "gplog.Info|gplog.Verbose" --ignore "*_test*" + +test-s3-local: build install + ${PWD}/plugins/generate_minio_config.sh + mkdir -p /tmp/minio/gpbackup-s3-test + docker run -d --name s3-minio -p 9000:9000 -p 9001:9001 -v /tmp/minio:/data/minio quay.io/minio/minio server /data/minio --console-address ":9001" + sleep 2 # Wait for minio server to start up + ${PWD}/plugins/plugin_test.sh $(BIN_DIR)/gpbackup_s3_plugin /tmp/minio_config.yaml + docker stop s3-minio + docker rm s3-minio diff --git a/plugins/generate_minio_config.sh b/plugins/generate_minio_config.sh new file mode 100755 index 000000000..7f1a7dd66 --- /dev/null +++ b/plugins/generate_minio_config.sh @@ -0,0 +1,11 @@ +#!/bin/bash + +cat < /tmp/minio_config.yaml +executablepath: $(which gpbackup_s3_plugin) +options: + endpoint: http://localhost:9000/ + aws_access_key_id: minioadmin + aws_secret_access_key: minioadmin + bucket: gpbackup-s3-test + folder: test/backup +MINIO_CONFIG From 514f2921dd2a37b660596a5792052baeab6c3760 Mon Sep 17 00:00:00 2001 From: "yuanzefuwater@126.com" Date: Sat, 25 Dec 2021 21:33:57 +0800 Subject: [PATCH 06/16] I checked the Makefile and there is no 'build_mac' tag, so I make this change. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index f479f31b2..edacb235c 100644 --- a/README.md +++ b/README.md @@ -27,7 +27,7 @@ The `build` target will put the `gpbackup` and `gprestore` binaries in `$HOME/go This will also attempt to copy `gpbackup_helper` to the greenplum segments (retrieving hostnames from `gp_segment_configuration`). Pay attention to the output as it will indicate whether this operation was successful. -`make build_linux` and `make build_mac` are for cross compiling between macOS and Linux +`make build_linux` is for cross compiling on macOS, and the target is Linux. `make install` will scp the `gpbackup_helper` binary (used with -single-data-file flag) to all hosts From 33b9a7e819aa3c02c8634f589e60088b80ecf6cf Mon Sep 17 00:00:00 2001 From: hughcapet Date: Tue, 19 Oct 2021 10:27:25 +0300 Subject: [PATCH 07/16] Fix single-data-file incremental backup panic if all tables are filtered When taking a single-data-file incremental backup of a database of AO and external tables and no heap tables, gpbackup will panic if there are no tables to backup. This is because AO tables with no incremental changes are filtered out first while external tables are filtered out just before creating the first table's pipe for single-data-file. If all tables get filtered out, when gpbackup tries access the first table's oid to create the first pipe, it will cause an 'out of range' panic because the table list is empty. 
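The panic itself is ordinary Go slice indexing on an empty list. A standalone
sketch of the pattern, using invented names rather than gpbackup's real types,
shows the empty-set guard that the rest of this message describes:

    // Illustration only: simplified stand-ins for the real table filtering.
    package main

    import "fmt"

    type table struct {
    	oid        uint32
    	isExternal bool // stand-in for "external/foreign, or AO with no changes"
    }

    // firstPipeOid filters out tables with no data to copy and reports whether
    // anything is left to create the first pipe for.
    func firstPipeOid(tables []table) (uint32, bool) {
    	dataSet := make([]table, 0, len(tables))
    	for _, t := range tables {
    		if !t.isExternal {
    			dataSet = append(dataSet, t)
    		}
    	}
    	// Indexing dataSet[0] unconditionally would panic with
    	// "index out of range [0] with length 0" once every table is filtered.
    	if len(dataSet) == 0 {
    		return 0, false
    	}
    	return dataSet[0].oid, true
    }

    func main() {
    	onlyExternal := []table{{oid: 16390, isExternal: true}}
    	if _, ok := firstPipeOid(onlyExternal); !ok {
    		fmt.Println("no tables contain data; skip pipe creation and back up metadata only")
    	}
    }
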
With this commit, gpbackup avoids this situation by filtering out all external tables early before backupData is called. This will cause backupData to return early because it knows there is no data to backup. --- backup/backup.go | 12 ++++++++---- backup/data.go | 27 +++++++++++---------------- backup/data_test.go | 42 ++++++++++++++++++++++++++++-------------- 3 files changed, 47 insertions(+), 34 deletions(-) diff --git a/backup/backup.go b/backup/backup.go index f280fcea5..7557faca7 100644 --- a/backup/backup.go +++ b/backup/backup.go @@ -120,13 +120,18 @@ func DoBackup() { gplog.Info("Gathering table state information") metadataTables, dataTables := RetrieveAndProcessTables() + dataTables, numExtOrForeignTables := GetBackupDataSet(dataTables) + if len(dataTables) == 0 { + gplog.Warn("No tables in backup set contain data. Performing metadata-only backup instead.") + backupReport.MetadataOnly = true + } // This must be a full backup with --leaf-parition-data to query for incremental metadata if !(MustGetFlagBool(options.METADATA_ONLY) || MustGetFlagBool(options.DATA_ONLY)) && MustGetFlagBool(options.LEAF_PARTITION_DATA) { backupIncrementalMetadata() } else { gplog.Verbose("Skipping query for incremental metadata.") } - CheckTablesContainData(dataTables) + metadataFilename := globalFPInfo.GetMetadataFilePath() gplog.Info("Metadata will be written to %s", metadataFilename) metadataFile := utils.NewFileWithByteCountFromFile(metadataFilename) @@ -163,6 +168,7 @@ func DoBackup() { backupReport.RestorePlan = PopulateRestorePlan(backupSetTables, targetBackupRestorePlan, dataTables) backupData(backupSetTables) } + printDataBackupWarnings(numExtOrForeignTables) if MustGetFlagBool(options.WITH_STATS) { backupStatistics(metadataTables) } @@ -265,9 +271,7 @@ func backupData(tables []Table) { utils.VerifyHelperVersionOnSegments(version, globalCluster) oidList := make([]string, 0, len(tables)) for _, table := range tables { - if !table.SkipDataBackup() { - oidList = append(oidList, fmt.Sprintf("%d", table.Oid)) - } + oidList = append(oidList, fmt.Sprintf("%d", table.Oid)) } utils.WriteOidListToSegments(oidList, globalCluster, globalFPInfo) compressStr := fmt.Sprintf(" --compression-level %d --compression-type %s", MustGetFlagInt(options.COMPRESSION_LEVEL), MustGetFlagString(options.COMPRESSION_TYPE)) diff --git a/backup/data.go b/backup/data.go index b8befc9df..753aba6eb 100644 --- a/backup/data.go +++ b/backup/data.go @@ -265,13 +265,7 @@ func backupDataForAllTablesCopyQueue(tables []Table) []map[uint32]int64 { } func backupDataForAllTables(tables []Table) []map[uint32]int64 { - var numExtOrForeignTables int64 - for _, table := range tables { - if table.SkipDataBackup() { - numExtOrForeignTables++ - } - } - counters := BackupProgressCounters{NumRegTables: 0, TotalRegTables: int64(len(tables)) - numExtOrForeignTables} + counters := BackupProgressCounters{NumRegTables: 0, TotalRegTables: int64(len(tables))} counters.ProgressBar = utils.NewProgressBar(int(counters.TotalRegTables), "Tables backed up: ", utils.PB_INFO) counters.ProgressBar.Start() rowsCopiedMaps := make([]map[uint32]int64, connectionPool.NumConns) @@ -296,10 +290,6 @@ func backupDataForAllTables(tables []Table) []map[uint32]int64 { return } - if table.SkipDataBackup() { - gplog.Verbose("Skipping data backup of table %s because it is either an external or foreign table.", table.FQN()) - continue - } // If a random external SQL command had queued an AccessExclusiveLock acquisition request // against this next table, the --job worker thread 
would deadlock on the COPY attempt. // To prevent gpbackup from hanging, we attempt to acquire an AccessShareLock on the @@ -378,7 +368,6 @@ func backupDataForAllTables(tables []Table) []map[uint32]int64 { } counters.ProgressBar.Finish() - printDataBackupWarnings(numExtOrForeignTables) return rowsCopiedMaps } @@ -389,16 +378,22 @@ func printDataBackupWarnings(numExtTables int64) { } } -func CheckTablesContainData(tables []Table) { +// Remove external/foreign tables from the data backup set +func GetBackupDataSet(tables []Table) ([]Table, int64) { + var backupDataSet []Table + var numExtOrForeignTables int64 + if !backupReport.MetadataOnly { for _, table := range tables { if !table.SkipDataBackup() { - return + backupDataSet = append(backupDataSet, table) + } else { + gplog.Verbose("Skipping data backup of table %s because it is either an external or foreign table.", table.FQN()) + numExtOrForeignTables++ } } - gplog.Warn("No tables in backup set contain data. Performing metadata-only backup instead.") - backupReport.MetadataOnly = true } + return backupDataSet, numExtOrForeignTables } // Acquire AccessShareLock on a table with NOWAIT option. If we are unable to acquire diff --git a/backup/data_test.go b/backup/data_test.go index d4d7db22e..ffb0b31fa 100644 --- a/backup/data_test.go +++ b/backup/data_test.go @@ -197,7 +197,7 @@ var _ = Describe("backup/data tests", func() { Expect(counters.NumRegTables).To(Equal(int64(1))) }) }) - Describe("CheckDBContainsData", func() { + Describe("GetBackupDataSet", func() { config := history.BackupConfig{} var testTable backup.Table BeforeEach(func() { @@ -208,24 +208,38 @@ var _ = Describe("backup/data tests", func() { TableDefinition: backup.TableDefinition{}, } }) - It("changes backup type to metadata if no tables in DB", func() { - backup.CheckTablesContainData([]backup.Table{}) - Expect(backup.GetReport().BackupConfig.MetadataOnly).To(BeTrue()) + It("excludes a foreign table", func() { + foreignDef := backup.ForeignTableDefinition{Oid: 23, Options: "", Server: "fs"} + testTable.ForeignDef = foreignDef + tables := []backup.Table{testTable} + + dataTables, numExtOrForeignTables := backup.GetBackupDataSet(tables) + Expect(len(dataTables)).To(Equal(0)) + Expect(numExtOrForeignTables).To(Equal(int64(1))) }) - It("changes backup type to metadata if only external or foreign tables in database", func() { + It("excludes an external table", func() { testTable.IsExternal = true - backup.CheckTablesContainData([]backup.Table{testTable}) - Expect(backup.GetReport().BackupConfig.MetadataOnly).To(BeTrue()) + tables := []backup.Table{testTable} + + dataTables, numExtOrForeignTables := backup.GetBackupDataSet(tables) + Expect(len(dataTables)).To(Equal(0)) + Expect(numExtOrForeignTables).To(Equal(int64(1))) }) - It("does not change backup type if metadata-only backup", func() { + It("doesn't exclude regular table", func() { + tables := []backup.Table{testTable} + + dataTables, numExtOrForeignTables := backup.GetBackupDataSet(tables) + Expect(len(dataTables)).To(Equal(1)) + Expect(numExtOrForeignTables).To(Equal(int64(0))) + }) + It("returns an empty set, if --metadata-only flag is set and a regular table is given", func() { config.MetadataOnly = true backup.SetReport(&report.Report{BackupConfig: config}) - backup.CheckTablesContainData([]backup.Table{}) - Expect(backup.GetReport().BackupConfig.MetadataOnly).To(BeTrue()) - }) - It("does not change backup type if tables present in database", func() { - backup.CheckTablesContainData([]backup.Table{testTable}) - 
Expect(backup.GetReport().BackupConfig.MetadataOnly).To(BeFalse()) + tables := []backup.Table{testTable} + + dataTables, numExtOrForeignTables := backup.GetBackupDataSet(tables) + Expect(len(dataTables)).To(Equal(0)) + Expect(numExtOrForeignTables).To(Equal(int64(0))) }) }) }) From ad7c36449047046e31163cfacd188b5d9be18109 Mon Sep 17 00:00:00 2001 From: hughcapet Date: Thu, 9 Sep 2021 16:46:33 +0300 Subject: [PATCH 08/16] Prohibit inclusion of leaf partitions without --leaf-partition-data With this commit gpbackup avoids situation, when it is possible to specify --include-table(-file) flag with a leaf partition table and (optionally) with the parent table but without --leaf-partition-data flag. It is necessary to force user to use --leaf-partition-data flag, otherwise gpbackup executes COPY operation for both: parent and leaf partition tables, thus data in partition table is backed up (and restored) twice. --- backup/validate.go | 3 +++ backup/validate_test.go | 14 ++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/backup/validate.go b/backup/validate.go index 0057f39ca..610a6b2a0 100644 --- a/backup/validate.go +++ b/backup/validate.go @@ -88,6 +88,9 @@ func ValidateTablesExist(conn *dbconn.DBConn, tableList []string, excludeSet boo if partTableMap[tableOid].Level == "i" { gplog.Fatal(nil, "Cannot filter on %s, as it is an intermediate partition table. Only parent partition tables and leaf partition tables may be specified.", table) } + if partTableMap[tableOid].Level == "l" && !MustGetFlagBool(options.LEAF_PARTITION_DATA) { + gplog.Fatal(nil, "--leaf-partition-data flag must be specified to filter on %s, as it is a leaf partition table.", table) + } } } diff --git a/backup/validate_test.go b/backup/validate_test.go index 9d46138e8..ec91ee372 100644 --- a/backup/validate_test.go +++ b/backup/validate_test.go @@ -150,6 +150,7 @@ var _ = Describe("backup/validate tests", func() { partitionTables.AddRow("1", "l", "root") mock.ExpectQuery("SELECT (.*)").WillReturnRows(partitionTables) filterList = []string{"public.table1"} + _ = cmdFlags.Set(options.LEAF_PARTITION_DATA, "true") backup.ValidateTablesExist(connectionPool, filterList, false) }) It("panics if given an intermediate partition table and --leaf-partition-data is set", func() { @@ -179,6 +180,19 @@ var _ = Describe("backup/validate tests", func() { defer testhelper.ShouldPanicWithMessage("Cannot filter on public.table1, as it is an intermediate partition table. 
Only parent partition tables and leaf partition tables may be specified.") backup.ValidateTablesExist(connectionPool, filterList, false) }) + It("panics if given a leaf partition table and --leaf-partition-data is not set", func() { + // Added to handle call to `quote_ident` + schemaAndTable.AddRow("public", "table1") + mock.ExpectQuery("SELECT (.*)").WillReturnRows(schemaAndTable) + // + tableRows.AddRow("1", "public.table1") + mock.ExpectQuery("SELECT (.*)").WillReturnRows(tableRows) + partitionTables.AddRow("1", "l", "root") + mock.ExpectQuery("SELECT (.*)").WillReturnRows(partitionTables) + filterList = []string{"public.table1"} + defer testhelper.ShouldPanicWithMessage("--leaf-partition-data flag must be specified to filter on public.table1, as it is a leaf partition table.") + backup.ValidateTablesExist(connectionPool, filterList, false) + }) }) }) Describe("Validate various flag combinations that are required or exclusive", func() { From eb19837acb307e59ab1c418e30ac0930cce8cbb5 Mon Sep 17 00:00:00 2001 From: Brent Doil Date: Fri, 21 Jan 2022 14:07:54 -0500 Subject: [PATCH 09/16] Bump golang to 1.17 and upgrade dependencies Remove GO111MODULE=on This has been the default behavior since go1.16 https://maelvls.dev/go111module-everywhere/#go111module-with-go-117 Use golang image to build go binaries Source /opt/gcc_env.sh if it exists. This is required because the default centos6 gcc version is too old to compile ginkgo. The centos6 containers include gcc6.2 in /opt Update default VM test platform to centos7. Authored-by: Brent Doil --- Makefile | 25 +- ci/gpbackup-generated.yml | 26 +- ci/gpbackup-release-generated.yml | 24 +- ci/scripts/all-tests.bash | 4 +- ci/scripts/gpbackup-tests.bash | 6 +- .../integration-tests-fixed-version.bash | 4 +- ci/scripts/setup-centos-env.bash | 5 +- ci/scripts/test-on-local-cluster.bash | 11 +- ci/tasks/build-go-binaries.yml | 5 +- ci/templates/gpbackup-tpl.yml | 4 +- go.mod | 55 +- go.sum | 872 +++++++++++++++--- 12 files changed, 853 insertions(+), 188 deletions(-) diff --git a/Makefile b/Makefile index eaea58288..2f41bc80c 100644 --- a/Makefile +++ b/Makefile @@ -22,21 +22,20 @@ SUBDIRS_ALL=$(SUBDIRS_HAS_UNIT) integration/ end_to_end/ GOLANG_LINTER=$(GOPATH)/bin/golangci-lint GINKGO=$(GOPATH)/bin/ginkgo GOIMPORTS=$(GOPATH)/bin/goimports -GO_ENV=GO111MODULE=on # ensures the project still compiles in $GOPATH/src using golang versions 1.12 and below -GO_BUILD=$(GO_ENV) go build -mod=readonly +GO_BUILD=go build -mod=readonly DEBUG=-gcflags=all="-N -l" CUSTOM_BACKUP_DIR ?= "/tmp" helper_path ?= $(BIN_DIR)/$(HELPER) depend : - $(GO_ENV) go mod download + go mod download -$(GINKGO) : - $(GO_ENV) go install github.com/onsi/ginkgo/ginkgo +$(GINKGO) : # v1.14.0 is compatible with centos6 default gcc version + go install github.com/onsi/ginkgo/ginkgo@v1.14.0 $(GOIMPORTS) : - $(GO_ENV) go install golang.org/x/tools/cmd/goimports + go install golang.org/x/tools/cmd/goimports@latest format : $(GOIMPORTS) @goimports -w $(shell find . 
-type f -name '*.go' -not -path "./vendor/*") @@ -52,21 +51,21 @@ lint : $(GOLANG_LINTER) golangci-lint run --tests=false unit : $(GINKGO) - $(GO_ENV) ginkgo $(GINKGO_FLAGS) $(SUBDIRS_HAS_UNIT) 2>&1 + ginkgo $(GINKGO_FLAGS) $(SUBDIRS_HAS_UNIT) 2>&1 unit_all_gpdb_versions : $(GINKGO) - TEST_GPDB_VERSION=4.3.999 $(GO_ENV) ginkgo $(GINKGO_FLAGS) $(SUBDIRS_HAS_UNIT) 2>&1 - TEST_GPDB_VERSION=5.999.0 $(GO_ENV) ginkgo $(GINKGO_FLAGS) $(SUBDIRS_HAS_UNIT) 2>&1 - TEST_GPDB_VERSION=6.999.0 $(GO_ENV) ginkgo $(GINKGO_FLAGS) $(SUBDIRS_HAS_UNIT) 2>&1 - TEST_GPDB_VERSION=7.0.0 $(GO_ENV) ginkgo $(GINKGO_FLAGS) $(SUBDIRS_HAS_UNIT) 2>&1 # GPDB master + TEST_GPDB_VERSION=4.3.999 ginkgo $(GINKGO_FLAGS) $(SUBDIRS_HAS_UNIT) 2>&1 + TEST_GPDB_VERSION=5.999.0 ginkgo $(GINKGO_FLAGS) $(SUBDIRS_HAS_UNIT) 2>&1 + TEST_GPDB_VERSION=6.999.0 ginkgo $(GINKGO_FLAGS) $(SUBDIRS_HAS_UNIT) 2>&1 + TEST_GPDB_VERSION=7.0.0 ginkgo $(GINKGO_FLAGS) $(SUBDIRS_HAS_UNIT) 2>&1 # GPDB master integration : $(GINKGO) - $(GO_ENV) ginkgo $(GINKGO_FLAGS) integration 2>&1 + ginkgo $(GINKGO_FLAGS) integration 2>&1 test : build unit integration end_to_end : $(GINKGO) - $(GO_ENV) ginkgo $(GINKGO_FLAGS) -slowSpecThreshold=10 end_to_end -- --custom_backup_dir $(CUSTOM_BACKUP_DIR) 2>&1 + ginkgo $(GINKGO_FLAGS) -slowSpecThreshold=10 end_to_end -- --custom_backup_dir $(CUSTOM_BACKUP_DIR) 2>&1 coverage : @./show_coverage.sh diff --git a/ci/gpbackup-generated.yml b/ci/gpbackup-generated.yml index 95dab75f9..0d1292b0b 100644 --- a/ci/gpbackup-generated.yml +++ b/ci/gpbackup-generated.yml @@ -12,7 +12,7 @@ ## file (example: templates/gpbackup-tpl.yml) and regenerate the pipeline ## using appropriate tool (example: gen_pipeline.py -p gpbackup-release). ## ---------------------------------------------------------------------- -## Generated by gen_pipeline.py at: 2021-11-30 19:18:54.855194 +## Generated by gen_pipeline.py at: 2022-01-25 15:51:45.888347 ## Template file: gpbackup-tpl.yml ## Pipeline Name: gpbackup ## Nightly Trigger: True @@ -98,11 +98,11 @@ ccp_aws_default_params_anchor: &ccp_aws_default_params ccp_vars_anchor: &ccp_default_vars instance_type: n1-standard-1 - PLATFORM: centos6 + PLATFORM: centos7 ccp_vars_anchor_aws: &ccp_default_vars_aws aws_instance-node-instance_type: t2.medium - PLATFORM: centos6 + PLATFORM: centos7 ccp_gen_cluster_default_params_anchor: &ccp_gen_cluster_default_params AWS_ACCESS_KEY_ID: {{tf-machine-access-key-id}} @@ -111,6 +111,7 @@ ccp_gen_cluster_default_params_anchor: &ccp_gen_cluster_default_params BUCKET_PATH: clusters-google/ BUCKET_NAME: {{tf-bucket-name}} CLOUD_PROVIDER: google + PLATFORM: centos7 ddboost_params_anchor: &ddboost_params DD_SOURCE_HOST: {{datadomain_source_host}} @@ -251,7 +252,7 @@ ddboost_plugin_and_boostfs_tests_anchor: &ddboost_plugin_and_boostfs_tests <<: *ccp_aws_default_params vars: aws_instance-node-instance_type: t2.medium - PLATFORM: centos6 + PLATFORM: centos7 - task: gen_cluster tags: ["ddboost"] params: @@ -701,7 +702,6 @@ jobs: trigger: true - get: pivnet_release_cache - task: build-go-binaries - image: centos6-image file: gpbackup/ci/tasks/build-go-binaries.yml - put: gpbackup-go-components params: @@ -820,12 +820,10 @@ jobs: <<: *ccp_default_params vars: <<: *ccp_default_vars - PLATFORM: centos7 - task: gen_cluster file: ccp_src/ci/tasks/gen_cluster.yml params: <<: *ccp_gen_cluster_default_params - PLATFORM: centos7 - task: gpinitsystem file: ccp_src/ci/tasks/gpinitsystem.yml - task: setup-centos-env @@ -1279,7 +1277,7 @@ jobs: - get: gpbackup passed: [build_gppkgs] - get: 
gpdb_binary - resource: bin_gpdb_5x_stable_centos6 + resource: bin_gpdb_5x_stable_centos7 trigger: true - get: bin_gpbackup_1.0.0_and_1.7.1 - get: ccp_src @@ -1303,8 +1301,8 @@ jobs: - task: gpinitsystem file: ccp_src/ci/tasks/gpinitsystem.yml - task: setup-centos-env - image: centos6-image file: gpbackup/ci/tasks/setup-centos-env.yml + image: centos6-image - task: backup-1.0.0-restore-latest image: centos6-image file: gpbackup/ci/tasks/all-tests.yml @@ -1315,10 +1313,8 @@ jobs: file: gpbackup/ci/tasks/all-tests.yml params: GPBACKUP_VERSION: "1.7.1" - on_success: - <<: *ccp_destroy - on_failure: - *slack_alert + on_success: + <<: *ccp_destroy ensure: <<: *set_failed @@ -1394,11 +1390,9 @@ jobs: terraform_source: ccp_src/google-nvme-block-device/ vars: instance_type: n1-standard-8 - PLATFORM: centos7 - task: gen_cluster params: <<: *ccp_gen_cluster_default_params - PLATFORM: centos7 file: ccp_src/ci/tasks/gen_cluster.yml - task: gpinitsystem file: ccp_src/ci/tasks/gpinitsystem.yml @@ -1440,11 +1434,9 @@ jobs: terraform_source: ccp_src/google-nvme-block-device/ vars: instance_type: n1-standard-8 - PLATFORM: centos7 - task: gen_cluster params: <<: *ccp_gen_cluster_default_params - PLATFORM: centos7 file: ccp_src/ci/tasks/gen_cluster.yml - task: gpinitsystem file: ccp_src/ci/tasks/gpinitsystem.yml diff --git a/ci/gpbackup-release-generated.yml b/ci/gpbackup-release-generated.yml index ab5769b83..f842df211 100644 --- a/ci/gpbackup-release-generated.yml +++ b/ci/gpbackup-release-generated.yml @@ -12,7 +12,7 @@ ## file (example: templates/gpbackup-tpl.yml) and regenerate the pipeline ## using appropriate tool (example: gen_pipeline.py -p gpbackup-release). ## ---------------------------------------------------------------------- -## Generated by gen_pipeline.py at: 2021-11-30 19:18:54.874625 +## Generated by gen_pipeline.py at: 2022-01-25 15:51:45.905917 ## Template file: gpbackup-tpl.yml ## Pipeline Name: gpbackup-release ## Nightly Trigger: True @@ -98,11 +98,11 @@ ccp_aws_default_params_anchor: &ccp_aws_default_params ccp_vars_anchor: &ccp_default_vars instance_type: n1-standard-1 - PLATFORM: centos6 + PLATFORM: centos7 ccp_vars_anchor_aws: &ccp_default_vars_aws aws_instance-node-instance_type: t2.medium - PLATFORM: centos6 + PLATFORM: centos7 ccp_gen_cluster_default_params_anchor: &ccp_gen_cluster_default_params AWS_ACCESS_KEY_ID: {{tf-machine-access-key-id}} @@ -111,6 +111,7 @@ ccp_gen_cluster_default_params_anchor: &ccp_gen_cluster_default_params BUCKET_PATH: clusters-google/ BUCKET_NAME: {{tf-bucket-name}} CLOUD_PROVIDER: google + PLATFORM: centos7 ddboost_params_anchor: &ddboost_params DD_SOURCE_HOST: {{datadomain_source_host}} @@ -251,7 +252,7 @@ ddboost_plugin_and_boostfs_tests_anchor: &ddboost_plugin_and_boostfs_tests <<: *ccp_aws_default_params vars: aws_instance-node-instance_type: t2.medium - PLATFORM: centos6 + PLATFORM: centos7 - task: gen_cluster tags: ["ddboost"] params: @@ -702,7 +703,6 @@ jobs: - get: gpbackup - get: pivnet_release_cache - task: build-go-binaries - image: centos6-image file: gpbackup/ci/tasks/build-go-binaries.yml - put: gpbackup-go-components params: @@ -823,12 +823,10 @@ jobs: <<: *ccp_default_params vars: <<: *ccp_default_vars - PLATFORM: centos7 - task: gen_cluster file: ccp_src/ci/tasks/gen_cluster.yml params: <<: *ccp_gen_cluster_default_params - PLATFORM: centos7 - task: gpinitsystem file: ccp_src/ci/tasks/gpinitsystem.yml - task: setup-centos-env @@ -1252,7 +1250,7 @@ jobs: - get: gpbackup passed: [build_gppkgs] - get: gpdb_binary - resource: 
bin_gpdb_5x_stable_centos6 + resource: bin_gpdb_5x_stable_centos7 - get: bin_gpbackup_1.0.0_and_1.7.1 - get: ccp_src - get: gpdb_src @@ -1275,8 +1273,8 @@ jobs: - task: gpinitsystem file: ccp_src/ci/tasks/gpinitsystem.yml - task: setup-centos-env - image: centos6-image file: gpbackup/ci/tasks/setup-centos-env.yml + image: centos6-image - task: backup-1.0.0-restore-latest image: centos6-image file: gpbackup/ci/tasks/all-tests.yml @@ -1287,10 +1285,8 @@ jobs: file: gpbackup/ci/tasks/all-tests.yml params: GPBACKUP_VERSION: "1.7.1" - on_success: - <<: *ccp_destroy - on_failure: - *slack_alert + on_success: + <<: *ccp_destroy ensure: <<: *set_failed @@ -1319,11 +1315,9 @@ jobs: terraform_source: ccp_src/google-nvme-block-device/ vars: instance_type: n1-standard-8 - PLATFORM: centos7 - task: gen_cluster params: <<: *ccp_gen_cluster_default_params - PLATFORM: centos7 file: ccp_src/ci/tasks/gen_cluster.yml - task: gpinitsystem file: ccp_src/ci/tasks/gpinitsystem.yml diff --git a/ci/scripts/all-tests.bash b/ci/scripts/all-tests.bash index d907ae57c..6baf87e15 100755 --- a/ci/scripts/all-tests.bash +++ b/ci/scripts/all-tests.bash @@ -24,7 +24,9 @@ cat <