Skip to content

Commit

Permalink
Do not start the next batch after an error during restore (#98)
Browse files Browse the repository at this point in the history
During resize restore, gprestore does not create a skip file after each failed
COPY command and continues to try to start restoring the remaining batches of
the current table. This does not make sense, and so this patch disables such
attempts by returning an error immediately.
  • Loading branch information
RekGRpth authored Jul 30, 2024
1 parent cfcba1d commit 51e319b
Show file tree
Hide file tree
Showing 3 changed files with 30 additions and 9 deletions.
29 changes: 29 additions & 0 deletions end_to_end/end_to_end_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2522,6 +2522,35 @@ LANGUAGE plpgsql NO SQL;`)
assertArtifactsCleaned("20240710143553")
testhelper.AssertQueryRuns(restoreConn, "DROP TABLE a; DROP TABLE b; DROP TABLE c; DROP TABLE d; DROP TABLE e; DROP TABLE f; DROP TABLE g; DROP TABLE h;")
})
It("Will not start next batch after error during resize single data file restore", func() {
command := exec.Command("tar", "-xzf", "resources/4-segment-db-single-backup-dir.tar.gz", "-C", backupDir)
mustRunCommand(command)
gprestoreCmd := exec.Command(gprestorePath,
"--timestamp", "20240730085053",
"--redirect-db", "restoredb",
"--backup-dir", path.Join(backupDir, "4-segment-db-single-backup-dir"),
"--resize-cluster", "--on-error-continue", "--metadata-only")
_, err := gprestoreCmd.CombinedOutput()
Expect(err).To(Not(HaveOccurred()))
testhelper.AssertQueryRuns(restoreConn, "ALTER TABLE a ADD COLUMN c text NOT NULL; ALTER TABLE b ADD COLUMN c text NOT NULL; ALTER TABLE c ADD COLUMN c text NOT NULL;")
gprestoreCmd = exec.Command(gprestorePath,
"--timestamp", "20240730085053",
"--redirect-db", "restoredb",
"--backup-dir", path.Join(backupDir, "4-segment-db-single-backup-dir"),
"--resize-cluster", "--on-error-continue", "--data-only", "--verbose")
output, err := gprestoreCmd.CombinedOutput()
Expect(err).To(HaveOccurred())
Expect(string(output)).To(ContainSubstring(`null value in column "c" violates not-null constraint`))
Expect(string(output)).To(MatchRegexp(`Error loading data into table public.a: COPY a, line 1: "\d,a": ERROR: null value in column "c" violates not-null constraint`))
Expect(string(output)).To(MatchRegexp(`Error loading data into table public.b: COPY b, line 1: "\d,b": ERROR: null value in column "c" violates not-null constraint`))
Expect(string(output)).To(MatchRegexp(`Error loading data into table public.c: COPY c, line 1: "\d,c": ERROR: null value in column "c" violates not-null constraint`))
Expect(string(output)).To(Not(ContainSubstring(fmt.Sprintf(`"<SEG_DATA_DIR>/gpbackup_<SEGID>_20240730085053_pipe_%d_16411_1"`, gprestoreCmd.Process.Pid))))
Expect(string(output)).To(Not(ContainSubstring(fmt.Sprintf(`"<SEG_DATA_DIR>/gpbackup_<SEGID>_20240730085053_pipe_%d_16417_1"`, gprestoreCmd.Process.Pid))))
Expect(string(output)).To(Not(ContainSubstring(fmt.Sprintf(`"<SEG_DATA_DIR>/gpbackup_<SEGID>_20240730085053_pipe_%d_16423_1"`, gprestoreCmd.Process.Pid))))
Expect(string(output)).To(ContainSubstring(`Encountered 3 error(s) during table data restore`))
assertArtifactsCleaned("20240730085053")
testhelper.AssertQueryRuns(restoreConn, "DROP TABLE a; DROP TABLE b; DROP TABLE c; DROP TABLE d;")
})
})
Describe("Restore indexes and constraints on exchanged partition tables", func() {
BeforeEach(func() {
Expand Down
Binary file not shown.
10 changes: 1 addition & 9 deletions restore/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -79,7 +79,6 @@ func CopyTableIn(queryContext context.Context, connectionPool *dbconn.DBConn, ta
func restoreSingleTableData(queryContext context.Context, fpInfo *filepath.FilePathInfo, entry toc.CoordinatorDataEntry, tableName string, whichConn int) error {
origSize, destSize, resizeCluster, batches := GetResizeClusterInfo()

var lastErr error
var numRowsRestored int64
// We don't want duplicate data for replicated tables so only do one batch
if entry.IsReplicated {
Expand Down Expand Up @@ -115,21 +114,14 @@ func restoreSingleTableData(queryContext context.Context, fpInfo *filepath.FileP
if copyErr != nil {
gplog.Error(copyErr.Error())
if MustGetFlagBool(options.ON_ERROR_CONTINUE) {
if connectionPool.Version.AtLeast("6") && backupConfig.SingleDataFile {
if connectionPool.Version.AtLeast("6") && (backupConfig.SingleDataFile || resizeCluster) {
// inform segment helpers to skip this entry
utils.CreateSkipFileOnSegments(fmt.Sprintf("%d", entry.Oid), tableName, globalCluster, globalFPInfo)
}
lastErr = copyErr
copyErr = nil
continue
}
return copyErr
}
numRowsRestored += partialRowsRestored

}
if lastErr != nil {
return lastErr
}

numRowsBackedUp := entry.RowsCopied
Expand Down

0 comments on commit 51e319b

Please sign in to comment.