ADBDEV-4338: Fix gprestore with --resize-cluster to a smaller one #40

Closed · wants to merge 6 commits
45 changes: 0 additions & 45 deletions end_to_end/end_to_end_suite_test.go
@@ -420,43 +420,6 @@ func extractSavedTarFile(backupDir string, tarBaseName string) string {
 	return extractDirectory
 }
 
-// Move extracted data files to the proper directory for a larger-to-smaller restore, if necessary
-// Assumes all saved backups have a name in the format "N-segment-db-..." where N is the original cluster size
-func moveSegmentBackupFiles(tarBaseName string, extractDirectory string, isMultiNode bool, timestamps ...string) {
-	re := regexp.MustCompile("^([0-9]+)-.*")
-	origSize, _ := strconv.Atoi(re.FindStringSubmatch(tarBaseName)[1])
-	for _, ts := range timestamps {
-		if ts != "" {
-			baseDir := fmt.Sprintf("%s/demoDataDir%s/backups/%s/%s", extractDirectory, "%d", ts[0:8], ts)
-			if isMultiNode {
-				remoteOutput := backupCluster.GenerateAndExecuteCommand("Create backup directories on segments", cluster.ON_SEGMENTS, func(contentID int) string {
-					return fmt.Sprintf("mkdir -p %s", fmt.Sprintf(baseDir, contentID))
-				})
-				backupCluster.CheckClusterError(remoteOutput, "Unable to create directories", func(contentID int) string {
-					return ""
-				})
-				for i := 0; i < origSize; i++ {
-					origDir := fmt.Sprintf(baseDir, i)
-					destDir := fmt.Sprintf(baseDir, i%segmentCount)
-					_, err := backupCluster.ExecuteLocalCommand(fmt.Sprintf(`rsync -r -e ssh %s/ %s:%s`, origDir, backupCluster.GetHostForContent(i%segmentCount), destDir))
-					if err != nil {
-						Fail(fmt.Sprintf("Could not copy %s to %s: %v", origDir, destDir, err))
-					}
-				}
-			} else {
-				for i := segmentCount; i < origSize; i++ {
-					origDir := fmt.Sprintf(baseDir, i)
-					destDir := fmt.Sprintf(baseDir, i%segmentCount)
-					files, _ := path.Glob(fmt.Sprintf("%s/*", origDir))
-					for _, dataFile := range files {
-						os.Rename(dataFile, fmt.Sprintf("%s/%s", destDir, path.Base(dataFile)))
-					}
-				}
-			}
-		}
-	}
-}
-
 func TestEndToEnd(t *testing.T) {
 	format.MaxLength = 0
 	RegisterFailHandler(Fail)
@@ -1997,11 +1960,6 @@ LANGUAGE plpgsql NO SQL;`)
 			defer testhelper.AssertQueryRuns(restoreConn, `DROP SCHEMA IF EXISTS schematwo CASCADE;`)
 			defer testhelper.AssertQueryRuns(restoreConn, `DROP SCHEMA IF EXISTS schemathree CASCADE;`)
 
-			if !testUsesPlugin { // No need to manually move files when using a plugin
-				isMultiNode := (backupCluster.GetHostForContent(0) != backupCluster.GetHostForContent(-1))
-				moveSegmentBackupFiles(tarBaseName, extractDirectory, isMultiNode, fullTimestamp, incrementalTimestamp)
-			}
-
 			// This block stops the test if it hangs. It was introduced to prevent hangs causing timeout failures in Concourse CI.
 			// These hangs are still being observed only in CI, and a definitive RCA has not yet been accomplished
 			completed := make(chan bool)
@@ -2146,9 +2104,6 @@ LANGUAGE plpgsql NO SQL;`)
 			extractDirectory := extractSavedTarFile(backupDir, tarBaseName)
 			defer testhelper.AssertQueryRuns(restoreConn, `DROP SCHEMA IF EXISTS schemaone CASCADE;`)
 
-			isMultiNode := (backupCluster.GetHostForContent(0) != backupCluster.GetHostForContent(-1))
-			moveSegmentBackupFiles(tarBaseName, extractDirectory, isMultiNode, fullTimestamp)
-
 			gprestoreArgs := []string{
 				"--timestamp", fullTimestamp,
 				"--redirect-db", "restoredb",
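Two asides on the removed moveSegmentBackupFiles helper. First, the i%segmentCount arithmetic is what mapped each original segment's files onto the smaller cluster. Second, the baseDir construction deliberately passes the literal string "%d" as a format argument, leaving a %d placeholder behind for a later per-content Sprintf. A minimal standalone sketch of both (the directory layout, timestamp, and sizes are hypothetical stand-ins for the test fixtures):

package main

import "fmt"

func main() {
	extractDirectory := "/tmp/extract" // hypothetical extraction dir
	ts := "20230101120000"             // hypothetical backup timestamp
	origSize, segmentCount := 6, 3     // restoring a 6-segment backup onto 3 segments

	// Passing "%d" as an argument leaves a literal %d placeholder in the
	// result, to be filled in with a content ID by a later Sprintf.
	baseDir := fmt.Sprintf("%s/demoDataDir%s/backups/%s/%s", extractDirectory, "%d", ts[0:8], ts)
	fmt.Println(baseDir) // /tmp/extract/demoDataDir%d/backups/20230101/20230101120000

	// The modulo mapping the helper used: files for original content i were
	// moved into the directory of destination content i%segmentCount.
	for i := segmentCount; i < origSize; i++ {
		fmt.Printf("content %d -> %s\n", i, fmt.Sprintf(baseDir, i%segmentCount))
	}
}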
2 changes: 2 additions & 0 deletions helper/restore_helper.go
@@ -382,6 +382,8 @@ func doRestoreAgent() error {
 
 func constructSingleTableFilename(name string, contentToRestore int, oid int) string {
 	name = strings.ReplaceAll(name, fmt.Sprintf("gpbackup_%d", *content), fmt.Sprintf("gpbackup_%d", contentToRestore))
+	// change the path to the file being restored, replacing not only the file name, but also the directory name
+	name = strings.ReplaceAll(name, fmt.Sprintf("%d/backups/", *content), fmt.Sprintf("%d/backups/", contentToRestore))
 	nameParts := strings.Split(name, ".")
 	filename := fmt.Sprintf("%s_%d", nameParts[0], oid)
 	if len(nameParts) > 1 { // We only expect filenames ending in ".gz" or ".zst", but they can contain dots so handle arbitrary numbers of dots
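To make the effect of the added directory replacement concrete, here is a minimal standalone sketch (the path and content IDs are hypothetical, and in the real helper *content is a flag-backed pointer): a restore agent running as content 1 of the smaller cluster rewrites its own filename pattern to point at the file originally written by content 4.

package main

import (
	"fmt"
	"strings"
)

func main() {
	content := 1          // content ID this restore agent runs as
	contentToRestore := 4 // original content whose data this segment must restore
	name := "/tmp/extract/demoDataDir1/backups/20230101/20230101120000/gpbackup_1_20230101120000"

	// Existing replacement: fixes only the file name.
	name = strings.ReplaceAll(name, fmt.Sprintf("gpbackup_%d", content), fmt.Sprintf("gpbackup_%d", contentToRestore))
	// Added replacement: without it the path would still point at
	// .../demoDataDir1/backups/..., where content 4's file does not live.
	name = strings.ReplaceAll(name, fmt.Sprintf("%d/backups/", content), fmt.Sprintf("%d/backups/", contentToRestore))

	fmt.Println(name)
	// /tmp/extract/demoDataDir4/backups/20230101/20230101120000/gpbackup_4_20230101120000
}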
14 changes: 12 additions & 2 deletions restore/remote.go
@@ -38,8 +38,19 @@ func VerifyBackupDirectoriesExistOnAllHosts() {
 }
 
 func VerifyBackupFileCountOnSegments() {
+	origSize, destSize, isResizeRestore := GetResizeClusterInfo()
+
 	remoteOutput := globalCluster.GenerateAndExecuteCommand("Verifying backup file count", cluster.ON_SEGMENTS, func(contentID int) string {
-		return fmt.Sprintf("find %s -type f | wc -l", globalFPInfo.GetDirForContent(contentID))
+		dirs := ""
+		/*
+		 * collect actual data not only from the directory for one segment, but also
+		 * from the remaining directories that will be restored on this segment
+		 */
+		for contentID < origSize || contentID < destSize {
+			dirs += " " + globalFPInfo.GetDirForContent(contentID)
+			contentID += destSize
+		}
+		return fmt.Sprintf("find %s -type f | wc -l", dirs)
 	})
 	globalCluster.CheckClusterError(remoteOutput, "Could not verify backup file count", func(contentID int) string {
 		return "Could not verify backup file count"
@@ -51,7 +62,6 @@ func VerifyBackupFileCountOnSegments() {
 		fileCount = len(globalTOC.DataEntries)
 	}
 
-	origSize, destSize, isResizeRestore := GetResizeClusterInfo()
 	batchMap := make(map[int]int, len(remoteOutput.Commands))
 	for i := 0; i < origSize; i++ {
 		batchMap[i%destSize] += fileCount
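A sketch of the arithmetic behind both changes to VerifyBackupFileCountOnSegments, assuming a 6-segment backup restored onto 3 segments with 2 data files per original content (all numbers hypothetical): the stride loop enumerates every original content directory a destination segment now serves, and the batchMap accounting predicts the per-segment totals the find | wc -l output is compared against.

package main

import "fmt"

func main() {
	origSize, destSize := 6, 3 // 6-segment backup restored onto 3 segments
	fileCount := 2             // hypothetical data files per original content

	// Stride loop from the new command builder: destination content c also
	// serves original contents c+destSize, c+2*destSize, ...
	for contentID := 0; contentID < destSize; contentID++ {
		dirs := []int{}
		for c := contentID; c < origSize || c < destSize; c += destSize {
			dirs = append(dirs, c)
		}
		fmt.Printf("segment %d counts files in content dirs %v\n", contentID, dirs)
	}

	// batchMap accounting: expected total files per destination segment.
	batchMap := make(map[int]int)
	for i := 0; i < origSize; i++ {
		batchMap[i%destSize] += fileCount
	}
	fmt.Println(batchMap) // map[0:4 1:4 2:4]
}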