Skip to content

Commit

Permalink
Do not batch replicated tables (#85)
Browse files Browse the repository at this point in the history
When restoring from a larger cluster to a smaller one, table data from several source segments is batched onto a single destination segment. Such batching was disabled in the restore helper for replicated tables, but not in the main process. As a result, the main process on the coordinator launched several COPY commands for each replicated table; every command after the first failed on the segments with a missing-pipe error. This error was silently ignored and was visible only in the segment logs.

This patch disables batching for replicated tables on the coordinator.
  • Loading branch information
RekGRpth authored May 17, 2024
1 parent 5912550 commit 7a2dfce
Show file tree
Hide file tree
Showing 3 changed files with 235 additions and 11 deletions.
3 changes: 3 additions & 0 deletions end_to_end/end_to_end_suite_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -2195,6 +2195,9 @@ LANGUAGE plpgsql NO SQL;`)
numSegments := dbconn.MustSelectString(restoreConn, "SELECT numsegments FROM gp_distribution_policy where localoid = 'schemaone.test_table'::regclass::oid")
Expect(numSegments).To(Equal(strconv.Itoa(segmentCount)))

// check there is no pipe errors on segments
errSegments := dbconn.MustSelectString(restoreConn, fmt.Sprintf("SELECT exists (SELECT * FROM gp_toolkit.__gp_log_segment_ext WHERE logdatabase = current_database() AND logmessage LIKE 'read err msg from pipe%%_%06d_%%')", gprestoreCmd.Process.Pid))
Expect(errSegments).To(Equal("false"))
},
Entry("Can backup a 1-segment cluster and restore to current cluster with replicated tables", "20221104023842", "1-segment-db-replicated"),
Entry("Can backup a 3-segment cluster and restore to current cluster with replicated tables", "20221104023611", "3-segment-db-replicated"),
Expand Down
6 changes: 3 additions & 3 deletions restore/data.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ var (
tableDelim = ","
)

func CopyTableIn(queryContext context.Context, connectionPool *dbconn.DBConn, tableName string, tableAttributes string, destinationToRead string, singleDataFile bool, whichConn int) (int64, error) {
func CopyTableIn(queryContext context.Context, connectionPool *dbconn.DBConn, tableName string, tableAttributes string, destinationToRead string, singleDataFile bool, whichConn int, isReplicated bool) (int64, error) {
whichConn = connectionPool.ValidateConnNum(whichConn)
copyCommand := ""
readFromDestinationCommand := "cat"
Expand All @@ -50,7 +50,7 @@ func CopyTableIn(queryContext context.Context, connectionPool *dbconn.DBConn, ta
// During a larger-to-smaller restore, we need multiple COPY passes to load all the data.
// One pass is sufficient for smaller-to-larger and normal restores.
batches := 1
if resizeCluster && origSize > destSize {
if !isReplicated && resizeCluster && origSize > destSize {
batches = origSize / destSize
if origSize%destSize != 0 {
batches += 1
Expand Down Expand Up @@ -89,7 +89,7 @@ func restoreSingleTableData(queryContext context.Context, fpInfo *filepath.FileP
destinationToRead = fpInfo.GetTableBackupFilePathForCopyCommand(entry.Oid, utils.GetPipeThroughProgram().Extension, backupConfig.SingleDataFile)
}
gplog.Debug("Reading from %s", destinationToRead)
numRowsRestored, err := CopyTableIn(queryContext, connectionPool, tableName, entry.AttributeString, destinationToRead, backupConfig.SingleDataFile, whichConn)
numRowsRestored, err := CopyTableIn(queryContext, connectionPool, tableName, entry.AttributeString, destinationToRead, backupConfig.SingleDataFile, whichConn, entry.IsReplicated)
if err != nil {
return err
}
Expand Down
Loading

0 comments on commit 7a2dfce

Please sign in to comment.