diff --git a/go/test/endtoend/reparent/plannedreparent/reparent_test.go b/go/test/endtoend/reparent/plannedreparent/reparent_test.go
index ae9bd6bbc9b..ccfd2eee239 100644
--- a/go/test/endtoend/reparent/plannedreparent/reparent_test.go
+++ b/go/test/endtoend/reparent/plannedreparent/reparent_test.go
@@ -297,17 +297,14 @@ func TestReparentWithDownReplica(t *testing.T) {
 	// Perform a graceful reparent operation. It will fail as one tablet is down.
 	out, err := utils.Prs(t, clusterInstance, tablets[1])
 	require.Error(t, err)
-	var insertVal int
 	// Assert that PRS failed
-	if clusterInstance.VtctlMajorVersion <= 17 {
-		assert.True(t, utils.SetReplicationSourceFailed(tablets[2], out))
-		// insert data into the new primary, check the connected replica work
-		insertVal = utils.ConfirmReplication(t, tablets[1], []*cluster.Vttablet{tablets[0], tablets[3]})
-	} else {
+	if clusterInstance.VtctlMajorVersion <= 20 {
 		assert.Contains(t, out, fmt.Sprintf("TabletManager.PrimaryStatus on %s", tablets[2].Alias))
-		// insert data into the old primary, check the connected replica works. The primary tablet shouldn't have changed.
-		insertVal = utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[3]})
+	} else {
+		assert.Contains(t, out, fmt.Sprintf("TabletManager.GetGlobalStatusVars on %s", tablets[2].Alias))
 	}
+	// insert data into the old primary, check the connected replica works. The primary tablet shouldn't have changed.
+	insertVal := utils.ConfirmReplication(t, tablets[0], []*cluster.Vttablet{tablets[1], tablets[3]})
 
 	// restart mysql on the old replica, should still be connecting to the old primary
 	tablets[2].MysqlctlProcess.InitMysql = false
diff --git a/go/vt/mysqlctl/fakemysqldaemon.go b/go/vt/mysqlctl/fakemysqldaemon.go
index 317aed4f578..7cdca498ded 100644
--- a/go/vt/mysqlctl/fakemysqldaemon.go
+++ b/go/vt/mysqlctl/fakemysqldaemon.go
@@ -96,6 +96,9 @@ type FakeMysqlDaemon struct {
 	// PrimaryStatusError is used by PrimaryStatus.
 	PrimaryStatusError error
 
+	// GlobalStatusVars is used by GetGlobalStatusVars.
+	GlobalStatusVars map[string]string
+
 	// CurrentSourceHost is returned by ReplicationStatus.
 	CurrentSourceHost string
 
@@ -419,9 +422,7 @@ func (fmd *FakeMysqlDaemon) SetSuperReadOnly(ctx context.Context, on bool) (Rese
 
 // GetGlobalStatusVars is part of the MysqlDaemon interface.
 func (fmd *FakeMysqlDaemon) GetGlobalStatusVars(ctx context.Context, variables []string) (map[string]string, error) {
-	return make(map[string]string), fmd.ExecuteSuperQueryList(ctx, []string{
-		"FAKE " + getGlobalStatusQuery,
-	})
+	return fmd.GlobalStatusVars, nil
 }
 
 // StartReplication is part of the MysqlDaemon interface.
diff --git a/go/vt/vtctl/grpcvtctldserver/server_slow_test.go b/go/vt/vtctl/grpcvtctldserver/server_slow_test.go
index 4d7c5aa1943..6c73bb1d264 100644
--- a/go/vt/vtctl/grpcvtctldserver/server_slow_test.go
+++ b/go/vt/vtctl/grpcvtctldserver/server_slow_test.go
@@ -25,6 +25,7 @@ import (
 	"github.com/stretchr/testify/require"
 
 	"vitess.io/vitess/go/mysql/replication"
+	"vitess.io/vitess/go/vt/vtctl/reparentutil"
 
 	"vitess.io/vitess/go/mysql"
 	"vitess.io/vitess/go/protoutil"
@@ -402,19 +403,24 @@ func TestPlannedReparentShardSlow(t *testing.T) {
 						Error: nil,
 					},
 				},
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							reparentutil.InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000101": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							reparentutil.InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							reparentutil.InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 				PrimaryPositionResults: map[string]struct {
@@ -519,19 +525,24 @@ func TestPlannedReparentShardSlow(t *testing.T) {
 						Error: nil,
 					},
 				},
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							reparentutil.InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000101": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							reparentutil.InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							reparentutil.InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 				PrimaryPositionResults: map[string]struct {
diff --git a/go/vt/vtctl/grpcvtctldserver/server_test.go b/go/vt/vtctl/grpcvtctldserver/server_test.go
index 1b641488e1a..b98087d0560 100644
--- a/go/vt/vtctl/grpcvtctldserver/server_test.go
+++ b/go/vt/vtctl/grpcvtctldserver/server_test.go
@@ -29,6 +29,7 @@ import (
 	"time"
 
 	_flag "vitess.io/vitess/go/internal/flag"
+	"vitess.io/vitess/go/vt/vtctl/reparentutil"
 
 	"github.com/stretchr/testify/assert"
 	"github.com/stretchr/testify/require"
@@ -8296,19 +8297,24 @@ func TestPlannedReparentShard(t *testing.T) {
 						Error: nil,
 					},
 				},
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							reparentutil.InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000101": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							reparentutil.InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							reparentutil.InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 				PrimaryPositionResults: map[string]struct {
@@ -8425,19 +8431,22 @@ func TestPlannedReparentShard(t *testing.T) {
 				},
 			},
 			tmc: &testutil.TabletManagerClient{
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Error: fmt.Errorf("primary status failed"),
+						Error: fmt.Errorf("global status vars failed"),
 					},
 					"zone1-0000000101": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							reparentutil.InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							reparentutil.InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 			},
@@ -8451,7 +8460,7 @@ func TestPlannedReparentShard(t *testing.T) {
 				WaitReplicasTimeout: protoutil.DurationToProto(time.Millisecond * 10),
 			},
 			expectEventsToOccur: true,
-			expectedErr:         "primary status failed",
+			expectedErr:         "global status vars failed",
 		},
 	}
 
diff --git a/go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go b/go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go
index ba560129459..a17ad376e0b 100644
--- a/go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go
+++ b/go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go
@@ -250,6 +250,7 @@ type TabletManagerClient struct {
 		Schema *tabletmanagerdatapb.SchemaDefinition
 		Error  error
 	}
+	GetGlobalStatusVarsDelays  map[string]time.Duration
 	GetGlobalStatusVarsResults map[string]struct {
 		Statuses map[string]string
 		Error    error
 	}
@@ -742,6 +743,17 @@ func (fake *TabletManagerClient) GetGlobalStatusVars(ctx context.Context, tablet
 	}
 
 	key := topoproto.TabletAliasString(tablet.Alias)
+	if fake.GetGlobalStatusVarsDelays != nil {
+		if delay, ok := fake.GetGlobalStatusVarsDelays[key]; ok {
+			select {
+			case <-ctx.Done():
+				return nil, ctx.Err()
+			case <-time.After(delay):
+				// proceed to results
+			}
+		}
+	}
+
 	if result, ok := fake.GetGlobalStatusVarsResults[key]; ok {
 		return result.Statuses, result.Error
 	}
diff --git a/go/vt/vtctl/reparentutil/emergency_reparenter.go b/go/vt/vtctl/reparentutil/emergency_reparenter.go
index e65f1891872..b776a39e1d7 100644
--- a/go/vt/vtctl/reparentutil/emergency_reparenter.go
+++ b/go/vt/vtctl/reparentutil/emergency_reparenter.go
@@ -390,7 +390,7 @@ func (erp *EmergencyReparenter) findMostAdvanced(
 	}
 
 	// sort the tablets for finding the best intermediate source in ERS
-	err = sortTabletsForReparent(validTablets, tabletPositions, opts.durability)
+	err = sortTabletsForReparent(validTablets, tabletPositions, nil, opts.durability)
 	if err != nil {
 		return nil, nil, err
 	}
diff --git a/go/vt/vtctl/reparentutil/planned_reparenter.go b/go/vt/vtctl/reparentutil/planned_reparenter.go
index 49741f44574..e09761ea982 100644
--- a/go/vt/vtctl/reparentutil/planned_reparenter.go
+++ b/go/vt/vtctl/reparentutil/planned_reparenter.go
@@ -19,6 +19,7 @@ package reparentutil
 import (
 	"context"
 	"fmt"
+	"strconv"
 	"sync"
 	"time"
 
@@ -44,6 +45,7 @@ var (
 	prsCounter = stats.NewCountersWithMultiLabels("PlannedReparentCounts", "Number of times Planned Reparent Shard has been run",
 		[]string{"Keyspace", "Shard", "Result"},
 	)
+	InnodbBufferPoolsDataVar = "Innodb_buffer_pool_pages_data"
 )
 
 // PlannedReparenter performs PlannedReparentShard operations.
@@ -158,6 +160,7 @@ func (pr *PlannedReparenter) preflightChecks(
 	ctx context.Context,
 	ev *events.Reparent,
 	tabletMap map[string]*topo.TabletInfo,
+	innodbBufferPoolData map[string]int,
 	opts *PlannedReparentOptions, // we take a pointer here to set NewPrimaryAlias
 ) (isNoop bool, err error) {
 	// We don't want to fail when both NewPrimaryAlias and AvoidPrimaryAlias are nil.
@@ -178,7 +181,7 @@ func (pr *PlannedReparenter) preflightChecks(
 	}
 
 	event.DispatchUpdate(ev, "electing a primary candidate")
-	opts.NewPrimaryAlias, err = ElectNewPrimary(ctx, pr.tmc, &ev.ShardInfo, tabletMap, opts.NewPrimaryAlias, opts.AvoidPrimaryAlias, opts.WaitReplicasTimeout, opts.TolerableReplLag, opts.durability, pr.logger)
+	opts.NewPrimaryAlias, err = ElectNewPrimary(ctx, pr.tmc, &ev.ShardInfo, tabletMap, innodbBufferPoolData, opts.NewPrimaryAlias, opts.AvoidPrimaryAlias, opts.WaitReplicasTimeout, opts.TolerableReplLag, opts.durability, pr.logger)
 	if err != nil {
 		return true, err
 	}
@@ -523,13 +526,13 @@ func (pr *PlannedReparenter) reparentShardLocked(
 		return err
 	}
 
-	err = pr.verifyAllTabletsReachable(ctx, tabletMap)
+	innodbBufferPoolData, err := pr.verifyAllTabletsReachable(ctx, tabletMap)
 	if err != nil {
 		return err
 	}
 
 	// Check invariants that PlannedReparentShard depends on.
-	if isNoop, err := pr.preflightChecks(ctx, ev, tabletMap, &opts); err != nil {
+	if isNoop, err := pr.preflightChecks(ctx, ev, tabletMap, innodbBufferPoolData, &opts); err != nil {
 		return err
 	} else if isNoop {
 		return nil
@@ -730,18 +733,30 @@ func (pr *PlannedReparenter) reparentTablets(
 }
 
 // verifyAllTabletsReachable verifies that all the tablets are reachable when running PRS.
-func (pr *PlannedReparenter) verifyAllTabletsReachable(ctx context.Context, tabletMap map[string]*topo.TabletInfo) error {
+func (pr *PlannedReparenter) verifyAllTabletsReachable(ctx context.Context, tabletMap map[string]*topo.TabletInfo) (map[string]int, error) {
 	// Create a cancellable context for the entire set of RPCs to verify reachability.
 	verifyCtx, verifyCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
 	defer verifyCancel()
 
+	innodbBufferPoolsData := make(map[string]int)
+	var mu sync.Mutex
 	errorGroup, groupCtx := errgroup.WithContext(verifyCtx)
-	for _, info := range tabletMap {
+	for tblStr, info := range tabletMap {
 		tablet := info.Tablet
 		errorGroup.Go(func() error {
-			_, err := pr.tmc.PrimaryStatus(groupCtx, tablet)
-			return err
+			statusValues, err := pr.tmc.GetGlobalStatusVars(groupCtx, tablet, []string{InnodbBufferPoolsDataVar})
+			if err != nil {
+				return err
+			}
+			// We are ignoring the error in conversion because some MySQL variants might not have this
+			// status variable like MariaDB.
+			val, _ := strconv.Atoi(statusValues[InnodbBufferPoolsDataVar])
+			mu.Lock()
+			defer mu.Unlock()
+			innodbBufferPoolsData[tblStr] = val
+			return nil
 		})
 	}
-	return errorGroup.Wait()
+	err := errorGroup.Wait()
+	return innodbBufferPoolsData, err
 }
diff --git a/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go b/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go
index 779390179b9..3a37e178ff8 100644
--- a/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go
+++ b/go/vt/vtctl/reparentutil/planned_reparenter_flaky_test.go
@@ -108,16 +108,19 @@ func TestPlannedReparenter_ReparentShard(t *testing.T) {
 				SetReadWriteResults: map[string]error{
 					"zone1-0000000100": nil,
 				},
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 			},
@@ -229,16 +232,19 @@ func TestPlannedReparenter_ReparentShard(t *testing.T) {
 				PopulateReparentJournalResults: map[string]error{
 					"zone1-0000000200": nil,
 				},
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 			},
@@ -318,16 +324,19 @@ func TestPlannedReparenter_ReparentShard(t *testing.T) {
 				SetReadWriteResults: map[string]error{
 					"zone1-0000000100": nil,
 				},
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 			},
@@ -390,13 +399,14 @@ func TestPlannedReparenter_ReparentShard(t *testing.T) {
 			// thoroughly to cover all the cases.
 			name: "reparent fails",
 			tmc: &testutil.TabletManagerClient{
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 			},
@@ -556,11 +566,12 @@ func TestPlannedReparenter_preflightChecks(t *testing.T) {
 		tmc     tmclient.TabletManagerClient
 		tablets []*topodatapb.Tablet
 
-		ev        *events.Reparent
-		keyspace  string
-		shard     string
-		tabletMap map[string]*topo.TabletInfo
-		opts      *PlannedReparentOptions
+		ev                   *events.Reparent
+		keyspace             string
+		shard                string
+		tabletMap            map[string]*topo.TabletInfo
+		innodbBufferPoolData map[string]int
+		opts                 *PlannedReparentOptions
 
 		expectedIsNoop bool
 		expectedEvent  *events.Reparent
@@ -812,6 +823,104 @@ func TestPlannedReparenter_preflightChecks(t *testing.T) {
 			},
 			shouldErr: false,
 		},
+		{
+			name: "primary selection based on buffer pool",
+			tmc: &testutil.TabletManagerClient{
+				ReplicationStatusResults: map[string]struct {
+					Position *replicationdatapb.Status
+					Error    error
+				}{
+					"zone1-0000000100": { // most advanced position
+						Position: &replicationdatapb.Status{
+							Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-10",
+						},
+					},
+					"zone1-0000000101": {
+						Position: &replicationdatapb.Status{
+							Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-10",
+						},
+					},
+				},
+			},
+			innodbBufferPoolData: map[string]int{
+				"zone1-0000000100": 100,
+				"zone1-0000000101": 200,
+			},
+			ev: &events.Reparent{
+				ShardInfo: *topo.NewShardInfo("testkeyspace", "-", &topodatapb.Shard{
+					PrimaryAlias: &topodatapb.TabletAlias{
+						Cell: "zone1",
+						Uid:  500,
+					},
+				}, nil),
+			},
+			tabletMap: map[string]*topo.TabletInfo{
+				"zone1-0000000100": {
+					Tablet: &topodatapb.Tablet{
+						Alias: &topodatapb.TabletAlias{
+							Cell: "zone1",
+							Uid:  100,
+						},
+						Type: topodatapb.TabletType_REPLICA,
+					},
+				},
+				"zone1-0000000101": {
+					Tablet: &topodatapb.Tablet{
+						Alias: &topodatapb.TabletAlias{
+							Cell: "zone1",
+							Uid:  101,
+						},
+						Type: topodatapb.TabletType_REPLICA,
+					},
+				},
+				"zone1-0000000500": {
+					Tablet: &topodatapb.Tablet{
+						Alias: &topodatapb.TabletAlias{
+							Cell: "zone1",
+							Uid:  500,
+						},
+						Type: topodatapb.TabletType_PRIMARY,
+					},
+				},
+			},
+			opts: &PlannedReparentOptions{
+				// Avoid the current primary.
+				AvoidPrimaryAlias: &topodatapb.TabletAlias{
+					Cell: "zone1",
+					Uid:  500,
+				},
+				durability: &durabilityNone{},
+			},
+			expectedIsNoop: false,
+			expectedEvent: &events.Reparent{
+				ShardInfo: *topo.NewShardInfo("testkeyspace", "-", &topodatapb.Shard{
+					PrimaryAlias: &topodatapb.TabletAlias{
+						Cell: "zone1",
+						Uid:  500,
+					},
+				}, nil),
+				NewPrimary: &topodatapb.Tablet{
+					Alias: &topodatapb.TabletAlias{
+						Cell: "zone1",
+						Uid:  101,
+					},
+					Type: topodatapb.TabletType_REPLICA,
+				},
+			},
+			expectedOpts: &PlannedReparentOptions{
+				AvoidPrimaryAlias: &topodatapb.TabletAlias{
+					Cell: "zone1",
+					Uid:  500,
+				},
+				// NewPrimaryAlias gets populated by the preflightCheck code
+				NewPrimaryAlias: &topodatapb.TabletAlias{
+					Cell: "zone1",
+					Uid:  101,
+				},
+				durability: &durabilityNone{},
+			},
+			shouldErr: false,
+		},
 		{
 			name: "new-primary and avoid-primary match",
 			opts: &PlannedReparentOptions{
@@ -1081,7 +1190,7 @@ func TestPlannedReparenter_preflightChecks(t *testing.T) {
 				require.NoError(t, err)
 				tt.opts.durability = durability
 			}
-			isNoop, err := pr.preflightChecks(ctx, tt.ev, tt.tabletMap, tt.opts)
+			isNoop, err := pr.preflightChecks(ctx, tt.ev, tt.tabletMap, tt.innodbBufferPoolData, tt.opts)
 			if tt.shouldErr {
 				assert.Error(t, err)
 				assert.Equal(t, tt.expectedIsNoop, isNoop, "preflightChecks returned wrong isNoop signal")
@@ -2431,16 +2540,19 @@ func TestPlannedReparenter_reparentShardLocked(t *testing.T) {
 						Error: nil,
 					},
 				},
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 				PopulateReparentJournalResults: map[string]error{
@@ -2523,16 +2635,19 @@ func TestPlannedReparenter_reparentShardLocked(t *testing.T) {
 				SetReadWriteResults: map[string]error{
 					"zone1-0000000100": nil,
 				},
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 			},
@@ -2603,16 +2718,19 @@ func TestPlannedReparenter_reparentShardLocked(t *testing.T) {
 						Error: nil,
 					},
 				},
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 				PrimaryPositionResults: map[string]struct {
@@ -2715,16 +2833,19 @@ func TestPlannedReparenter_reparentShardLocked(t *testing.T) {
 				SetReplicationSourceResults: map[string]error{
 					"zone1-0000000100": nil, // called during reparentTablets to make this tablet a replica of newPrimary
 				},
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 			},
@@ -2808,16 +2929,19 @@ func TestPlannedReparenter_reparentShardLocked(t *testing.T) {
 				SetReplicationSourceResults: map[string]error{
 					"zone1-0000000100": nil, // called during reparentTablets to make this tablet a replica of newPrimary
 				},
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 			},
@@ -2867,16 +2991,19 @@ func TestPlannedReparenter_reparentShardLocked(t *testing.T) {
 		{
 			name: "preflight checks determine PRS is no-op",
 			tmc: &testutil.TabletManagerClient{
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 			},
@@ -2930,16 +3057,19 @@ func TestPlannedReparenter_reparentShardLocked(t *testing.T) {
 				SetReadWriteResults: map[string]error{
 					"zone1-0000000100": assert.AnError,
 				},
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 			},
@@ -2998,16 +3128,19 @@ func TestPlannedReparenter_reparentShardLocked(t *testing.T) {
 		{
 			name: "lost topology lock",
 			tmc: &testutil.TabletManagerClient{
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 				PrimaryPositionResults: map[string]struct {
@@ -3089,16 +3222,19 @@ func TestPlannedReparenter_reparentShardLocked(t *testing.T) {
 						Error: nil,
 					},
 				},
-				// This is only needed to verify reachability, so empty results are fine.
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 				},
 				PopulateReparentJournalResults: map[string]error{
@@ -3849,27 +3985,34 @@ func AssertReparentEventsEqual(t *testing.T, expected *events.Reparent, actual *
 
 // TestPlannedReparenter_verifyAllTabletsReachable tests the functionality of verifyAllTabletsReachable.
 func TestPlannedReparenter_verifyAllTabletsReachable(t *testing.T) {
 	tests := []struct {
-		name         string
-		tmc          tmclient.TabletManagerClient
-		tabletMap    map[string]*topo.TabletInfo
-		remoteOpTime time.Duration
-		wantErr      string
+		name                string
+		tmc                 tmclient.TabletManagerClient
+		tabletMap           map[string]*topo.TabletInfo
+		remoteOpTime        time.Duration
+		wantErr             string
+		wantBufferPoolsData map[string]int
 	}{
 		{
 			name: "Success",
 			tmc: &testutil.TabletManagerClient{
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000201": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "1234",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "1231",
+						},
 					},
 				},
 			},
@@ -3902,21 +4045,30 @@ func TestPlannedReparenter_verifyAllTabletsReachable(t *testing.T) {
 					},
 				},
 			},
+			wantBufferPoolsData: map[string]int{
+				"zone1-0000000200": 123,
+				"zone1-0000000201": 1234,
+				"zone1-0000000100": 1231,
+			},
 		},
 		{
 			name: "Failure",
 			tmc: &testutil.TabletManagerClient{
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
 						Error: fmt.Errorf("primary status failed"),
 					},
 					"zone1-0000000201": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "1234",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "1231",
+						},
 					},
 				},
 			},
@@ -3953,21 +4105,27 @@ func TestPlannedReparenter_verifyAllTabletsReachable(t *testing.T) {
 		{
 			name: "Timeout",
 			tmc: &testutil.TabletManagerClient{
-				PrimaryStatusDelays: map[string]time.Duration{
+				GetGlobalStatusVarsDelays: map[string]time.Duration{
 					"zone1-0000000100": 20 * time.Second,
 				},
-				PrimaryStatusResults: map[string]struct {
-					Status *replicationdatapb.PrimaryStatus
-					Error  error
+				GetGlobalStatusVarsResults: map[string]struct {
+					Statuses map[string]string
+					Error    error
 				}{
 					"zone1-0000000200": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "123",
+						},
 					},
 					"zone1-0000000201": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "1234",
+						},
 					},
 					"zone1-0000000100": {
-						Status: &replicationdatapb.PrimaryStatus{},
+						Statuses: map[string]string{
+							InnodbBufferPoolsDataVar: "1231",
+						},
 					},
 				},
 			},
@@ -4022,9 +4180,13 @@ func TestPlannedReparenter_verifyAllTabletsReachable(t *testing.T) {
 					topo.RemoteOperationTimeout = oldTime
 				}()
 			}
-			err := pr.verifyAllTabletsReachable(context.Background(), tt.tabletMap)
+			innodbBufferPoolsData, err := pr.verifyAllTabletsReachable(context.Background(), tt.tabletMap)
 			if tt.wantErr == "" {
 				require.NoError(t, err)
+				require.EqualValues(t, len(tt.wantBufferPoolsData), len(innodbBufferPoolsData))
+				for str, val := range tt.wantBufferPoolsData {
+					require.EqualValues(t, val, innodbBufferPoolsData[str])
+				}
 				return
 			}
 			require.ErrorContains(t, err, tt.wantErr)
@@ -4055,16 +4217,19 @@ func TestPlannedReparenterStats(t *testing.T) {
 		SetReadWriteResults: map[string]error{
 			"zone1-0000000100": nil,
 		},
-		// This is only needed to verify reachability, so empty results are fine.
-		PrimaryStatusResults: map[string]struct {
-			Status *replicationdatapb.PrimaryStatus
-			Error  error
+		GetGlobalStatusVarsResults: map[string]struct {
+			Statuses map[string]string
+			Error    error
 		}{
 			"zone1-0000000101": {
-				Status: &replicationdatapb.PrimaryStatus{},
+				Statuses: map[string]string{
+					InnodbBufferPoolsDataVar: "123",
+				},
 			},
 			"zone1-0000000100": {
-				Status: &replicationdatapb.PrimaryStatus{},
+				Statuses: map[string]string{
+					InnodbBufferPoolsDataVar: "123",
+				},
 			},
 		},
 	}
diff --git a/go/vt/vtctl/reparentutil/reparent_sorter.go b/go/vt/vtctl/reparentutil/reparent_sorter.go
index e4461b78064..ea7367bd36b 100644
--- a/go/vt/vtctl/reparentutil/reparent_sorter.go
+++ b/go/vt/vtctl/reparentutil/reparent_sorter.go
@@ -29,17 +29,19 @@ import (
 // reparentSorter sorts tablets by GTID positions and Promotion rules aimed at finding the best
 // candidate for intermediate promotion in emergency reparent shard, and the new primary in planned reparent shard
 type reparentSorter struct {
-	tablets    []*topodatapb.Tablet
-	positions  []replication.Position
-	durability Durabler
+	tablets          []*topodatapb.Tablet
+	positions        []replication.Position
+	innodbBufferPool []int
+	durability       Durabler
 }
 
 // newReparentSorter creates a new reparentSorter
-func newReparentSorter(tablets []*topodatapb.Tablet, positions []replication.Position, durability Durabler) *reparentSorter {
+func newReparentSorter(tablets []*topodatapb.Tablet, positions []replication.Position, innodbBufferPool []int, durability Durabler) *reparentSorter {
 	return &reparentSorter{
-		tablets:    tablets,
-		positions:  positions,
-		durability: durability,
+		tablets:          tablets,
+		positions:        positions,
+		durability:       durability,
+		innodbBufferPool: innodbBufferPool,
 	}
 }
 
@@ -50,6 +52,9 @@ func (rs *reparentSorter) Len() int { return len(rs.tablets) }
 func (rs *reparentSorter) Swap(i, j int) {
 	rs.tablets[i], rs.tablets[j] = rs.tablets[j], rs.tablets[i]
 	rs.positions[i], rs.positions[j] = rs.positions[j], rs.positions[i]
+	if len(rs.innodbBufferPool) != 0 {
+		rs.innodbBufferPool[i], rs.innodbBufferPool[j] = rs.innodbBufferPool[j], rs.innodbBufferPool[i]
+	}
 }
 
 // Less implements the Interface for sorting
@@ -79,18 +84,29 @@ func (rs *reparentSorter) Less(i, j int) bool {
 	// so we check their promotion rules
 	jPromotionRule := PromotionRule(rs.durability, rs.tablets[j])
 	iPromotionRule := PromotionRule(rs.durability, rs.tablets[i])
+
+	// If the promotion rules are equal, then we want to sort by the innodb buffer pool data.
+	if len(rs.innodbBufferPool) != 0 && jPromotionRule == iPromotionRule {
+		if rs.innodbBufferPool[i] > rs.innodbBufferPool[j] {
+			return true
+		}
+		if rs.innodbBufferPool[j] > rs.innodbBufferPool[i] {
+			return false
+		}
+	}
+
 	return !jPromotionRule.BetterThan(iPromotionRule)
 }
 
 // sortTabletsForReparent sorts the tablets, given their positions for emergency reparent shard and planned reparent shard.
 // Tablets are sorted first by their replication positions, with ties broken by the promotion rules.
-func sortTabletsForReparent(tablets []*topodatapb.Tablet, positions []replication.Position, durability Durabler) error {
+func sortTabletsForReparent(tablets []*topodatapb.Tablet, positions []replication.Position, innodbBufferPool []int, durability Durabler) error {
 	// throw an error internal error in case of unequal number of tablets and positions
 	// fail-safe code prevents panic in sorting in case the lengths are unequal
 	if len(tablets) != len(positions) {
 		return vterrors.Errorf(vtrpcpb.Code_INTERNAL, "unequal number of tablets and positions")
 	}
 
-	sort.Sort(newReparentSorter(tablets, positions, durability))
+	sort.Sort(newReparentSorter(tablets, positions, innodbBufferPool, durability))
 	return nil
 }
diff --git a/go/vt/vtctl/reparentutil/reparent_sorter_test.go b/go/vt/vtctl/reparentutil/reparent_sorter_test.go
index c21c95ad22b..ae5d56e884e 100644
--- a/go/vt/vtctl/reparentutil/reparent_sorter_test.go
+++ b/go/vt/vtctl/reparentutil/reparent_sorter_test.go
@@ -89,17 +89,24 @@ func TestReparentSorter(t *testing.T) {
 	positionIntermediate2.GTIDSet = positionIntermediate2.GTIDSet.AddGTID(mysqlGTID2)
 
 	testcases := []struct {
-		name          string
-		tablets       []*topodatapb.Tablet
-		positions     []replication.Position
-		containsErr   string
-		sortedTablets []*topodatapb.Tablet
+		name             string
+		tablets          []*topodatapb.Tablet
+		innodbBufferPool []int
+		positions        []replication.Position
+		containsErr      string
+		sortedTablets    []*topodatapb.Tablet
 	}{
 		{
 			name:          "all advanced, sort via promotion rules",
 			tablets:       []*topodatapb.Tablet{nil, tabletReplica1_100, tabletRdonly1_102},
 			positions:     []replication.Position{positionMostAdvanced, positionMostAdvanced, positionMostAdvanced},
 			sortedTablets: []*topodatapb.Tablet{tabletReplica1_100, tabletRdonly1_102, nil},
+		}, {
+			name:             "all advanced, sort via innodb buffer pool",
+			tablets:          []*topodatapb.Tablet{tabletReplica1_101, tabletReplica2_100, tabletReplica1_100},
+			positions:        []replication.Position{positionMostAdvanced, positionMostAdvanced, positionMostAdvanced},
+			innodbBufferPool: []int{10, 40, 25},
+			sortedTablets:    []*topodatapb.Tablet{tabletReplica2_100, tabletReplica1_100, tabletReplica1_101},
 		}, {
 			name:          "ordering by position",
 			tablets:       []*topodatapb.Tablet{tabletReplica1_101, tabletReplica2_100, tabletReplica1_100, tabletRdonly1_102},
@@ -120,6 +127,12 @@ func TestReparentSorter(t *testing.T) {
 			tablets:       []*topodatapb.Tablet{tabletReplica1_101, tabletReplica2_100, tabletReplica1_100, tabletRdonly1_102},
 			positions:     []replication.Position{positionEmpty, positionIntermediate1, positionMostAdvanced, positionIntermediate1},
 			sortedTablets: []*topodatapb.Tablet{tabletReplica1_100, tabletReplica2_100, tabletRdonly1_102, tabletReplica1_101},
+		}, {
+			name:             "mixed - another",
+			tablets:          []*topodatapb.Tablet{tabletReplica1_101, tabletReplica2_100, tabletReplica1_100, tabletRdonly1_102},
+			positions:        []replication.Position{positionIntermediate1, positionIntermediate1, positionMostAdvanced, positionIntermediate1},
+			innodbBufferPool: []int{100, 200, 0, 200},
+			sortedTablets:    []*topodatapb.Tablet{tabletReplica1_100, tabletReplica2_100, tabletReplica1_101, tabletRdonly1_102},
 		},
 	}
 
@@ -127,7 +140,7 @@ func TestReparentSorter(t *testing.T) {
 	require.NoError(t, err)
 	for _, testcase := range testcases {
 		t.Run(testcase.name, func(t *testing.T) {
-			err := sortTabletsForReparent(testcase.tablets, testcase.positions, durability)
+			err := sortTabletsForReparent(testcase.tablets, testcase.positions, testcase.innodbBufferPool, durability)
 			if testcase.containsErr != "" {
 				require.EqualError(t, err, testcase.containsErr)
 			} else {
diff --git a/go/vt/vtctl/reparentutil/util.go b/go/vt/vtctl/reparentutil/util.go
index f35ea2695b8..ea7a9f7262c 100644
--- a/go/vt/vtctl/reparentutil/util.go
+++ b/go/vt/vtctl/reparentutil/util.go
@@ -68,6 +68,7 @@ func ElectNewPrimary(
 	tmc tmclient.TabletManagerClient,
 	shardInfo *topo.ShardInfo,
 	tabletMap map[string]*topo.TabletInfo,
+	innodbBufferPoolData map[string]int,
 	newPrimaryAlias *topodatapb.TabletAlias,
 	avoidPrimaryAlias *topodatapb.TabletAlias,
 	waitReplicasTimeout time.Duration,
@@ -88,6 +89,7 @@ func ElectNewPrimary(
 		// tablets that are possible candidates to be the new primary and their positions
 		validTablets     []*topodatapb.Tablet
 		tabletPositions  []replication.Position
+		innodbBufferPool []int
 		errorGroup, groupCtx = errgroup.WithContext(ctx)
 	)
 
@@ -134,6 +136,7 @@ func ElectNewPrimary(
 			if err == nil && (tolerableReplLag == 0 || tolerableReplLag >= replLag) {
 				validTablets = append(validTablets, tb)
 				tabletPositions = append(tabletPositions, pos)
+				innodbBufferPool = append(innodbBufferPool, innodbBufferPoolData[topoproto.TabletAliasString(tb.Alias)])
 			} else {
 				reasonsToInvalidate.WriteString(fmt.Sprintf("\n%v has %v replication lag which is more than the tolerable amount", topoproto.TabletAliasString(tablet.Alias), replLag))
 			}
@@ -152,7 +155,7 @@ func ElectNewPrimary(
 	}
 
 	// sort the tablets for finding the best primary
-	err = sortTabletsForReparent(validTablets, tabletPositions, durability)
+	err = sortTabletsForReparent(validTablets, tabletPositions, innodbBufferPool, durability)
 	if err != nil {
 		return nil, err
 	}
diff --git a/go/vt/vtctl/reparentutil/util_test.go b/go/vt/vtctl/reparentutil/util_test.go
index dd13e48f7b7..d42ae76f337 100644
--- a/go/vt/vtctl/reparentutil/util_test.go
+++ b/go/vt/vtctl/reparentutil/util_test.go
@@ -67,15 +67,16 @@ func TestElectNewPrimary(t *testing.T) {
 	ctx := context.Background()
 	logger := logutil.NewMemoryLogger()
 	tests := []struct {
-		name              string
-		tmc               *chooseNewPrimaryTestTMClient
-		shardInfo         *topo.ShardInfo
-		tabletMap         map[string]*topo.TabletInfo
-		newPrimaryAlias   *topodatapb.TabletAlias
-		avoidPrimaryAlias *topodatapb.TabletAlias
-		tolerableReplLag  time.Duration
-		expected          *topodatapb.TabletAlias
-		errContains       []string
+		name                 string
+		tmc                  *chooseNewPrimaryTestTMClient
+		shardInfo            *topo.ShardInfo
+		tabletMap            map[string]*topo.TabletInfo
+		innodbBufferPoolData map[string]int
+		newPrimaryAlias      *topodatapb.TabletAlias
+		avoidPrimaryAlias    *topodatapb.TabletAlias
+		tolerableReplLag     time.Duration
+		expected             *topodatapb.TabletAlias
+		errContains          []string
 	}{
 		{
 			name: "found a replica",
@@ -472,6 +473,68 @@ func TestElectNewPrimary(t *testing.T) {
 			},
 			errContains: nil,
 		},
+		{
+			name: "found a replica - more advanced innodb buffer pool",
+			tmc: &chooseNewPrimaryTestTMClient{
+				replicationStatuses: map[string]*replicationdatapb.Status{
+					"zone1-0000000101": {
+						Position: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-2",
+					},
+					"zone1-0000000102": {
+						Position:         "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1",
+						RelayLogPosition: "MySQL56/3E11FA47-71CA-11E1-9E33-C80AA9429562:1-2",
+					},
+				},
+			},
+			innodbBufferPoolData: map[string]int{
+				"zone1-0000000101": 200,
+				"zone1-0000000102": 100,
+			},
+			shardInfo: topo.NewShardInfo("testkeyspace", "-", &topodatapb.Shard{
+				PrimaryAlias: &topodatapb.TabletAlias{
+					Cell: "zone1",
+					Uid:  100,
+				},
+			}, nil),
+			tabletMap: map[string]*topo.TabletInfo{
+				"primary": {
+					Tablet: &topodatapb.Tablet{
+						Alias: &topodatapb.TabletAlias{
+							Cell: "zone1",
+							Uid:  100,
+						},
+						Type: topodatapb.TabletType_PRIMARY,
+					},
+				},
+				"replica1": {
+					Tablet: &topodatapb.Tablet{
+						Alias: &topodatapb.TabletAlias{
+							Cell: "zone1",
+							Uid:  101,
+						},
+						Type: topodatapb.TabletType_REPLICA,
+					},
+				},
+				"replica2": {
+					Tablet: &topodatapb.Tablet{
+						Alias: &topodatapb.TabletAlias{
+							Cell: "zone1",
+							Uid:  102,
+						},
+						Type: topodatapb.TabletType_REPLICA,
+					},
+				},
+			},
+			avoidPrimaryAlias: &topodatapb.TabletAlias{
+				Cell: "zone1",
+				Uid:  0,
+			},
+			expected: &topodatapb.TabletAlias{
+				Cell: "zone1",
+				Uid:  101,
+			},
+			errContains: nil,
+		},
 		{
 			name: "no active primary in shard",
 			tmc: &chooseNewPrimaryTestTMClient{
@@ -731,7 +794,7 @@ func TestElectNewPrimary(t *testing.T) {
 		t.Run(tt.name, func(t *testing.T) {
 			t.Parallel()
 
-			actual, err := ElectNewPrimary(ctx, tt.tmc, tt.shardInfo, tt.tabletMap, tt.newPrimaryAlias, tt.avoidPrimaryAlias, time.Millisecond*50, tt.tolerableReplLag, durability, logger)
+			actual, err := ElectNewPrimary(ctx, tt.tmc, tt.shardInfo, tt.tabletMap, tt.innodbBufferPoolData, tt.newPrimaryAlias, tt.avoidPrimaryAlias, time.Millisecond*50, tt.tolerableReplLag, durability, logger)
 			if len(tt.errContains) > 0 {
 				for _, errC := range tt.errContains {
 					assert.ErrorContains(t, err, errC)
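
A minimal, self-contained sketch (simplified, assumed names; not the Vitess code above) of the candidate ordering this diff introduces: most advanced replication position first, then the better promotion rule, and only when both are equal does the larger Innodb_buffer_pool_pages_data value break the tie.

// tie_break_sketch.go - illustrative only; positionRank and promotionRank stand in
// for the real GTID position comparison and durability promotion rules.
package main

import (
	"fmt"
	"sort"
)

type candidate struct {
	alias          string
	positionRank   int // higher = more advanced replication position
	promotionRank  int // lower = better promotion rule
	bufferPoolData int // Innodb_buffer_pool_pages_data reported by the tablet
}

func main() {
	candidates := []candidate{
		{alias: "zone1-0000000101", positionRank: 10, promotionRank: 1, bufferPoolData: 100},
		{alias: "zone1-0000000102", positionRank: 10, promotionRank: 1, bufferPoolData: 250},
		{alias: "zone1-0000000103", positionRank: 9, promotionRank: 0, bufferPoolData: 999},
	}

	sort.Slice(candidates, func(i, j int) bool {
		a, b := candidates[i], candidates[j]
		if a.positionRank != b.positionRank {
			return a.positionRank > b.positionRank // more advanced position wins outright
		}
		if a.promotionRank != b.promotionRank {
			return a.promotionRank < b.promotionRank // then the better promotion rule
		}
		return a.bufferPoolData > b.bufferPoolData // warmer buffer pool breaks the tie
	})

	for _, c := range candidates {
		fmt.Println(c.alias, c.bufferPoolData)
	}
	// zone1-0000000102 sorts first: it ties with zone1-0000000101 on position and
	// promotion rule but has more buffer pool pages loaded.
}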