Skip to content

Commit 6f406db

Browse files
authored
Slack 19.0 backport 16997 (#568)
* PRS and ERS don't promote replicas taking backups (vitessio#16997)

Signed-off-by: Eduardo J. Ortega U <5791035+ejortegau@users.noreply.github.com>
1 parent 508c86d commit 6f406db

File tree

19 files changed

+1199
-631
lines changed

19 files changed

+1199
-631
lines changed

go/vt/proto/replicationdata/replicationdata.pb.go

Lines changed: 108 additions & 87 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go/vt/proto/replicationdata/replicationdata_vtproto.pb.go

Lines changed: 72 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go/vt/proto/tabletmanagerdata/tabletmanagerdata.pb.go

Lines changed: 490 additions & 469 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go/vt/proto/tabletmanagerdata/tabletmanagerdata_vtproto.pb.go

Lines changed: 70 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,6 +184,8 @@ type TabletManagerClient struct {
184184
EventJitter time.Duration
185185
ErrorAfter time.Duration
186186
}
187+
// Backup state (true while a backup is reported as running) - keyed by tablet alias.
188+
TabletsBackupState map[string]bool
187189
// keyed by tablet alias.
188190
ChangeTabletTypeResult map[string]error
189191
ChangeTabletTypeDelays map[string]time.Duration
@@ -918,6 +920,9 @@ func (fake *TabletManagerClient) ReplicationStatus(ctx context.Context, tablet *
918920
}
919921

920922
if result, ok := fake.ReplicationStatusResults[key]; ok {
923+
if _, ok = fake.TabletsBackupState[key]; ok {
924+
result.Position.BackupRunning = fake.TabletsBackupState[key]
925+
}
921926
return result.Position, result.Error
922927
}
923928

go/vt/vtctl/reparentutil/emergency_reparenter.go

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ func (erp *EmergencyReparenter) reparentShardLocked(ctx context.Context, ev *eve
259259
// 2. Remove the tablets with the Must_not promote rule
260260
// 3. Remove cross-cell tablets if PreventCrossCellPromotion is specified
261261
// Our final primary candidate MUST belong to this list of valid candidates
262-
validCandidateTablets, err = erp.filterValidCandidates(validCandidateTablets, stoppedReplicationSnapshot.reachableTablets, prevPrimary, opts)
262+
validCandidateTablets, err = erp.filterValidCandidates(validCandidateTablets, stoppedReplicationSnapshot.reachableTablets, stoppedReplicationSnapshot.tabletsBackupState, prevPrimary, opts)
263263
if err != nil {
264264
return err
265265
}
@@ -740,9 +740,12 @@ func (erp *EmergencyReparenter) promoteNewPrimary(
740740
return nil
741741
}
742742

743-
// filterValidCandidates filters valid tablets, keeping only the ones which can successfully be promoted without any constraint failures and can make forward progress on being promoted
744-
func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb.Tablet, tabletsReachable []*topodatapb.Tablet, prevPrimary *topodatapb.Tablet, opts EmergencyReparentOptions) ([]*topodatapb.Tablet, error) {
743+
// filterValidCandidates filters valid tablets, keeping only the ones which can successfully be promoted without any
744+
// constraint failures and can make forward progress on being promoted. Candidates taking backups are set aside
745+
// and are only returned when no other valid candidate remains.
746+
func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb.Tablet, tabletsReachable []*topodatapb.Tablet, tabletsBackupState map[string]bool, prevPrimary *topodatapb.Tablet, opts EmergencyReparentOptions) ([]*topodatapb.Tablet, error) {
745747
var restrictedValidTablets []*topodatapb.Tablet
748+
var notPreferredValidTablets []*topodatapb.Tablet
746749
for _, tablet := range validTablets {
747750
tabletAliasStr := topoproto.TabletAliasString(tablet.Alias)
748751
// Remove tablets which have MustNot promote rule since they must never be promoted
@@ -769,7 +772,18 @@ func (erp *EmergencyReparenter) filterValidCandidates(validTablets []*topodatapb
769772
}
770773
continue
771774
}
772-
restrictedValidTablets = append(restrictedValidTablets, tablet)
775+
// Put candidates that are running a backup in a separate list
776+
backingUp, ok := tabletsBackupState[tabletAliasStr]
777+
if ok && backingUp {
778+
erp.logger.Infof("Setting %s in list of valid candidates taking a backup", tabletAliasStr)
779+
notPreferredValidTablets = append(notPreferredValidTablets, tablet)
780+
} else {
781+
restrictedValidTablets = append(restrictedValidTablets, tablet)
782+
}
783+
}
784+
if len(restrictedValidTablets) > 0 {
785+
return restrictedValidTablets, nil
773786
}
774-
return restrictedValidTablets, nil
787+
788+
return notPreferredValidTablets, nil
775789
}

0 commit comments

Comments
 (0)