Skip to content

Commit eaaa206

Browse files
Add StalledDiskPrimary analysis and recovery to vtorc (#17470)
Signed-off-by: Manan Gupta <manan@planetscale.com>
1 parent 71ccd6d commit eaaa206

19 files changed

+377
-29
lines changed

changelog/22.0/22.0.0/summary.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
- **[Support for More Efficient JSON Replication](#efficient-json-replication)**
1313
- **[Support for LAST_INSERT_ID(x)](#last-insert-id)**
1414
- **[Support for Maximum Idle Connections in the Pool](#max-idle-connections)**
15+
- **[Stalled Disk Recovery in VTOrc](#stall-disk-recovery)**
1516
- **[Minor Changes](#minor-changes)**
1617
- **[VTTablet Flags](#flags-vttablet)**
1718
- **[Topology read concurrency behaviour changes](#topo-read-concurrency-changes)**
@@ -100,6 +101,11 @@ You can control idle connection retention for the query server’s query pool, s
100101

101102
This feature ensures that, during traffic spikes, idle connections are available for faster responses, while minimizing overhead in low-traffic periods by limiting the number of idle connections retained. It helps strike a balance between performance, efficiency, and cost.
102103

104+
### <a id="stall-disk-recovery"/>Stalled Disk Recovery in VTOrc</a>
105+
VTOrc can now identify and recover from stalled disk errors. VTTablets test whether the disk is writable and they send this information in the full status output to VTOrc. If the disk is not writable on the primary tablet, VTOrc will attempt to recover the cluster by promoting a new primary. This is useful in scenarios where the disk is stalled and the primary vttablet is unable to accept writes because of it.
106+
107+
To opt into this feature, the `--enable-primary-disk-stalled-recovery` flag has to be specified on VTOrc, and the `--disk-write-dir` flag has to be specified on the vttablets. The `--disk-write-interval` and `--disk-write-timeout` flags can be used to configure the polling interval and timeout respectively.
108+
103109
## <a id="minor-changes"/>Minor Changes</a>
104110

105111
#### <a id="flags-vttablet"/>VTTablet Flags</a>

go/flags/endtoend/vtcombo.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ Flags:
102102
--ddl_strategy string Set default strategy for DDL statements. Override with @@ddl_strategy session variable (default "direct")
103103
--default_tablet_type topodatapb.TabletType The default tablet type to set for queries, when one is not explicitly selected. (default PRIMARY)
104104
--degraded_threshold duration replication lag after which a replica is considered degraded (default 30s)
105+
--disk-write-dir string if provided, tablet will attempt to write a file to this directory to check if the disk is stalled
106+
--disk-write-interval duration how often to write to the disk to check whether it is stalled (default 5s)
107+
--disk-write-timeout duration if writes exceed this duration, the disk is considered stalled (default 30s)
105108
--emit_stats If set, emit stats to push-based monitoring and stats backends
106109
--enable-consolidator Synonym to -enable_consolidator (default true)
107110
--enable-consolidator-replicas Synonym to -enable_consolidator_replicas

go/flags/endtoend/vtorc.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ Flags:
3333
--config-type string Config file type (omit to infer config type from file extension).
3434
--consul_auth_static_file string JSON File to read the topos/tokens from.
3535
--emit_stats If set, emit stats to push-based monitoring and stats backends
36+
--enable-primary-disk-stalled-recovery Whether VTOrc should detect a stalled disk on the primary and failover
3637
--grpc-dial-concurrency-limit int Maximum concurrency of grpc dial operations. This should be less than the golang max thread limit of 10000. (default 1024)
3738
--grpc_auth_static_client_creds string When using grpc_static_auth in the server, this file provides the credentials to use to authenticate with server.
3839
--grpc_compression string Which protocol to use for compressing gRPC. Default: nothing. Supported: snappy

go/flags/endtoend/vttablet.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ Flags:
133133
--dba_idle_timeout duration Idle timeout for dba connections (default 1m0s)
134134
--dba_pool_size int Size of the connection pool for dba connections (default 20)
135135
--degraded_threshold duration replication lag after which a replica is considered degraded (default 30s)
136+
--disk-write-dir string if provided, tablet will attempt to write a file to this directory to check if the disk is stalled
137+
--disk-write-interval duration how often to write to the disk to check whether it is stalled (default 5s)
138+
--disk-write-timeout duration if writes exceed this duration, the disk is considered stalled (default 30s)
136139
--emit_stats If set, emit stats to push-based monitoring and stats backends
137140
--enable-consolidator Synonym to -enable_consolidator (default true)
138141
--enable-consolidator-replicas Synonym to -enable_consolidator_replicas

go/vt/vtorc/config/config.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -174,6 +174,15 @@ var (
174174
Dynamic: true,
175175
},
176176
)
177+
178+
enablePrimaryDiskStalledRecovery = viperutil.Configure(
179+
"enable-primary-disk-stalled-recovery",
180+
viperutil.Options[bool]{
181+
FlagName: "enable-primary-disk-stalled-recovery",
182+
Default: false,
183+
Dynamic: true,
184+
},
185+
)
177186
)
178187

179188
func init() {
@@ -197,6 +206,7 @@ func registerFlags(fs *pflag.FlagSet) {
197206
fs.Duration("recovery-poll-duration", recoveryPollDuration.Default(), "Timer duration on which VTOrc polls its database to run a recovery")
198207
fs.Bool("allow-emergency-reparent", ersEnabled.Default(), "Whether VTOrc should be allowed to run emergency reparent operation when it detects a dead primary")
199208
fs.Bool("change-tablets-with-errant-gtid-to-drained", convertTabletsWithErrantGTIDs.Default(), "Whether VTOrc should be changing the type of tablets with errant GTIDs to DRAINED")
209+
fs.Bool("enable-primary-disk-stalled-recovery", enablePrimaryDiskStalledRecovery.Default(), "Whether VTOrc should detect a stalled disk on the primary and failover")
200210

201211
viperutil.BindFlags(fs,
202212
instancePollTime,
@@ -214,6 +224,7 @@ func registerFlags(fs *pflag.FlagSet) {
214224
recoveryPollDuration,
215225
ersEnabled,
216226
convertTabletsWithErrantGTIDs,
227+
enablePrimaryDiskStalledRecovery,
217228
)
218229
}
219230

@@ -332,6 +343,11 @@ func SetConvertTabletWithErrantGTIDs(val bool) {
332343
convertTabletsWithErrantGTIDs.Set(val)
333344
}
334345

346+
// GetStalledDiskPrimaryRecovery reports whether VTOrc is allowed to check for and recover from stalled disk problems.
347+
func GetStalledDiskPrimaryRecovery() bool {
348+
return enablePrimaryDiskStalledRecovery.Get()
349+
}
350+
335351
// MarkConfigurationLoaded is called once configuration has first been loaded.
336352
// Listeners on ConfigurationLoaded will get a notification
337353
func MarkConfigurationLoaded() {

go/vt/vtorc/db/generate_base.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -105,6 +105,7 @@ CREATE TABLE database_instance (
105105
semi_sync_primary_status TINYint NOT NULL DEFAULT 0,
106106
semi_sync_replica_status TINYint NOT NULL DEFAULT 0,
107107
semi_sync_primary_clients int NOT NULL DEFAULT 0,
108+
is_disk_stalled TINYint NOT NULL DEFAULT 0,
108109
PRIMARY KEY (alias)
109110
)`,
110111
`

go/vt/vtorc/inst/analysis.go

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,7 @@ const (
5656
LockedSemiSyncPrimaryHypothesis AnalysisCode = "LockedSemiSyncPrimaryHypothesis"
5757
LockedSemiSyncPrimary AnalysisCode = "LockedSemiSyncPrimary"
5858
ErrantGTIDDetected AnalysisCode = "ErrantGTIDDetected"
59+
PrimaryDiskStalled AnalysisCode = "PrimaryDiskStalled"
5960
)
6061

6162
type StructureAnalysisCode string
@@ -129,6 +130,7 @@ type ReplicationAnalysis struct {
129130
MaxReplicaGTIDMode string
130131
MaxReplicaGTIDErrant string
131132
IsReadOnly bool
133+
IsDiskStalled bool
132134
}
133135

134136
func (replicationAnalysis *ReplicationAnalysis) MarshalJSON() ([]byte, error) {

go/vt/vtorc/inst/analysis_dao.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -79,7 +79,7 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
7979
vitess_keyspace.durability_policy AS durability_policy,
8080
vitess_shard.primary_timestamp AS shard_primary_term_timestamp,
8181
primary_instance.read_only AS read_only,
82-
MIN(primary_instance.gtid_errant) AS gtid_errant,
82+
MIN(primary_instance.gtid_errant) AS gtid_errant,
8383
MIN(primary_instance.alias) IS NULL AS is_invalid,
8484
MIN(primary_instance.binary_log_file) AS binary_log_file,
8585
MIN(primary_instance.binary_log_pos) AS binary_log_pos,
@@ -233,7 +233,8 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
233233
COUNT(
234234
DISTINCT case when replica_instance.log_bin
235235
AND replica_instance.log_replica_updates then replica_instance.major_version else NULL end
236-
) AS count_distinct_logging_major_versions
236+
) AS count_distinct_logging_major_versions,
237+
primary_instance.is_disk_stalled != 0 AS is_disk_stalled
237238
FROM
238239
vitess_tablet
239240
JOIN vitess_keyspace ON (
@@ -354,6 +355,7 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
354355
a.HeartbeatInterval = m.GetFloat64("heartbeat_interval")
355356

356357
a.IsReadOnly = m.GetUint("read_only") == 1
358+
a.IsDiskStalled = m.GetBool("is_disk_stalled")
357359

358360
if !a.LastCheckValid {
359361
analysisMessage := fmt.Sprintf("analysis: Alias: %+v, Keyspace: %+v, Shard: %+v, IsPrimary: %+v, LastCheckValid: %+v, LastCheckPartialSuccess: %+v, CountReplicas: %+v, CountValidReplicas: %+v, CountValidReplicatingReplicas: %+v, CountLaggingReplicas: %+v, CountDelayedReplicas: %+v",
@@ -401,6 +403,10 @@ func GetReplicationAnalysis(keyspace string, shard string, hints *ReplicationAna
401403
} else if isInvalid {
402404
a.Analysis = InvalidReplica
403405
a.Description = "VTOrc hasn't been able to reach the replica even once since restart/shutdown"
406+
} else if a.IsClusterPrimary && !a.LastCheckValid && a.IsDiskStalled {
407+
a.Analysis = PrimaryDiskStalled
408+
a.Description = "Primary has a stalled disk"
409+
ca.hasClusterwideAction = true
404410
} else if a.IsClusterPrimary && !a.LastCheckValid && a.CountReplicas == 0 {
405411
a.Analysis = DeadPrimaryWithoutReplicas
406412
a.Description = "Primary cannot be reached by vtorc and has no replica"

go/vt/vtorc/inst/analysis_dao_test.go

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -34,10 +34,10 @@ var (
3434
// The initialSQL is a set of insert commands copied from a dump of an actual running VTOrc instance. The relevant insert commands are here.
3535
// This is a dump taken from a test running 4 tablets, zone1-101 is the primary, zone1-100 is a replica, zone1-112 is a rdonly and zone2-200 is a cross-cell replica.
3636
initialSQL = []string{
37-
`INSERT INTO database_instance VALUES('zone1-0000000112','localhost',6747,'2022-12-28 07:26:04','2022-12-28 07:26:04',213696377,'8.0.31','ROW',1,1,'vt-0000000112-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,'vt-0000000112-relay-bin.000002',15815,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-9240-92a06c3be3c2','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10816929,0,0,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-9240-92a06c3be3c2',1,1,'',1000000000000000000,1,0,0,0);`,
38-
`INSERT INTO database_instance VALUES('zone1-0000000100','localhost',6711,'2022-12-28 07:26:04','2022-12-28 07:26:04',1094500338,'8.0.31','ROW',1,1,'vt-0000000100-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,'vt-0000000100-relay-bin.000002',15815,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-acf8-d6b0ef9f4eaa','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10103920,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-acf8-d6b0ef9f4eaa',1,1,'',1000000000000000000,1,0,1,0);`,
39-
`INSERT INTO database_instance VALUES('zone1-0000000101','localhost',6714,'2022-12-28 07:26:04','2022-12-28 07:26:04',390954723,'8.0.31','ROW',1,1,'vt-0000000101-bin.000001',15583,'',0,0,0,0,0,'',0,'',0,NULL,NULL,0,'','',0,'',0,0,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a4cc4-8680-11ed-a104-47706090afbd','2022-12-28 07:26:04','',0,0,0,'Homebrew','8.0','FULL',11366095,1,1,'ON',1,'','','729a4cc4-8680-11ed-a104-47706090afbd',-1,-1,'',1000000000000000000,1,1,0,2);`,
40-
`INSERT INTO database_instance VALUES('zone2-0000000200','localhost',6756,'2022-12-28 07:26:05','2022-12-28 07:26:05',444286571,'8.0.31','ROW',1,1,'vt-0000000200-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,'vt-0000000200-relay-bin.000002',15815,1,0,'zone2','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a497c-8680-11ed-8ad4-3f51d747db75','2022-12-28 07:26:05','',1,0,0,'Homebrew','8.0','FULL',10443112,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a497c-8680-11ed-8ad4-3f51d747db75',1,1,'',1000000000000000000,1,0,1,0);`,
37+
`INSERT INTO database_instance VALUES('zone1-0000000112','localhost',6747,'2022-12-28 07:26:04','2022-12-28 07:26:04',213696377,'8.0.31','ROW',1,1,'vt-0000000112-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,'vt-0000000112-relay-bin.000002',15815,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-9240-92a06c3be3c2','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10816929,0,0,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-9240-92a06c3be3c2',1,1,'',1000000000000000000,1,0,0,0,false);`,
38+
`INSERT INTO database_instance VALUES('zone1-0000000100','localhost',6711,'2022-12-28 07:26:04','2022-12-28 07:26:04',1094500338,'8.0.31','ROW',1,1,'vt-0000000100-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,'vt-0000000100-relay-bin.000002',15815,1,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a5138-8680-11ed-acf8-d6b0ef9f4eaa','2022-12-28 07:26:04','',1,0,0,'Homebrew','8.0','FULL',10103920,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a5138-8680-11ed-acf8-d6b0ef9f4eaa',1,1,'',1000000000000000000,1,0,1,0,false);`,
39+
`INSERT INTO database_instance VALUES('zone1-0000000101','localhost',6714,'2022-12-28 07:26:04','2022-12-28 07:26:04',390954723,'8.0.31','ROW',1,1,'vt-0000000101-bin.000001',15583,'',0,0,0,0,0,'',0,'',0,NULL,NULL,0,'','',0,'',0,0,0,'zone1','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a4cc4-8680-11ed-a104-47706090afbd','2022-12-28 07:26:04','',0,0,0,'Homebrew','8.0','FULL',11366095,1,1,'ON',1,'','','729a4cc4-8680-11ed-a104-47706090afbd',-1,-1,'',1000000000000000000,1,1,0,2,false);`,
40+
`INSERT INTO database_instance VALUES('zone2-0000000200','localhost',6756,'2022-12-28 07:26:05','2022-12-28 07:26:05',444286571,'8.0.31','ROW',1,1,'vt-0000000200-bin.000001',15963,'localhost',6714,8,4.0,1,1,'vt-0000000101-bin.000001',15583,'vt-0000000101-bin.000001',15583,0,0,1,'','',1,'vt-0000000200-relay-bin.000002',15815,1,0,'zone2','',0,0,0,1,'729a4cc4-8680-11ed-a104-47706090afbd:1-54','729a497c-8680-11ed-8ad4-3f51d747db75','2022-12-28 07:26:05','',1,0,0,'Homebrew','8.0','FULL',10443112,0,1,'ON',1,'729a4cc4-8680-11ed-a104-47706090afbd','','729a4cc4-8680-11ed-a104-47706090afbd,729a497c-8680-11ed-8ad4-3f51d747db75',1,1,'',1000000000000000000,1,0,1,0,false);`,
4141
`INSERT INTO vitess_tablet VALUES('zone1-0000000100','localhost',6711,'ks','0','zone1',2,'0001-01-01 00:00:00+00:00',X'616c6961733a7b63656c6c3a227a6f6e653122207569643a3130307d20686f73746e616d653a226c6f63616c686f73742220706f72745f6d61703a7b6b65793a2267727063222076616c75653a363731307d20706f72745f6d61703a7b6b65793a227674222076616c75653a363730397d206b657973706163653a226b73222073686172643a22302220747970653a5245504c494341206d7973716c5f686f73746e616d653a226c6f63616c686f737422206d7973716c5f706f72743a363731312064625f7365727665725f76657273696f6e3a22382e302e3331222064656661756c745f636f6e6e5f636f6c6c6174696f6e3a3435');`,
4242
`INSERT INTO vitess_tablet VALUES('zone1-0000000101','localhost',6714,'ks','0','zone1',1,'2022-12-28 07:23:25.129898+00:00',X'616c6961733a7b63656c6c3a227a6f6e653122207569643a3130317d20686f73746e616d653a226c6f63616c686f73742220706f72745f6d61703a7b6b65793a2267727063222076616c75653a363731337d20706f72745f6d61703a7b6b65793a227674222076616c75653a363731327d206b657973706163653a226b73222073686172643a22302220747970653a5052494d415259206d7973716c5f686f73746e616d653a226c6f63616c686f737422206d7973716c5f706f72743a36373134207072696d6172795f7465726d5f73746172745f74696d653a7b7365636f6e64733a31363732323132323035206e616e6f7365636f6e64733a3132393839383030307d2064625f7365727665725f76657273696f6e3a22382e302e3331222064656661756c745f636f6e6e5f636f6c6c6174696f6e3a3435');`,
4343
`INSERT INTO vitess_tablet VALUES('zone1-0000000112','localhost',6747,'ks','0','zone1',3,'0001-01-01 00:00:00+00:00',X'616c6961733a7b63656c6c3a227a6f6e653122207569643a3131327d20686f73746e616d653a226c6f63616c686f73742220706f72745f6d61703a7b6b65793a2267727063222076616c75653a363734367d20706f72745f6d61703a7b6b65793a227674222076616c75653a363734357d206b657973706163653a226b73222073686172643a22302220747970653a52444f4e4c59206d7973716c5f686f73746e616d653a226c6f63616c686f737422206d7973716c5f706f72743a363734372064625f7365727665725f76657273696f6e3a22382e302e3331222064656661756c745f636f6e6e5f636f6c6c6174696f6e3a3435');`,
@@ -96,6 +96,29 @@ func TestGetReplicationAnalysisDecision(t *testing.T) {
9696
keyspaceWanted: "ks",
9797
shardWanted: "0",
9898
codeWanted: PrimaryTabletDeleted,
99+
}, {
100+
name: "StalledDiskPrimary",
101+
info: []*test.InfoForRecoveryAnalysis{{
102+
TabletInfo: &topodatapb.Tablet{
103+
Alias: &topodatapb.TabletAlias{Cell: "zon1", Uid: 100},
104+
Hostname: "localhost",
105+
Keyspace: "ks",
106+
Shard: "0",
107+
Type: topodatapb.TabletType_PRIMARY,
108+
MysqlHostname: "localhost",
109+
MysqlPort: 6709,
110+
},
111+
DurabilityPolicy: "none",
112+
LastCheckValid: 0,
113+
CountReplicas: 4,
114+
CountValidReplicas: 4,
115+
CountValidReplicatingReplicas: 0,
116+
IsPrimary: 1,
117+
IsStalledDisk: 1,
118+
}},
119+
keyspaceWanted: "ks",
120+
shardWanted: "0",
121+
codeWanted: PrimaryDiskStalled,
99122
}, {
100123
name: "DeadPrimary",
101124
info: []*test.InfoForRecoveryAnalysis{{

go/vt/vtorc/inst/instance.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -91,6 +91,7 @@ type Instance struct {
9191
IsUpToDate bool
9292
IsRecentlyChecked bool
9393
SecondsSinceLastSeen sql.NullInt64
94+
StalledDisk bool
9495

9596
AllowTLS bool
9697

0 commit comments

Comments
 (0)