Skip to content

Commit 8a9e435

Browse files
Merge remote-tracking branch 'upstream/main' into log-only-errors
Signed-off-by: Harshit Gangal <harshit@planetscale.com>
2 parents 48da715 + 10ff5e3 commit 8a9e435

36 files changed

+712
-77
lines changed

changelog/22.0/22.0.0/summary.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
- **[Support for More Efficient JSON Replication](#efficient-json-replication)**
1313
- **[Support for LAST_INSERT_ID(x)](#last-insert-id)**
1414
- **[Support for Maximum Idle Connections in the Pool](#max-idle-connections)**
15+
- **[Stalled Disk Recovery in VTOrc](#stall-disk-recovery)**
1516
- **[Support for Filtering Query logs on Error](#query-logs)**
1617
- **[Minor Changes](#minor-changes)**
1718
- **[VTTablet Flags](#flags-vttablet)**
@@ -101,6 +102,11 @@ You can control idle connection retention for the query server’s query pool, s
101102

102103
This feature ensures that, during traffic spikes, idle connections are available for faster responses, while minimizing overhead in low-traffic periods by limiting the number of idle connections retained. It helps strike a balance between performance, efficiency, and cost.
103104

105+
### <a id="stall-disk-recovery">Stalled Disk Recovery in VTOrc</a>
106+
VTOrc can now identify and recover from stalled disk errors. VTTablets test whether the disk is writable and they send this information in the full status output to VTOrc. If the disk is not writable on the primary tablet, VTOrc will attempt to recover the cluster by promoting a new primary. This is useful in scenarios where the disk is stalled and the primary vttablet is unable to accept writes because of it.
107+
108+
To opt into this feature, the `--enable-primary-disk-stalled-recovery` flag must be specified on VTOrc, and the `--disk-write-dir` flag must be specified on the vttablets. The `--disk-write-interval` and `--disk-write-timeout` flags can be used to configure the polling interval and timeout, respectively.
109+
104110
### <a id="query-logs">Support for Filtering Query logs on Error</a>
105111

106112
The `querylog-mode` setting can be configured to `error` to log only queries that result in errors. This option is supported in both VTGate and VTTablet.

go/cmd/vtctldclient/command/vreplication/vdiff/vdiff.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -436,7 +436,7 @@ State: {{.State}}
436436
RowsCompared: {{.RowsCompared}}
437437
HasMismatch: {{.HasMismatch}}
438438
StartedAt: {{.StartedAt}}
439-
{{if (eq .State "started")}}Progress: {{printf "%.2f" .Progress.Percentage}}%%{{if .Progress.ETA}}, ETA: {{.Progress.ETA}}{{end}}{{end}}
439+
{{if (eq .State "started")}}Progress: {{printf "%.2f" .Progress.Percentage}}%{{if .Progress.ETA}}, ETA: {{.Progress.ETA}}{{end}}{{end}}
440440
{{if .CompletedAt}}CompletedAt: {{.CompletedAt}}{{end}}
441441
{{range $table := .TableSummaryMap}}
442442
Table {{$table.TableName}}:

go/cmd/vtctldclient/command/vreplication/vdiff/vdiff_test.go

Lines changed: 69 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,75 @@ func TestVDiffSharded(t *testing.T) {
680680
}
681681
}
682682

683+
func TestVDiffTextTemplate(t *testing.T) {
684+
ctx, cancel := context.WithCancel(context.Background())
685+
defer cancel()
686+
env := newTestVDiffEnv(t, ctx, []string{"0"}, []string{"0"}, "", nil)
687+
defer env.close()
688+
689+
now := time.Now()
690+
UUID := uuid.New().String()
691+
req := &tabletmanagerdatapb.VDiffRequest{
692+
Keyspace: env.targetKeyspace,
693+
Workflow: env.workflow,
694+
Action: string(vdiff.ShowAction),
695+
ActionArg: UUID,
696+
}
697+
starttime := now.UTC().Format(vdiff.TimestampFormat)
698+
699+
testCases := []struct {
700+
id string
701+
res *sqltypes.Result
702+
report string
703+
}{{
704+
id: "1",
705+
res: sqltypes.MakeTestResult(fields,
706+
"started||t1|"+UUID+"|started|300|"+starttime+"|30||0|"+
707+
`{"TableName": "t1", "MatchingRows": 30, "ProcessedRows": 30, "MismatchedRows": 0, "ExtraRowsSource": 0, `+
708+
`"ExtraRowsTarget": 0}`),
709+
report: fmt.Sprintf(`
710+
VDiff Summary for targetks.vdiffTest (%s)
711+
State: started
712+
RowsCompared: 30
713+
HasMismatch: false
714+
StartedAt: %s
715+
Progress: 10.00%%, ETA: %s
716+
717+
Table t1:
718+
State: started
719+
ProcessedRows: 30
720+
MatchingRows: 30
721+
722+
Use "--format=json" for more detailed output.
723+
724+
`, UUID, starttime, starttime),
725+
}}
726+
727+
for _, tc := range testCases {
728+
t.Run(tc.id, func(t *testing.T) {
729+
res := &tabletmanagerdatapb.VDiffResponse{
730+
Id: 1,
731+
Output: sqltypes.ResultToProto3(tc.res),
732+
}
733+
env.tmc.setVDResults(env.tablets[200].tablet, req, res)
734+
req := &vtctldatapb.VDiffShowRequest{
735+
TargetKeyspace: env.targetKeyspace,
736+
Workflow: env.workflow,
737+
Arg: UUID,
738+
}
739+
740+
resp, err := env.ws.VDiffShow(context.Background(), req)
741+
require.NoError(t, err)
742+
vds, err := displayShowSingleSummary(env.out, "text", env.targetKeyspace, env.workflow, UUID, resp, true)
743+
require.NoError(t, err)
744+
require.Equal(t, vdiff.StartedState, vds)
745+
746+
require.Equal(t, tc.report, env.getOutput())
747+
env.resetOutput()
748+
})
749+
}
750+
}
751+
683752
func TestGetStructNames(t *testing.T) {
684753
type s struct {
685754
A string

go/flags/endtoend/vtcombo.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,9 @@ Flags:
102102
--ddl_strategy string Set default strategy for DDL statements. Override with @@ddl_strategy session variable (default "direct")
103103
--default_tablet_type topodatapb.TabletType The default tablet type to set for queries, when one is not explicitly selected. (default PRIMARY)
104104
--degraded_threshold duration replication lag after which a replica is considered degraded (default 30s)
105+
--disk-write-dir string if provided, tablet will attempt to write a file to this directory to check if the disk is stalled
106+
--disk-write-interval duration how often to write to the disk to check whether it is stalled (default 5s)
107+
--disk-write-timeout duration if writes exceed this duration, the disk is considered stalled (default 30s)
105108
--emit_stats If set, emit stats to push-based monitoring and stats backends
106109
--enable-consolidator Synonym to -enable_consolidator (default true)
107110
--enable-consolidator-replicas Synonym to -enable_consolidator_replicas

go/flags/endtoend/vtorc.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ Flags:
3333
--config-type string Config file type (omit to infer config type from file extension).
3434
--consul_auth_static_file string JSON File to read the topos/tokens from.
3535
--emit_stats If set, emit stats to push-based monitoring and stats backends
36+
--enable-primary-disk-stalled-recovery Whether VTOrc should detect a stalled disk on the primary and failover
3637
--grpc-dial-concurrency-limit int Maximum concurrency of grpc dial operations. This should be less than the golang max thread limit of 10000. (default 1024)
3738
--grpc_auth_static_client_creds string When using grpc_static_auth in the server, this file provides the credentials to use to authenticate with server.
3839
--grpc_compression string Which protocol to use for compressing gRPC. Default: nothing. Supported: snappy

go/flags/endtoend/vttablet.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,9 @@ Flags:
133133
--dba_idle_timeout duration Idle timeout for dba connections (default 1m0s)
134134
--dba_pool_size int Size of the connection pool for dba connections (default 20)
135135
--degraded_threshold duration replication lag after which a replica is considered degraded (default 30s)
136+
--disk-write-dir string if provided, tablet will attempt to write a file to this directory to check if the disk is stalled
137+
--disk-write-interval duration how often to write to the disk to check whether it is stalled (default 5s)
138+
--disk-write-timeout duration if writes exceed this duration, the disk is considered stalled (default 30s)
136139
--emit_stats If set, emit stats to push-based monitoring and stats backends
137140
--enable-consolidator Synonym to -enable_consolidator (default true)
138141
--enable-consolidator-replicas Synonym to -enable_consolidator_replicas

go/mysql/capabilities/capability.go

Lines changed: 22 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -31,25 +31,26 @@ var (
3131
type FlavorCapability int
3232

3333
const (
34-
NoneFlavorCapability FlavorCapability = iota // default placeholder
35-
FastDropTableFlavorCapability // supported in MySQL 8.0.23 and above: https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-23.html
36-
TransactionalGtidExecutedFlavorCapability //
37-
InstantDDLFlavorCapability // ALGORITHM=INSTANT general support
38-
InstantAddLastColumnFlavorCapability //
39-
InstantAddDropVirtualColumnFlavorCapability //
40-
InstantAddDropColumnFlavorCapability // Adding/dropping column in any position/ordinal.
41-
InstantChangeColumnDefaultFlavorCapability //
42-
InstantExpandEnumCapability //
43-
InstantChangeColumnVisibilityCapability //
44-
MySQLUpgradeInServerFlavorCapability //
45-
DynamicRedoLogCapacityFlavorCapability // supported in MySQL 8.0.30 and above: https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-30.html
46-
DisableRedoLogFlavorCapability // supported in MySQL 8.0.21 and above: https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-21.html
47-
CheckConstraintsCapability // supported in MySQL 8.0.16 and above: https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-16.html
48-
PerformanceSchemaDataLocksTableCapability // supported in MySQL 8.0.1 and above: https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-1.html
49-
InstantDDLXtrabackupCapability // Supported in 8.0.32 and above, solving a MySQL-vs-Xtrabackup bug starting 8.0.29
50-
ReplicaTerminologyCapability // Supported in 8.0.26 and above, using SHOW REPLICA STATUS and all variations.
51-
BinaryLogStatus // Supported in 8.2.0 and above, uses SHOW BINARY LOG STATUS
52-
RestrictFKOnNonStandardKey // Supported in 8.4.0 and above, restricts usage of non-standard indexes for foreign keys.
34+
NoneFlavorCapability FlavorCapability = iota // default placeholder
35+
FastDropTableFlavorCapability // supported in MySQL 8.0.23 and above: https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-23.html
36+
TransactionalGtidExecutedFlavorCapability //
37+
InstantDDLFlavorCapability // ALGORITHM=INSTANT general support
38+
InstantAddLastColumnFlavorCapability //
39+
InstantAddDropVirtualColumnFlavorCapability //
40+
InstantAddDropColumnFlavorCapability // Adding/dropping column in any position/ordinal.
41+
InstantChangeColumnDefaultFlavorCapability //
42+
InstantExpandEnumCapability //
43+
InstantChangeColumnVisibilityCapability //
44+
MySQLUpgradeInServerFlavorCapability //
45+
DynamicRedoLogCapacityFlavorCapability // supported in MySQL 8.0.30 and above: https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-30.html
46+
DisableRedoLogFlavorCapability // supported in MySQL 8.0.21 and above: https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-21.html
47+
CheckConstraintsCapability // supported in MySQL 8.0.16 and above: https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-16.html
48+
PerformanceSchemaDataLocksTableCapability // supported in MySQL 8.0.1 and above: https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-1.html
49+
PerformanceSchemaMetadataLocksTableCapability // supported in MySQL 8.0.2 and above: https://dev.mysql.com/doc/relnotes/mysql/8.0/en/news-8-0-2.html
50+
InstantDDLXtrabackupCapability // Supported in 8.0.32 and above, solving a MySQL-vs-Xtrabackup bug starting 8.0.29
51+
ReplicaTerminologyCapability // Supported in 8.0.26 and above, using SHOW REPLICA STATUS and all variations.
52+
BinaryLogStatus // Supported in 8.2.0 and above, uses SHOW BINARY LOG STATUS
53+
RestrictFKOnNonStandardKey // Supported in 8.4.0 and above, restricts usage of non-standard indexes for foreign keys.
5354
)
5455

5556
type CapableOf func(capability FlavorCapability) (bool, error)
@@ -97,6 +98,8 @@ func MySQLVersionHasCapability(serverVersion string, capability FlavorCapability
9798
return atLeast(8, 0, 0)
9899
case PerformanceSchemaDataLocksTableCapability:
99100
return atLeast(8, 0, 1)
101+
case PerformanceSchemaMetadataLocksTableCapability:
102+
return atLeast(8, 0, 2)
100103
case MySQLUpgradeInServerFlavorCapability:
101104
return atLeast(8, 0, 16)
102105
case CheckConstraintsCapability:

go/mysql/capabilities/capability_test.go

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -218,6 +218,25 @@ func TestMySQLVersionCapableOf(t *testing.T) {
218218
version: "8.0.20",
219219
capability: PerformanceSchemaDataLocksTableCapability,
220220
isCapable: true,
221+
}, {
222+
version: "5.7.38",
223+
capability: PerformanceSchemaMetadataLocksTableCapability,
224+
isCapable: false,
225+
},
226+
{
227+
version: "8.0",
228+
capability: PerformanceSchemaMetadataLocksTableCapability,
229+
isCapable: false,
230+
},
231+
{
232+
version: "8.0.1",
233+
capability: PerformanceSchemaMetadataLocksTableCapability,
234+
isCapable: false,
235+
},
236+
{
237+
version: "8.0.2",
238+
capability: PerformanceSchemaMetadataLocksTableCapability,
239+
isCapable: true,
221240
},
222241
{
223242
version: "8.0.29",

go/mysql/collations/colldata/cached_size.go

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go/mysql/flavor_test.go

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,15 @@ func TestServerVersionCapableOf(t *testing.T) {
102102
version: "8.0.20",
103103
capability: capabilities.PerformanceSchemaDataLocksTableCapability,
104104
isCapable: true,
105+
}, {
106+
version: "5.7.38",
107+
capability: capabilities.PerformanceSchemaMetadataLocksTableCapability,
108+
isCapable: false,
109+
},
110+
{
111+
version: "8.0.20",
112+
capability: capabilities.PerformanceSchemaMetadataLocksTableCapability,
113+
isCapable: true,
105114
},
106115
{
107116
// Some ridiculous version

go/sqltypes/cached_size.go

Lines changed: 8 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

go/test/endtoend/onlineddl/scheduler/onlineddl_scheduler_test.go

Lines changed: 83 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -714,6 +714,89 @@ func testScheduler(t *testing.T) {
714714
}
715715
})
716716
})
717+
t.Run("force_cutover mdl", func(t *testing.T) {
718+
ctx, cancel := context.WithTimeout(context.Background(), extendedWaitTime*5)
719+
defer cancel()
720+
721+
t1uuid = testOnlineDDLStatement(t, createParams(trivialAlterT1Statement, ddlStrategy+" --postpone-completion", "vtgate", "", "", true)) // skip wait
722+
723+
t.Run("wait for t1 running", func(t *testing.T) {
724+
status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards, t1uuid, normalWaitTime, schema.OnlineDDLStatusRunning)
725+
fmt.Printf("# Migration status (for debug purposes): <%s>\n", status)
726+
})
727+
t.Run("wait for t1 ready to complete", func(t *testing.T) {
728+
// Waiting for 'running', above, is not enough. We want to give vreplication a chance to start running, or else
729+
// we attempt the cut-over too early. Specifically in this test, we're going to lock the table for WRITE, which,
730+
// if vreplication does not get the chance to start, will prevent it from doing anything at all.
731+
// ready_to_complete is a great signal for us that vreplication is healthy and up to date.
732+
waitForReadyToComplete(t, t1uuid, true)
733+
})
734+
735+
conn, err := primaryTablet.VttabletProcess.TabletConn(keyspaceName, true)
736+
require.NoError(t, err)
737+
defer conn.Close()
738+
739+
unlockTables := func() error {
740+
_, err := conn.ExecuteFetch("unlock tables", 0, false)
741+
return err
742+
}
743+
t.Run("locking table", func(t *testing.T) {
744+
_, err := conn.ExecuteFetch("lock tables t1_test write", 0, false)
745+
require.NoError(t, err)
746+
})
747+
defer unlockTables()
748+
t.Run("injecting heartbeats asynchronously", func(t *testing.T) {
749+
go func() {
750+
ticker := time.NewTicker(time.Second)
751+
defer ticker.Stop()
752+
for {
753+
throttler.CheckThrottler(clusterInstance, primaryTablet, throttlerapp.OnlineDDLName, nil)
754+
select {
755+
case <-ticker.C:
756+
case <-ctx.Done():
757+
return
758+
}
759+
}
760+
}()
761+
})
762+
t.Run("check no force_cutover", func(t *testing.T) {
763+
rs := onlineddl.ReadMigrations(t, &vtParams, t1uuid)
764+
require.NotNil(t, rs)
765+
for _, row := range rs.Named().Rows {
766+
forceCutOver := row.AsInt64("force_cutover", 0)
767+
assert.Equal(t, int64(0), forceCutOver) // disabled
768+
}
769+
})
770+
t.Run("attempt to complete", func(t *testing.T) {
771+
onlineddl.CheckCompleteMigration(t, &vtParams, shards, t1uuid, true)
772+
})
773+
t.Run("cut-over fail due to timeout", func(t *testing.T) {
774+
waitForMessage(t, t1uuid, "(errno 3024) (sqlstate HY000): Query execution was interrupted, maximum statement execution time exceeded")
775+
status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards, t1uuid, normalWaitTime, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed, schema.OnlineDDLStatusRunning)
776+
fmt.Printf("# Migration status (for debug purposes): <%s>\n", status)
777+
onlineddl.CheckMigrationStatus(t, &vtParams, shards, t1uuid, schema.OnlineDDLStatusRunning)
778+
})
779+
t.Run("force_cutover", func(t *testing.T) {
780+
onlineddl.CheckForceMigrationCutOver(t, &vtParams, shards, t1uuid, true)
781+
})
782+
t.Run("check force_cutover", func(t *testing.T) {
783+
rs := onlineddl.ReadMigrations(t, &vtParams, t1uuid)
784+
require.NotNil(t, rs)
785+
for _, row := range rs.Named().Rows {
786+
forceCutOver := row.AsInt64("force_cutover", 0)
787+
assert.Equal(t, int64(1), forceCutOver) // enabled
788+
}
789+
})
790+
t.Run("expect completion", func(t *testing.T) {
791+
status := onlineddl.WaitForMigrationStatus(t, &vtParams, shards, t1uuid, normalWaitTime, schema.OnlineDDLStatusComplete, schema.OnlineDDLStatusFailed)
792+
fmt.Printf("# Migration status (for debug purposes): <%s>\n", status)
793+
onlineddl.CheckMigrationStatus(t, &vtParams, shards, t1uuid, schema.OnlineDDLStatusComplete)
794+
})
795+
t.Run("expect unlock failure", func(t *testing.T) {
796+
err := unlockTables()
797+
assert.ErrorContains(t, err, "broken pipe")
798+
})
799+
})
717800
}
718801
t.Run("ALTER both tables non-concurrent", func(t *testing.T) {
719802
t1uuid = testOnlineDDLStatement(t, createParams(trivialAlterT1Statement, ddlStrategy, "vtgate", "", "", true)) // skip wait

0 commit comments

Comments
 (0)