Skip to content

Commit 1cc01fc

Browse files
Merge branch 'release-19.0' of https://github.com/vitessio/vitess into release-19.0-github
Signed-off-by: Arthur Schreiber <arthurschreiber@github.com>
2 parents ce7168c + 5c08da6 commit 1cc01fc

31 files changed

+760
-159
lines changed

go/mysql/server_test.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1424,7 +1424,7 @@ func TestListenerShutdown(t *testing.T) {
14241424

14251425
l.Shutdown()
14261426

1427-
assert.EqualValues(t, 1, connRefuse.Get(), "connRefuse")
1427+
waitForConnRefuse(t, 1)
14281428

14291429
err = conn.Ping()
14301430
require.EqualError(t, err, "Server shutdown in progress (errno 1053) (sqlstate 08S01)")
@@ -1436,6 +1436,24 @@ func TestListenerShutdown(t *testing.T) {
14361436
require.Equal(t, "Server shutdown in progress", sqlErr.Message)
14371437
}
14381438

1439+
func waitForConnRefuse(t *testing.T, valWanted int64) {
1440+
ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
1441+
defer cancel()
1442+
tick := time.NewTicker(100 * time.Millisecond)
1443+
defer tick.Stop()
1444+
1445+
for {
1446+
select {
1447+
case <-ctx.Done():
1448+
require.FailNow(t, "connRefuse did not reach %v", valWanted)
1449+
case <-tick.C:
1450+
if connRefuse.Get() == valWanted {
1451+
return
1452+
}
1453+
}
1454+
}
1455+
}
1456+
14391457
func TestParseConnAttrs(t *testing.T) {
14401458
expected := map[string]string{
14411459
"_client_version": "8.0.11",

go/test/endtoend/vtgate/lookup_test.go

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -552,6 +552,22 @@ func TestConsistentLookupUpdate(t *testing.T) {
552552
require.Empty(t, qr.Rows)
553553
}
554554

555+
func TestSelectMultiEqualLookup(t *testing.T) {
556+
conn, closer := start(t)
557+
defer closer()
558+
559+
utils.Exec(t, conn, "insert into t10 (id, sharding_key, col1) values (1, 1, 'bar'), (2, 1, 'bar'), (3, 1, 'bar'), (4, 2, 'bar'), (5, 2, 'bar')")
560+
561+
for _, workload := range []string{"oltp", "olap"} {
562+
t.Run(workload, func(t *testing.T) {
563+
utils.Exec(t, conn, "set workload = "+workload)
564+
565+
utils.AssertMatches(t, conn, "select id from t10 WHERE (col1, id) IN (('bar', 1), ('baz', 2), ('qux', 3), ('barbar', 4))", "[[INT64(1)]]")
566+
utils.AssertMatches(t, conn, "select id from t10 WHERE (col1 = 'bar' AND id = 1) OR (col1 = 'baz' AND id = 2) OR (col1 = 'qux' AND id = 3) OR (col1 = 'barbar' AND id = 4)", "[[INT64(1)]]")
567+
})
568+
}
569+
}
570+
555571
func TestSelectNullLookup(t *testing.T) {
556572
conn, closer := start(t)
557573
defer closer()

go/test/endtoend/vtgate/schema.sql

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -164,4 +164,4 @@ create table t11
164164
col2 int,
165165
col3 int,
166166
primary key (id)
167-
) Engine = InnoDB;
167+
) Engine = InnoDB;

go/vt/discovery/healthcheck.go

Lines changed: 8 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -352,7 +352,7 @@ func NewHealthCheck(ctx context.Context, retryDelay, healthCheckTimeout time.Dur
352352
healthy: make(map[KeyspaceShardTabletType][]*TabletHealth),
353353
subscribers: make(map[chan *TabletHealth]struct{}),
354354
cellAliases: make(map[string]string),
355-
loadTabletsTrigger: make(chan struct{}),
355+
loadTabletsTrigger: make(chan struct{}, 1),
356356
}
357357
var topoWatchers []*TopologyWatcher
358358
cells := strings.Split(cellsToWatch, ",")
@@ -531,7 +531,13 @@ func (hc *HealthCheckImpl) updateHealth(th *TabletHealth, prevTarget *query.Targ
531531
if prevTarget.TabletType == topodata.TabletType_PRIMARY {
532532
if primaries := hc.healthData[oldTargetKey]; len(primaries) == 0 {
533533
log.Infof("We will have no health data for the next new primary tablet after demoting the tablet: %v, so start loading tablets now", topotools.TabletIdent(th.Tablet))
534-
hc.loadTabletsTrigger <- struct{}{}
534+
// We want to trigger a loadTablets call, but if the channel is not empty
535+
// then a trigger is already scheduled, we don't need to trigger another one.
536+
// This also prevents the code from deadlocking as described in https://github.com/vitessio/vitess/issues/16994.
537+
select {
538+
case hc.loadTabletsTrigger <- struct{}{}:
539+
default:
540+
}
535541
}
536542
}
537543
}

go/vt/discovery/topology_watcher_test.go

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ import (
2828
"google.golang.org/protobuf/proto"
2929

3030
"vitess.io/vitess/go/test/utils"
31+
querypb "vitess.io/vitess/go/vt/proto/query"
3132

3233
"vitess.io/vitess/go/vt/logutil"
3334
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
@@ -682,3 +683,67 @@ func TestGetTabletErrorDoesNotRemoveFromHealthcheck(t *testing.T) {
682683
assert.True(t, proto.Equal(tablet1, allTablets[key1]))
683684
assert.True(t, proto.Equal(tablet2, allTablets[key2]))
684685
}
686+
687+
// TestDeadlockBetweenTopologyWatcherAndHealthCheck tests the possibility of a deadlock
688+
// between the topology watcher and the health check.
689+
// The issue https://github.com/vitessio/vitess/issues/16994 has more details on the deadlock.
690+
func TestDeadlockBetweenTopologyWatcherAndHealthCheck(t *testing.T) {
691+
ctx := utils.LeakCheckContext(t)
692+
693+
// create a new memory topo server and an health check instance.
694+
ts, _ := memorytopo.NewServerAndFactory(ctx, "zone-1")
695+
hc := NewHealthCheck(ctx, time.Hour, time.Hour, ts, "zone-1", "", nil)
696+
defer hc.Close()
697+
defer hc.topoWatchers[0].Stop()
698+
699+
// Add a tablet to the topology.
700+
tablet1 := &topodatapb.Tablet{
701+
Alias: &topodatapb.TabletAlias{
702+
Cell: "zone-1",
703+
Uid: 100,
704+
},
705+
Type: topodatapb.TabletType_REPLICA,
706+
Hostname: "host1",
707+
PortMap: map[string]int32{
708+
"grpc": 123,
709+
},
710+
Keyspace: "keyspace",
711+
Shard: "shard",
712+
}
713+
err := ts.CreateTablet(ctx, tablet1)
714+
// Run the first loadTablets call to ensure the tablet is present in the topology watcher.
715+
hc.topoWatchers[0].loadTablets()
716+
require.NoError(t, err)
717+
718+
// We want to run updateHealth with arguments that always
719+
// make it trigger load Tablets.
720+
th := &TabletHealth{
721+
Tablet: tablet1,
722+
Target: &querypb.Target{
723+
Keyspace: "keyspace",
724+
Shard: "shard",
725+
TabletType: topodatapb.TabletType_REPLICA,
726+
},
727+
}
728+
prevTarget := &querypb.Target{
729+
Keyspace: "keyspace",
730+
Shard: "shard",
731+
TabletType: topodatapb.TabletType_PRIMARY,
732+
}
733+
734+
// If we run the updateHealth function often enough, then we
735+
// will see the deadlock where the topology watcher is trying to replace
736+
// the tablet in the health check, but health check has the mutex acquired
737+
// already because it is calling updateHealth.
738+
// updateHealth itself will be stuck trying to send on the shared channel.
739+
for i := 0; i < 10; i++ {
740+
// Update the port of the tablet so that when update Health asks topo watcher to
741+
// refresh the tablets, it finds an update and tries to replace it.
742+
_, err = ts.UpdateTabletFields(ctx, tablet1.Alias, func(t *topodatapb.Tablet) error {
743+
t.PortMap["testing_port"] = int32(i + 1)
744+
return nil
745+
})
746+
require.NoError(t, err)
747+
hc.updateHealth(th, prevTarget, false, false)
748+
}
749+
}

go/vt/mysqlctl/fakemysqldaemon.go

Lines changed: 17 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -286,13 +286,6 @@ func (fmd *FakeMysqlDaemon) GetServerUUID(ctx context.Context) (string, error) {
286286
return "000000", nil
287287
}
288288

289-
// CurrentPrimaryPositionLocked is thread-safe.
290-
func (fmd *FakeMysqlDaemon) CurrentPrimaryPositionLocked(pos replication.Position) {
291-
fmd.mu.Lock()
292-
defer fmd.mu.Unlock()
293-
fmd.CurrentPrimaryPosition = pos
294-
}
295-
296289
// ReplicationStatus is part of the MysqlDaemon interface.
297290
func (fmd *FakeMysqlDaemon) ReplicationStatus() (replication.ReplicationStatus, error) {
298291
if fmd.ReplicationStatusError != nil {
@@ -316,6 +309,8 @@ func (fmd *FakeMysqlDaemon) ReplicationStatus() (replication.ReplicationStatus,
316309

317310
// PrimaryStatus is part of the MysqlDaemon interface.
318311
func (fmd *FakeMysqlDaemon) PrimaryStatus(ctx context.Context) (replication.PrimaryStatus, error) {
312+
fmd.mu.Lock()
313+
defer fmd.mu.Unlock()
319314
if fmd.PrimaryStatusError != nil {
320315
return replication.PrimaryStatus{}, fmd.PrimaryStatusError
321316
}
@@ -381,7 +376,21 @@ func (fmd *FakeMysqlDaemon) GetPreviousGTIDs(ctx context.Context, binlog string)
381376

382377
// PrimaryPosition is part of the MysqlDaemon interface.
383378
func (fmd *FakeMysqlDaemon) PrimaryPosition() (replication.Position, error) {
384-
return fmd.CurrentPrimaryPosition, nil
379+
return fmd.GetPrimaryPositionLocked(), nil
380+
}
381+
382+
// GetPrimaryPositionLocked gets the primary position while holding the lock.
383+
func (fmd *FakeMysqlDaemon) GetPrimaryPositionLocked() replication.Position {
384+
fmd.mu.Lock()
385+
defer fmd.mu.Unlock()
386+
return fmd.CurrentPrimaryPosition
387+
}
388+
389+
// SetPrimaryPositionLocked is thread-safe.
390+
func (fmd *FakeMysqlDaemon) SetPrimaryPositionLocked(pos replication.Position) {
391+
fmd.mu.Lock()
392+
defer fmd.mu.Unlock()
393+
fmd.CurrentPrimaryPosition = pos
385394
}
386395

387396
// IsReadOnly is part of the MysqlDaemon interface.

go/vt/vtctl/grpcvtctldserver/server_test.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12620,7 +12620,7 @@ func TestTabletExternallyReparented(t *testing.T) {
1262012620
defer func() {
1262112621
topofactory.SetError(nil)
1262212622

12623-
ctx, cancel := context.WithTimeout(ctx, time.Millisecond*10)
12623+
ctx, cancel := context.WithTimeout(ctx, time.Second*10)
1262412624
defer cancel()
1262512625

1262612626
resp, err := vtctld.GetTablets(ctx, &vtctldatapb.GetTabletsRequest{})

go/vt/vtgate/engine/route.go

Lines changed: 12 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -137,11 +137,12 @@ func (route *Route) SetTruncateColumnCount(count int) {
137137
func (route *Route) TryExecute(ctx context.Context, vcursor VCursor, bindVars map[string]*querypb.BindVariable, wantfields bool) (*sqltypes.Result, error) {
138138
ctx, cancelFunc := addQueryTimeout(ctx, vcursor, route.QueryTimeout)
139139
defer cancelFunc()
140-
qr, err := route.executeInternal(ctx, vcursor, bindVars, wantfields)
140+
rss, bvs, err := route.findRoute(ctx, vcursor, bindVars)
141141
if err != nil {
142142
return nil, err
143143
}
144-
return qr.Truncate(route.TruncateColumnCount), nil
144+
145+
return route.executeShards(ctx, vcursor, bindVars, wantfields, rss, bvs)
145146
}
146147

147148
// addQueryTimeout adds a query timeout to the context it receives and returns the modified context along with the cancel function.
@@ -159,20 +160,6 @@ const (
159160
IgnoreReserveTxn cxtKey = iota
160161
)
161162

162-
func (route *Route) executeInternal(
163-
ctx context.Context,
164-
vcursor VCursor,
165-
bindVars map[string]*querypb.BindVariable,
166-
wantfields bool,
167-
) (*sqltypes.Result, error) {
168-
rss, bvs, err := route.findRoute(ctx, vcursor, bindVars)
169-
if err != nil {
170-
return nil, err
171-
}
172-
173-
return route.executeShards(ctx, vcursor, bindVars, wantfields, rss, bvs)
174-
}
175-
176163
func (route *Route) executeShards(
177164
ctx context.Context,
178165
vcursor VCursor,
@@ -228,11 +215,15 @@ func (route *Route) executeShards(
228215
}
229216
}
230217

231-
if len(route.OrderBy) == 0 {
232-
return result, nil
218+
if len(route.OrderBy) != 0 {
219+
var err error
220+
result, err = route.sort(result)
221+
if err != nil {
222+
return nil, err
223+
}
233224
}
234225

235-
return route.sort(result)
226+
return result.Truncate(route.TruncateColumnCount), nil
236227
}
237228

238229
func filterOutNilErrors(errs []error) []error {
@@ -389,10 +380,8 @@ func (route *Route) sort(in *sqltypes.Result) (*sqltypes.Result, error) {
389380
// the contents of any row.
390381
out := in.ShallowCopy()
391382

392-
if err := route.OrderBy.SortResult(out); err != nil {
393-
return nil, err
394-
}
395-
return out.Truncate(route.TruncateColumnCount), nil
383+
err := route.OrderBy.SortResult(out)
384+
return out, err
396385
}
397386

398387
func (route *Route) description() PrimitiveDescription {

go/vt/vtgate/engine/vindex_lookup.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -252,7 +252,7 @@ func (vr *VindexLookup) generateIds(ctx context.Context, vcursor VCursor, bindVa
252252
switch vr.Opcode {
253253
case Equal, EqualUnique:
254254
return []sqltypes.Value{value.Value(vcursor.ConnCollation())}, nil
255-
case IN:
255+
case IN, MultiEqual:
256256
return value.TupleValues(), nil
257257
}
258258
return nil, vterrors.Errorf(vtrpcpb.Code_INTERNAL, "opcode %s not supported for VindexLookup", vr.Opcode.String())

0 commit comments

Comments
 (0)