Skip to content

Commit 462f380

Browse files
GuptaManan100 authored and timvaillancourt committed
[release-19.0] Add timeout to all the contexts used for RPC calls in vtorc (vitessio#15991) (vitessio#16103)
Signed-off-by: Manan Gupta <manan@planetscale.com>
1 parent ba4be6c commit 462f380

File tree

3 files changed

+289
-6
lines changed

3 files changed

+289
-6
lines changed

go/vt/vtctl/grpcvtctldserver/testutil/test_tmclient.go

Lines changed: 15 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -186,6 +186,7 @@ type TabletManagerClient struct {
186186
}
187187
// keyed by tablet alias.
188188
ChangeTabletTypeResult map[string]error
189+
ChangeTabletTypeDelays map[string]time.Duration
189190
// keyed by tablet alias.
190191
DemotePrimaryDelays map[string]time.Duration
191192
// keyed by tablet alias.
@@ -461,7 +462,20 @@ func (fake *TabletManagerClient) Backup(ctx context.Context, tablet *topodatapb.
461462

462463
// ChangeType is part of the tmclient.TabletManagerClient interface.
463464
func (fake *TabletManagerClient) ChangeType(ctx context.Context, tablet *topodatapb.Tablet, newType topodatapb.TabletType, semiSync bool) error {
464-
if result, ok := fake.ChangeTabletTypeResult[topoproto.TabletAliasString(tablet.Alias)]; ok {
465+
key := topoproto.TabletAliasString(tablet.Alias)
466+
467+
if fake.ChangeTabletTypeDelays != nil {
468+
if delay, ok := fake.ChangeTabletTypeDelays[key]; ok {
469+
select {
470+
case <-ctx.Done():
471+
return ctx.Err()
472+
case <-time.After(delay):
473+
// proceed to results
474+
}
475+
}
476+
}
477+
478+
if result, ok := fake.ChangeTabletTypeResult[key]; ok {
465479
return result
466480
}
467481

go/vt/vtorc/logic/tablet_discovery.go

Lines changed: 15 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -280,27 +280,37 @@ func LockShard(ctx context.Context, tabletAlias string, lockAction string) (cont
280280

281281
// tabletUndoDemotePrimary calls the said RPC for the given tablet.
282282
func tabletUndoDemotePrimary(ctx context.Context, tablet *topodatapb.Tablet, semiSync bool) error {
283-
return tmc.UndoDemotePrimary(ctx, tablet, semiSync)
283+
tmcCtx, tmcCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
284+
defer tmcCancel()
285+
return tmc.UndoDemotePrimary(tmcCtx, tablet, semiSync)
284286
}
285287

286288
// setReadOnly calls the said RPC for the given tablet
287289
func setReadOnly(ctx context.Context, tablet *topodatapb.Tablet) error {
288-
return tmc.SetReadOnly(ctx, tablet)
290+
tmcCtx, tmcCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
291+
defer tmcCancel()
292+
return tmc.SetReadOnly(tmcCtx, tablet)
289293
}
290294

291295
// changeTabletType calls the said RPC for the given tablet with the given parameters.
292296
func changeTabletType(ctx context.Context, tablet *topodatapb.Tablet, tabletType topodatapb.TabletType, semiSync bool) error {
293-
return tmc.ChangeType(ctx, tablet, tabletType, semiSync)
297+
tmcCtx, tmcCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
298+
defer tmcCancel()
299+
return tmc.ChangeType(tmcCtx, tablet, tabletType, semiSync)
294300
}
295301

296302
// resetReplicationParameters resets the replication parameters on the given tablet.
297303
func resetReplicationParameters(ctx context.Context, tablet *topodatapb.Tablet) error {
298-
return tmc.ResetReplicationParameters(ctx, tablet)
304+
tmcCtx, tmcCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
305+
defer tmcCancel()
306+
return tmc.ResetReplicationParameters(tmcCtx, tablet)
299307
}
300308

301309
// setReplicationSource calls the said RPC with the parameters provided
302310
func setReplicationSource(ctx context.Context, replica *topodatapb.Tablet, primary *topodatapb.Tablet, semiSync bool) error {
303-
return tmc.SetReplicationSource(ctx, replica, primary.Alias, 0, "", true, semiSync)
311+
tmcCtx, tmcCancel := context.WithTimeout(ctx, topo.RemoteOperationTimeout)
312+
defer tmcCancel()
313+
return tmc.SetReplicationSource(tmcCtx, replica, primary.Alias, 0, "", true, semiSync)
304314
}
305315

306316
// shardPrimary finds the primary of the given keyspace-shard by reading the vtorc backend

go/vt/vtorc/logic/tablet_discovery_test.go

Lines changed: 259 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -21,6 +21,7 @@ import (
2121
"fmt"
2222
"sync/atomic"
2323
"testing"
24+
"time"
2425

2526
"github.com/google/go-cmp/cmp"
2627
"github.com/stretchr/testify/assert"
@@ -30,8 +31,10 @@ import (
3031
"vitess.io/vitess/go/vt/external/golib/sqlutils"
3132
topodatapb "vitess.io/vitess/go/vt/proto/topodata"
3233
"vitess.io/vitess/go/vt/proto/vttime"
34+
"vitess.io/vitess/go/vt/topo"
3335
"vitess.io/vitess/go/vt/topo/memorytopo"
3436
"vitess.io/vitess/go/vt/topo/topoproto"
37+
"vitess.io/vitess/go/vt/vtctl/grpcvtctldserver/testutil"
3538
"vitess.io/vitess/go/vt/vtorc/db"
3639
"vitess.io/vitess/go/vt/vtorc/inst"
3740
)
@@ -342,3 +345,259 @@ func TestGetLockAction(t *testing.T) {
342345
})
343346
}
344347
}
348+
349+
func TestSetReadOnly(t *testing.T) {
350+
tests := []struct {
351+
name string
352+
tablet *topodatapb.Tablet
353+
tmc *testutil.TabletManagerClient
354+
remoteOpTimeout time.Duration
355+
errShouldContain string
356+
}{
357+
{
358+
name: "Success",
359+
tablet: tab100,
360+
tmc: &testutil.TabletManagerClient{
361+
SetReadOnlyResults: map[string]error{
362+
"zone-1-0000000100": nil,
363+
},
364+
},
365+
}, {
366+
name: "Failure",
367+
tablet: tab100,
368+
tmc: &testutil.TabletManagerClient{
369+
SetReadOnlyResults: map[string]error{
370+
"zone-1-0000000100": fmt.Errorf("testing error"),
371+
},
372+
},
373+
errShouldContain: "testing error",
374+
}, {
375+
name: "Timeout",
376+
tablet: tab100,
377+
remoteOpTimeout: 100 * time.Millisecond,
378+
tmc: &testutil.TabletManagerClient{
379+
SetReadOnlyResults: map[string]error{
380+
"zone-1-0000000100": nil,
381+
},
382+
SetReadOnlyDelays: map[string]time.Duration{
383+
"zone-1-0000000100": 200 * time.Millisecond,
384+
},
385+
},
386+
errShouldContain: "context deadline exceeded",
387+
},
388+
}
389+
for _, tt := range tests {
390+
t.Run(tt.name, func(t *testing.T) {
391+
oldTmc := tmc
392+
oldRemoteOpTimeout := topo.RemoteOperationTimeout
393+
defer func() {
394+
tmc = oldTmc
395+
topo.RemoteOperationTimeout = oldRemoteOpTimeout
396+
}()
397+
398+
tmc = tt.tmc
399+
if tt.remoteOpTimeout != 0 {
400+
topo.RemoteOperationTimeout = tt.remoteOpTimeout
401+
}
402+
403+
err := setReadOnly(context.Background(), tt.tablet)
404+
if tt.errShouldContain == "" {
405+
require.NoError(t, err)
406+
return
407+
}
408+
require.ErrorContains(t, err, tt.errShouldContain)
409+
})
410+
}
411+
}
412+
413+
func TestTabletUndoDemotePrimary(t *testing.T) {
414+
tests := []struct {
415+
name string
416+
tablet *topodatapb.Tablet
417+
tmc *testutil.TabletManagerClient
418+
remoteOpTimeout time.Duration
419+
errShouldContain string
420+
}{
421+
{
422+
name: "Success",
423+
tablet: tab100,
424+
tmc: &testutil.TabletManagerClient{
425+
UndoDemotePrimaryResults: map[string]error{
426+
"zone-1-0000000100": nil,
427+
},
428+
},
429+
}, {
430+
name: "Failure",
431+
tablet: tab100,
432+
tmc: &testutil.TabletManagerClient{
433+
UndoDemotePrimaryResults: map[string]error{
434+
"zone-1-0000000100": fmt.Errorf("testing error"),
435+
},
436+
},
437+
errShouldContain: "testing error",
438+
}, {
439+
name: "Timeout",
440+
tablet: tab100,
441+
remoteOpTimeout: 100 * time.Millisecond,
442+
tmc: &testutil.TabletManagerClient{
443+
UndoDemotePrimaryResults: map[string]error{
444+
"zone-1-0000000100": nil,
445+
},
446+
UndoDemotePrimaryDelays: map[string]time.Duration{
447+
"zone-1-0000000100": 200 * time.Millisecond,
448+
},
449+
},
450+
errShouldContain: "context deadline exceeded",
451+
},
452+
}
453+
for _, tt := range tests {
454+
t.Run(tt.name, func(t *testing.T) {
455+
oldTmc := tmc
456+
oldRemoteOpTimeout := topo.RemoteOperationTimeout
457+
defer func() {
458+
tmc = oldTmc
459+
topo.RemoteOperationTimeout = oldRemoteOpTimeout
460+
}()
461+
462+
tmc = tt.tmc
463+
if tt.remoteOpTimeout != 0 {
464+
topo.RemoteOperationTimeout = tt.remoteOpTimeout
465+
}
466+
467+
err := tabletUndoDemotePrimary(context.Background(), tt.tablet, false)
468+
if tt.errShouldContain == "" {
469+
require.NoError(t, err)
470+
return
471+
}
472+
require.ErrorContains(t, err, tt.errShouldContain)
473+
})
474+
}
475+
}
476+
477+
func TestChangeTabletType(t *testing.T) {
478+
tests := []struct {
479+
name string
480+
tablet *topodatapb.Tablet
481+
tmc *testutil.TabletManagerClient
482+
remoteOpTimeout time.Duration
483+
errShouldContain string
484+
}{
485+
{
486+
name: "Success",
487+
tablet: tab100,
488+
tmc: &testutil.TabletManagerClient{
489+
ChangeTabletTypeResult: map[string]error{
490+
"zone-1-0000000100": nil,
491+
},
492+
},
493+
}, {
494+
name: "Failure",
495+
tablet: tab100,
496+
tmc: &testutil.TabletManagerClient{
497+
ChangeTabletTypeResult: map[string]error{
498+
"zone-1-0000000100": fmt.Errorf("testing error"),
499+
},
500+
},
501+
errShouldContain: "testing error",
502+
}, {
503+
name: "Timeout",
504+
tablet: tab100,
505+
remoteOpTimeout: 100 * time.Millisecond,
506+
tmc: &testutil.TabletManagerClient{
507+
ChangeTabletTypeResult: map[string]error{
508+
"zone-1-0000000100": nil,
509+
},
510+
ChangeTabletTypeDelays: map[string]time.Duration{
511+
"zone-1-0000000100": 200 * time.Millisecond,
512+
},
513+
},
514+
errShouldContain: "context deadline exceeded",
515+
},
516+
}
517+
for _, tt := range tests {
518+
t.Run(tt.name, func(t *testing.T) {
519+
oldTmc := tmc
520+
oldRemoteOpTimeout := topo.RemoteOperationTimeout
521+
defer func() {
522+
tmc = oldTmc
523+
topo.RemoteOperationTimeout = oldRemoteOpTimeout
524+
}()
525+
526+
tmc = tt.tmc
527+
if tt.remoteOpTimeout != 0 {
528+
topo.RemoteOperationTimeout = tt.remoteOpTimeout
529+
}
530+
531+
err := changeTabletType(context.Background(), tt.tablet, topodatapb.TabletType_REPLICA, false)
532+
if tt.errShouldContain == "" {
533+
require.NoError(t, err)
534+
return
535+
}
536+
require.ErrorContains(t, err, tt.errShouldContain)
537+
})
538+
}
539+
}
540+
541+
func TestSetReplicationSource(t *testing.T) {
542+
tests := []struct {
543+
name string
544+
tablet *topodatapb.Tablet
545+
tmc *testutil.TabletManagerClient
546+
remoteOpTimeout time.Duration
547+
errShouldContain string
548+
}{
549+
{
550+
name: "Success",
551+
tablet: tab100,
552+
tmc: &testutil.TabletManagerClient{
553+
SetReplicationSourceResults: map[string]error{
554+
"zone-1-0000000100": nil,
555+
},
556+
},
557+
}, {
558+
name: "Failure",
559+
tablet: tab100,
560+
tmc: &testutil.TabletManagerClient{
561+
SetReplicationSourceResults: map[string]error{
562+
"zone-1-0000000100": fmt.Errorf("testing error"),
563+
},
564+
},
565+
errShouldContain: "testing error",
566+
}, {
567+
name: "Timeout",
568+
tablet: tab100,
569+
remoteOpTimeout: 100 * time.Millisecond,
570+
tmc: &testutil.TabletManagerClient{
571+
SetReplicationSourceResults: map[string]error{
572+
"zone-1-0000000100": nil,
573+
},
574+
SetReplicationSourceDelays: map[string]time.Duration{
575+
"zone-1-0000000100": 200 * time.Millisecond,
576+
},
577+
},
578+
errShouldContain: "context deadline exceeded",
579+
},
580+
}
581+
for _, tt := range tests {
582+
t.Run(tt.name, func(t *testing.T) {
583+
oldTmc := tmc
584+
oldRemoteOpTimeout := topo.RemoteOperationTimeout
585+
defer func() {
586+
tmc = oldTmc
587+
topo.RemoteOperationTimeout = oldRemoteOpTimeout
588+
}()
589+
590+
tmc = tt.tmc
591+
if tt.remoteOpTimeout != 0 {
592+
topo.RemoteOperationTimeout = tt.remoteOpTimeout
593+
}
594+
595+
err := setReplicationSource(context.Background(), tt.tablet, tab101, false)
596+
if tt.errShouldContain == "" {
597+
require.NoError(t, err)
598+
return
599+
}
600+
require.ErrorContains(t, err, tt.errShouldContain)
601+
})
602+
}
603+
}

0 commit comments

Comments
 (0)