Skip to content

Commit 7ff9389

Browse files
committed
Throttler: fix race conditions in Operate() termination and in tests
Signed-off-by: Shlomi Noach <2607934+shlomi-noach@users.noreply.github.com>
1 parent 44299cf commit 7ff9389

File tree

3 files changed

+67
-87
lines changed

3 files changed

+67
-87
lines changed

go/vt/vttablet/tabletserver/throttle/throttler.go

Lines changed: 15 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -424,25 +424,28 @@ func (throttler *Throttler) IsRunning() bool {
424424

425425
// Enable activates the throttler probes; when enabled, the throttler responds to check queries based on
426426
// the collected metrics.
427-
func (throttler *Throttler) Enable() bool {
427+
// The function returns a WaitGroup that can be used to wait for the throttler to be fully disabled, i.e. when
428+
// the Operate() goroutine function terminates and caches are invalidated.
429+
func (throttler *Throttler) Enable() *sync.WaitGroup {
428430
throttler.enableMutex.Lock()
429431
defer throttler.enableMutex.Unlock()
430432

431433
if wasEnabled := throttler.isEnabled.Swap(true); wasEnabled {
432434
log.Infof("Throttler: already enabled")
433-
return false
435+
return nil
434436
}
435437
log.Infof("Throttler: enabling")
436438

439+
wg := &sync.WaitGroup{}
437440
var ctx context.Context
438441
ctx, throttler.cancelEnableContext = context.WithCancel(context.Background())
439442
throttler.check.SelfChecks(ctx)
440-
throttler.Operate(ctx)
443+
throttler.Operate(ctx, wg)
441444

442445
// Make a one-time request for a lease of heartbeats
443446
go throttler.heartbeatWriter.RequestHeartbeats()
444447

445-
return true
448+
return wg
446449
}
447450

448451
// Disable deactivates the probes and associated operations. When disabled, the throttler responds to check
@@ -457,10 +460,6 @@ func (throttler *Throttler) Disable() bool {
457460
}
458461
log.Infof("Throttler: disabling")
459462
// _ = throttler.updateConfig(ctx, false, throttler.MetricsThreshold.Get()) // TODO(shlomi)
460-
throttler.aggregatedMetrics.Flush()
461-
throttler.recentApps.Flush()
462-
throttler.nonLowPriorityAppRequestsThrottled.Flush()
463-
// we do not flush throttler.throttledApps because this is data submitted by the user; the user expects the data to survive a disable+enable
464463

465464
throttler.cancelEnableContext()
466465
return true
@@ -641,7 +640,7 @@ func (throttler *Throttler) isDormant() bool {
641640

642641
// Operate is the main entry point for the throttler operation and logic. It will
643642
// run the probes, collect metrics, refresh inventory, etc.
644-
func (throttler *Throttler) Operate(ctx context.Context) {
643+
func (throttler *Throttler) Operate(ctx context.Context, wg *sync.WaitGroup) {
645644
tickers := [](*timer.SuspendableTicker){}
646645
addTicker := func(d time.Duration) *timer.SuspendableTicker {
647646
t := timer.NewSuspendableTicker(d, false)
@@ -656,7 +655,14 @@ func (throttler *Throttler) Operate(ctx context.Context) {
656655
throttledAppsTicker := addTicker(throttler.throttledAppsSnapshotInterval)
657656
recentCheckTicker := addTicker(time.Second)
658657

658+
wg.Add(1)
659659
go func() {
660+
defer wg.Done()
661+
defer throttler.aggregatedMetrics.Flush()
662+
defer throttler.recentApps.Flush()
663+
defer throttler.nonLowPriorityAppRequestsThrottled.Flush()
664+
// we do not flush throttler.throttledApps because this is data submitted by the user; the user expects the data to survive a disable+enable
665+
660666
defer log.Infof("Throttler: Operate terminated, tickers stopped")
661667
for _, t := range tickers {
662668
defer t.Stop()

go/vt/vttablet/tabletserver/throttle/throttler_exclude_race_test.go

Lines changed: 0 additions & 76 deletions
This file was deleted.

go/vt/vttablet/tabletserver/throttle/throttler_test.go

Lines changed: 52 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -331,9 +331,10 @@ func runThrottler(t *testing.T, throttler *Throttler, timeout time.Duration, f f
331331
assert.True(t, throttler.IsOpen())
332332
assert.False(t, throttler.IsEnabled())
333333

334-
ok := throttler.Enable()
334+
wg := throttler.Enable()
335335
defer throttler.Disable()
336-
assert.True(t, ok)
336+
assert.NotNil(t, wg)
337+
defer wg.Wait()
337338
assert.True(t, throttler.IsEnabled())
338339

339340
if f != nil {
@@ -382,3 +383,52 @@ func TestProbesWhileOperating(t *testing.T) {
382383
})
383384
})
384385
}
386+
387+
// TestProbesPostDisable runs the throttler for some time, and then investigates the internal throttler maps and values.
388+
// While the throttler is disabled, it is technically safe to iterate those structures. However, `go test -race` disagrees,
389+
// which is why the test first waits for the Operate() goroutine to terminate before inspecting them.
390+
func TestProbesPostDisable(t *testing.T) {
391+
throttler := newTestThrottler()
392+
runThrottler(t, throttler, 2*time.Second, nil)
393+
394+
time.Sleep(time.Second) // throttler's Operate() quits asynchronously. For sake of `go test -race` we allow a graceful wait.
395+
probes := throttler.mysqlInventory.ClustersProbes
396+
assert.NotEmpty(t, probes)
397+
398+
selfProbes := probes[selfStoreName]
399+
t.Run("self", func(t *testing.T) {
400+
assert.NotEmpty(t, selfProbes)
401+
require.Equal(t, 1, len(selfProbes)) // should always be true once refreshMySQLInventory() runs
402+
probe, ok := selfProbes[""]
403+
assert.True(t, ok)
404+
assert.NotNil(t, probe)
405+
406+
assert.Equal(t, "", probe.Alias)
407+
assert.Nil(t, probe.Tablet)
408+
assert.Equal(t, "select 1", probe.MetricQuery)
409+
assert.Zero(t, atomic.LoadInt64(&probe.QueryInProgress))
410+
})
411+
412+
shardProbes := probes[shardStoreName]
413+
t.Run("shard", func(t *testing.T) {
414+
assert.NotEmpty(t, shardProbes)
415+
assert.Equal(t, 2, len(shardProbes)) // see fake FindAllTabletAliasesInShard above
416+
for _, probe := range shardProbes {
417+
require.NotNil(t, probe)
418+
assert.NotEmpty(t, probe.Alias)
419+
assert.NotNil(t, probe.Tablet)
420+
assert.Equal(t, "select 1", probe.MetricQuery)
421+
assert.Zero(t, atomic.LoadInt64(&probe.QueryInProgress))
422+
}
423+
})
424+
425+
t.Run("metrics", func(t *testing.T) {
426+
assert.Equal(t, 3, len(throttler.mysqlInventory.TabletMetrics)) // 1 self tablet + 2 shard tablets
427+
})
428+
429+
t.Run("aggregated", func(t *testing.T) {
430+
assert.Zero(t, throttler.aggregatedMetrics.ItemCount()) // flushed upon Disable()
431+
aggr := throttler.aggregatedMetricsSnapshot()
432+
assert.Empty(t, aggr)
433+
})
434+
}

0 commit comments

Comments
 (0)