diff --git a/go/flags/endtoend/vtgate.txt b/go/flags/endtoend/vtgate.txt index c023a08af00..312dcb6f74d 100644 --- a/go/flags/endtoend/vtgate.txt +++ b/go/flags/endtoend/vtgate.txt @@ -28,7 +28,6 @@ Flags: --allow-kill-statement Allows the execution of kill statement --allowed_tablet_types strings Specifies the tablet types this vtgate is allowed to route queries to. Should be provided as a comma-separated set of tablet types. --alsologtostderr log to standard error as well as files - --balancer-enabled Whether to enable the tablet balancer to evenly spread query load --balancer-keyspaces strings When in balanced mode, a comma-separated list of keyspaces for which to use the balancer (optional) --balancer-vtgate-cells strings When in balanced mode, a comma-separated list of cells that contain vtgates (required) --bind-address string Bind address for the server. If empty, the server will listen on all available unicast and anycast IP addresses of the local system. @@ -56,6 +55,7 @@ Flags: --discovery_high_replication_lag_minimum_serving duration Threshold above which replication lag is considered too high when applying the min_number_serving_vttablets flag. (default 2h0m0s) --discovery_low_replication_lag duration Threshold below which replication lag is considered low enough to be healthy. (default 30s) --emit_stats If set, emit stats to push-based monitoring and stats backends + --enable-balancer Enable the tablet balancer to evenly spread query load for a given tablet type --enable-partial-keyspace-migration (Experimental) Follow shard routing rules: enable only while migrating a keyspace shard by shard. See documentation on Partial MoveTables for more. (default false) --enable-views Enable views support in vtgate. --enable_buffer Enable buffering (stalling) of primary traffic during failovers. diff --git a/go/vt/vtgate/balancer/balancer.go b/go/vt/vtgate/balancer/balancer.go index 0e84cb66e21..bfe85194c05 100644 --- a/go/vt/vtgate/balancer/balancer.go +++ b/go/vt/vtgate/balancer/balancer.go @@ -1,5 +1,5 @@ /* -Copyright 2023 The Vitess Authors. +Copyright 2024 The Vitess Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -92,7 +92,7 @@ type TabletBalancer interface { // for a given query to maintain the desired balanced allocation over multiple executions. Pick(target *querypb.Target, tablets []*discovery.TabletHealth) *discovery.TabletHealth - // Balancer debug page request + // DebugHandler provides a summary of tablet balancer state DebugHandler(w http.ResponseWriter, r *http.Request) } diff --git a/go/vt/vtgate/balancer/balancer_test.go b/go/vt/vtgate/balancer/balancer_test.go index e110f3fc5c0..bdfd3803609 100644 --- a/go/vt/vtgate/balancer/balancer_test.go +++ b/go/vt/vtgate/balancer/balancer_test.go @@ -1,5 +1,5 @@ /* -Copyright 2023 The Vitess Authors. +Copyright 2024 The Vitess Authors. Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. @@ -20,6 +20,8 @@ import ( "strconv" "testing" + "github.com/stretchr/testify/assert" + "vitess.io/vitess/go/vt/discovery" querypb "vitess.io/vitess/go/vt/proto/query" topodatapb "vitess.io/vitess/go/vt/proto/topodata" @@ -45,17 +47,6 @@ func createTestTablet(cell string) *discovery.TabletHealth { } } -// allow 2% fuzz -const FUZZ = 2 - -func fuzzyEquals(a, b int) bool { - diff := a - b - if diff < 0 { - diff = -diff - } - return diff < a*FUZZ/100 -} - func TestAllocateFlows(t *testing.T) { cases := []struct { test string @@ -180,15 +171,13 @@ func TestAllocateFlows(t *testing.T) { a := b.allocateFlows(tablets) b.allocations[discovery.KeyFromTarget(target)] = a - t.Logf("Target Flows %v, Balancer: %s XXX %d %v \n", expectedPerCell, b.print(), len(b.allocations), b.allocations) + t.Logf("Target Flows %v, Balancer: %s, Allocations: %v \n", expectedPerCell, b.print(), b.allocations) // Accumulate all the output per tablet cell outflowPerCell := make(map[string]int) for _, outflow := range a.Outflows { for tabletCell, flow := range outflow { - if flow < 0 { - t.Errorf("balancer %v negative outflow", b.print()) - } + assert.GreaterOrEqual(t, flow, 0, b.print()) outflowPerCell[tabletCell] += flow } } @@ -197,10 +186,12 @@ func TestAllocateFlows(t *testing.T) { for cell := range tabletsByCell { expectedForCell := expectedPerCell[cell] - if !fuzzyEquals(a.Inflows[cell], expectedForCell) || !fuzzyEquals(outflowPerCell[cell], expectedForCell) { - t.Errorf("Balancer {%s} ExpectedPerCell {%v} did not allocate correct flow to cell %s: expected %d, inflow %d outflow %d", - b.print(), expectedPerCell, cell, expectedForCell, a.Inflows[cell], outflowPerCell[cell]) - } + assert.InEpsilonf(t, expectedForCell, a.Inflows[cell], 0.01, + "did not allocate correct inflow to cell %s. Balancer {%s} ExpectedPerCell {%v}", + cell, b.print(), expectedPerCell) + assert.InEpsilonf(t, expectedForCell, outflowPerCell[cell], 0.01, + "did not allocate correct outflow to cell %s. Balancer {%s} ExpectedPerCell {%v}", + cell, b.print(), expectedPerCell) } // Accumulate the allocations for all runs to compare what the system does as a whole @@ -215,10 +206,8 @@ func TestAllocateFlows(t *testing.T) { uid := tablet.Tablet.Alias.Uid allocation := allocationPerTablet[uid] - if !fuzzyEquals(allocation, expectedPerTablet) { - t.Errorf("did not allocate full allocation to tablet %d: expected %d got %d", - uid, expectedPerTablet, allocation) - } + assert.InEpsilonf(t, expectedPerTablet, allocation, 0.01, + "did not allocate full allocation to tablet %d", uid) } } } @@ -324,11 +313,9 @@ func TestBalancedPick(t *testing.T) { for _, tablet := range tablets { got := routed[tablet.Tablet.Alias.Uid] delta[tablet.Tablet.Alias.Uid] = got - expected - if !fuzzyEquals(got, expected) { - t.Errorf("routing to tablet %d got %d expected %d", tablet.Tablet.Alias.Uid, got, expected) - } + assert.InEpsilonf(t, expected, got, 0.01, + "routing to tablet %d", tablet.Tablet.Alias.Uid) } - t.Logf("Expected %d per tablet, Routed %v, Delta %v, Max delta %d", N/len(tablets), routed, delta, expected*FUZZ/100) } } @@ -353,17 +340,9 @@ func TestTopologyChanged(t *testing.T) { th := b.Pick(target, tablets) allocation, totalAllocation := b.getAllocation(target, tablets) - if totalAllocation != ALLOCATION/2 { - t.Errorf("totalAllocation mismatch %s", b.print()) - } - - if allocation[th.Tablet.Alias.Uid] != ALLOCATION/4 { - t.Errorf("allocation mismatch %s, cell %s", b.print(), allTablets[0].Tablet.Alias.Cell) - } - - if th.Tablet.Alias.Cell != "a" { - t.Errorf("shuffle promoted wrong tablet from cell %s", tablets[0].Tablet.Alias.Cell) - } + assert.Equalf(t, ALLOCATION/2, totalAllocation, "totalAllocation mismatch %s", b.print()) + assert.Equalf(t, ALLOCATION/4, allocation[th.Tablet.Alias.Uid], "allocation mismatch %s, cell %s", b.print(), allTablets[0].Tablet.Alias.Cell) + assert.Equalf(t, "a", th.Tablet.Alias.Cell, "shuffle promoted wrong tablet from cell %s", allTablets[0].Tablet.Alias.Cell) } // Run again with the full topology. Now traffic should go to cell b @@ -372,17 +351,9 @@ func TestTopologyChanged(t *testing.T) { allocation, totalAllocation := b.getAllocation(target, allTablets) - if totalAllocation != ALLOCATION/2 { - t.Errorf("totalAllocation mismatch %s", b.print()) - } - - if allocation[th.Tablet.Alias.Uid] != ALLOCATION/4 { - t.Errorf("allocation mismatch %s, cell %s", b.print(), allTablets[0].Tablet.Alias.Cell) - } - - if th.Tablet.Alias.Cell != "b" { - t.Errorf("shuffle promoted wrong tablet from cell %s", allTablets[0].Tablet.Alias.Cell) - } + assert.Equalf(t, ALLOCATION/2, totalAllocation, "totalAllocation mismatch %s", b.print()) + assert.Equalf(t, ALLOCATION/4, allocation[th.Tablet.Alias.Uid], "allocation mismatch %s, cell %s", b.print(), allTablets[0].Tablet.Alias.Cell) + assert.Equalf(t, "b", th.Tablet.Alias.Cell, "shuffle promoted wrong tablet from cell %s", allTablets[0].Tablet.Alias.Cell) } // Run again with a node in the topology replaced. @@ -393,17 +364,9 @@ func TestTopologyChanged(t *testing.T) { allocation, totalAllocation := b.getAllocation(target, allTablets) - if totalAllocation != ALLOCATION/2 { - t.Errorf("totalAllocation mismatch %s", b.print()) - } - - if allocation[th.Tablet.Alias.Uid] != ALLOCATION/4 { - t.Errorf("allocation mismatch %s, cell %s", b.print(), allTablets[0].Tablet.Alias.Cell) - } - - if th.Tablet.Alias.Cell != "b" { - t.Errorf("shuffle promoted wrong tablet from cell %s", allTablets[0].Tablet.Alias.Cell) - } + assert.Equalf(t, ALLOCATION/2, totalAllocation, "totalAllocation mismatch %s", b.print()) + assert.Equalf(t, ALLOCATION/4, allocation[th.Tablet.Alias.Uid], "allocation mismatch %s, cell %s", b.print(), allTablets[0].Tablet.Alias.Cell) + assert.Equalf(t, "b", th.Tablet.Alias.Cell, "shuffle promoted wrong tablet from cell %s", allTablets[0].Tablet.Alias.Cell) } } diff --git a/go/vt/vtgate/tabletgateway.go b/go/vt/vtgate/tabletgateway.go index 726c9950e3f..4bcd6565205 100644 --- a/go/vt/vtgate/tabletgateway.go +++ b/go/vt/vtgate/tabletgateway.go @@ -70,7 +70,7 @@ func init() { fs.StringVar(&CellsToWatch, "cells_to_watch", "", "comma-separated list of cells for watching tablets") fs.DurationVar(&initialTabletTimeout, "gateway_initial_tablet_timeout", 30*time.Second, "At startup, the tabletGateway will wait up to this duration to get at least one tablet per keyspace/shard/tablet type") fs.IntVar(&retryCount, "retry-count", 2, "retry count") - fs.BoolVar(&balancerEnabled, "balancer-enabled", false, "Whether to enable the tablet balancer to evenly spread query load") + fs.BoolVar(&balancerEnabled, "enable-balancer", false, "Enable the tablet balancer to evenly spread query load for a given tablet type") fs.StringSliceVar(&balancerVtgateCells, "balancer-vtgate-cells", []string{}, "When in balanced mode, a comma-separated list of cells that contain vtgates (required)") fs.StringSliceVar(&balancerKeyspaces, "balancer-keyspaces", []string{}, "When in balanced mode, a comma-separated list of keyspaces for which to use the balancer (optional)") })