Skip to content

Commit 4058966

Browse files
Support passing filters to discovery.NewHealthCheck(...) (#16170)
Signed-off-by: Tim Vaillancourt <tim@timvaillancourt.com>
1 parent c4a9d39 commit 4058966

File tree

8 files changed

+186
-58
lines changed

8 files changed

+186
-58
lines changed

go/vt/discovery/healthcheck.go

Lines changed: 32 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ import (
3535
"bytes"
3636
"context"
3737
"encoding/json"
38+
"errors"
3839
"fmt"
3940
"hash/crc32"
4041
"net/http"
@@ -105,6 +106,9 @@ var (
105106
// HealthCheckHealthyTemplate uses healthCheckTemplate with the `HealthCheck Tablet - Healthy Tablets` title to
106107
// create the HTML code required to render the list of healthy tablets from the HealthCheck.
107108
HealthCheckHealthyTemplate = fmt.Sprintf(healthCheckTemplate, "HealthCheck - Healthy Tablets")
109+
110+
// errKeyspacesToWatchAndTabletFilters is an error for cases where incompatible filters are defined.
111+
errKeyspacesToWatchAndTabletFilters = errors.New("only one of --keyspaces_to_watch and --tablet_filters may be specified at a time")
108112
)
109113

110114
// See the documentation for NewHealthCheck below for an explanation of these parameters.
@@ -301,6 +305,27 @@ type HealthCheckImpl struct {
301305
healthCheckDialSem *semaphore.Weighted
302306
}
303307

308+
// NewVTGateHealthCheckFilters returns healthcheck filters for vtgate.
309+
func NewVTGateHealthCheckFilters() (filters TabletFilters, err error) {
310+
if len(tabletFilters) > 0 {
311+
if len(KeyspacesToWatch) > 0 {
312+
return nil, errKeyspacesToWatchAndTabletFilters
313+
}
314+
315+
fbs, err := NewFilterByShard(tabletFilters)
316+
if err != nil {
317+
return nil, fmt.Errorf("failed to parse tablet_filters value %q: %v", strings.Join(tabletFilters, ","), err)
318+
}
319+
filters = append(filters, fbs)
320+
} else if len(KeyspacesToWatch) > 0 {
321+
filters = append(filters, NewFilterByKeyspace(KeyspacesToWatch))
322+
}
323+
if len(tabletFilterTags) > 0 {
324+
filters = append(filters, NewFilterByTabletTags(tabletFilterTags))
325+
}
326+
return filters, nil
327+
}
328+
304329
// NewHealthCheck creates a new HealthCheck object.
305330
// Parameters:
306331
// retryDelay.
@@ -322,10 +347,14 @@ type HealthCheckImpl struct {
322347
//
323348
// The localCell for this healthcheck
324349
//
325-
// callback.
350+
// cellsToWatch.
326351
//
327-
// A function to call when there is a primary change. Used to notify vtgate's buffer to stop buffering.
328-
func NewHealthCheck(ctx context.Context, retryDelay, healthCheckTimeout time.Duration, topoServer *topo.Server, localCell, cellsToWatch string) *HealthCheckImpl {
352+
// Is a list of cells to watch for tablets.
353+
//
354+
// filters.
355+
//
356+
// Is one or more filters to apply when determining what tablets we want to stream healthchecks from.
357+
func NewHealthCheck(ctx context.Context, retryDelay, healthCheckTimeout time.Duration, topoServer *topo.Server, localCell, cellsToWatch string, filters TabletFilter) *HealthCheckImpl {
329358
log.Infof("loading tablets for cells: %v", cellsToWatch)
330359

331360
hc := &HealthCheckImpl{
@@ -348,27 +377,10 @@ func NewHealthCheck(ctx context.Context, retryDelay, healthCheckTimeout time.Dur
348377
}
349378

350379
for _, c := range cells {
351-
var filters TabletFilters
352380
log.Infof("Setting up healthcheck for cell: %v", c)
353381
if c == "" {
354382
continue
355383
}
356-
if len(tabletFilters) > 0 {
357-
if len(KeyspacesToWatch) > 0 {
358-
log.Exitf("Only one of -keyspaces_to_watch and -tablet_filters may be specified at a time")
359-
}
360-
361-
fbs, err := NewFilterByShard(tabletFilters)
362-
if err != nil {
363-
log.Exitf("Cannot parse tablet_filters parameter: %v", err)
364-
}
365-
filters = append(filters, fbs)
366-
} else if len(KeyspacesToWatch) > 0 {
367-
filters = append(filters, NewFilterByKeyspace(KeyspacesToWatch))
368-
}
369-
if len(tabletFilterTags) > 0 {
370-
filters = append(filters, NewFilterByTabletTags(tabletFilterTags))
371-
}
372384
topoWatchers = append(topoWatchers, NewTopologyWatcher(ctx, topoServer, hc, filters, c, refreshInterval, refreshKnownTablets, topo.DefaultConcurrency))
373385
}
374386

go/vt/discovery/healthcheck_test.go

Lines changed: 75 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,77 @@ func init() {
6363
refreshInterval = time.Minute
6464
}
6565

66+
func TestNewVTGateHealthCheckFilters(t *testing.T) {
67+
defer func() {
68+
KeyspacesToWatch = nil
69+
tabletFilters = nil
70+
tabletFilterTags = nil
71+
}()
72+
73+
testCases := []struct {
74+
name string
75+
keyspacesToWatch []string
76+
tabletFilters []string
77+
tabletFilterTags map[string]string
78+
expectedError string
79+
expectedFilterTypes []any
80+
}{
81+
{
82+
name: "noFilters",
83+
},
84+
{
85+
name: "tabletFilters",
86+
tabletFilters: []string{"ks1|-80"},
87+
expectedFilterTypes: []any{&FilterByShard{}},
88+
},
89+
{
90+
name: "keyspacesToWatch",
91+
keyspacesToWatch: []string{"ks1"},
92+
expectedFilterTypes: []any{&FilterByKeyspace{}},
93+
},
94+
{
95+
name: "tabletFiltersAndTags",
96+
tabletFilters: []string{"ks1|-80"},
97+
tabletFilterTags: map[string]string{"test": "true"},
98+
expectedFilterTypes: []any{&FilterByShard{}, &FilterByTabletTags{}},
99+
},
100+
{
101+
name: "keyspacesToWatchAndTags",
102+
tabletFilterTags: map[string]string{"test": "true"},
103+
keyspacesToWatch: []string{"ks1"},
104+
expectedFilterTypes: []any{&FilterByKeyspace{}, &FilterByTabletTags{}},
105+
},
106+
{
107+
name: "failKeyspacesToWatchAndFilters",
108+
tabletFilters: []string{"ks1|-80"},
109+
keyspacesToWatch: []string{"ks1"},
110+
expectedError: errKeyspacesToWatchAndTabletFilters.Error(),
111+
},
112+
{
113+
name: "failInvalidTabletFilters",
114+
tabletFilters: []string{"shouldfail|"},
115+
expectedError: "failed to parse tablet_filters value \"shouldfail|\": error parsing shard name : Code: INVALID_ARGUMENT\nempty name\n",
116+
},
117+
}
118+
119+
for _, testCase := range testCases {
120+
t.Run(testCase.name, func(t *testing.T) {
121+
KeyspacesToWatch = testCase.keyspacesToWatch
122+
tabletFilters = testCase.tabletFilters
123+
tabletFilterTags = testCase.tabletFilterTags
124+
125+
filters, err := NewVTGateHealthCheckFilters()
126+
if testCase.expectedError != "" {
127+
assert.EqualError(t, err, testCase.expectedError)
128+
}
129+
assert.Len(t, filters, len(testCase.expectedFilterTypes))
130+
for i, filter := range filters {
131+
assert.IsType(t, testCase.expectedFilterTypes[i], filter)
132+
}
133+
})
134+
}
135+
}
136+
66137
func TestHealthCheck(t *testing.T) {
67138
ctx := utils.LeakCheckContext(t)
68139
// reset error counters
@@ -1100,7 +1171,7 @@ func TestPrimaryInOtherCell(t *testing.T) {
11001171

11011172
ts := memorytopo.NewServer(ctx, "cell1", "cell2")
11021173
defer ts.Close()
1103-
hc := NewHealthCheck(ctx, 1*time.Millisecond, time.Hour, ts, "cell1", "cell1, cell2")
1174+
hc := NewHealthCheck(ctx, 1*time.Millisecond, time.Hour, ts, "cell1", "cell1, cell2", nil)
11041175
defer hc.Close()
11051176

11061177
// add a tablet as primary in different cell
@@ -1160,7 +1231,7 @@ func TestReplicaInOtherCell(t *testing.T) {
11601231

11611232
ts := memorytopo.NewServer(ctx, "cell1", "cell2")
11621233
defer ts.Close()
1163-
hc := NewHealthCheck(ctx, 1*time.Millisecond, time.Hour, ts, "cell1", "cell1, cell2")
1234+
hc := NewHealthCheck(ctx, 1*time.Millisecond, time.Hour, ts, "cell1", "cell1, cell2", nil)
11641235
defer hc.Close()
11651236

11661237
// add a tablet as replica
@@ -1265,7 +1336,7 @@ func TestCellAliases(t *testing.T) {
12651336

12661337
ts := memorytopo.NewServer(ctx, "cell1", "cell2")
12671338
defer ts.Close()
1268-
hc := NewHealthCheck(ctx, 1*time.Millisecond, time.Hour, ts, "cell1", "cell1, cell2")
1339+
hc := NewHealthCheck(ctx, 1*time.Millisecond, time.Hour, ts, "cell1", "cell1, cell2", nil)
12691340
defer hc.Close()
12701341

12711342
cellsAlias := &topodatapb.CellsAlias{
@@ -1416,7 +1487,7 @@ func tabletDialer(ctx context.Context, tablet *topodatapb.Tablet, _ grpcclient.F
14161487
}
14171488

14181489
func createTestHc(ctx context.Context, ts *topo.Server) *HealthCheckImpl {
1419-
return NewHealthCheck(ctx, 1*time.Millisecond, time.Hour, ts, "cell", "")
1490+
return NewHealthCheck(ctx, 1*time.Millisecond, time.Hour, ts, "cell", "", nil)
14201491
}
14211492

14221493
type fakeConn struct {

go/vt/discovery/keyspace_events_test.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -43,7 +43,7 @@ func TestSrvKeyspaceWithNilNewKeyspace(t *testing.T) {
4343
factory.AddCell(cell)
4444
ts := faketopo.NewFakeTopoServer(ctx, factory)
4545
ts2 := &fakeTopoServer{}
46-
hc := NewHealthCheck(ctx, 1*time.Millisecond, time.Hour, ts, cell, "")
46+
hc := NewHealthCheck(ctx, 1*time.Millisecond, time.Hour, ts, cell, "", nil)
4747
defer hc.Close()
4848
kew := NewKeyspaceEventWatcher(ctx, ts2, hc, cell)
4949
kss := &keyspaceState{
@@ -143,7 +143,7 @@ func TestKeyspaceEventTypes(t *testing.T) {
143143
factory.AddCell(cell)
144144
ts := faketopo.NewFakeTopoServer(ctx, factory)
145145
ts2 := &fakeTopoServer{}
146-
hc := NewHealthCheck(ctx, 1*time.Millisecond, time.Hour, ts, cell, "")
146+
hc := NewHealthCheck(ctx, 1*time.Millisecond, time.Hour, ts, cell, "", nil)
147147
defer hc.Close()
148148
kew := NewKeyspaceEventWatcher(ctx, ts2, hc, cell)
149149

go/vt/discovery/topology_watcher_test.go

Lines changed: 30 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -122,10 +122,11 @@ func checkWatcher(t *testing.T, refreshKnownTablets bool) {
122122
defer ts.Close()
123123
fhc := NewFakeHealthCheck(nil)
124124
defer fhc.Close()
125+
filter := NewFilterByKeyspace([]string{"keyspace"})
125126
logger := logutil.NewMemoryLogger()
126127
topologyWatcherOperations.ZeroAll()
127128
counts := topologyWatcherOperations.Counts()
128-
tw := NewTopologyWatcher(context.Background(), ts, fhc, nil, "aa", 10*time.Minute, refreshKnownTablets, 5)
129+
tw := NewTopologyWatcher(context.Background(), ts, fhc, filter, "aa", 10*time.Minute, refreshKnownTablets, 5)
129130

130131
counts = checkOpCounts(t, counts, map[string]int64{})
131132
checkChecksum(t, tw, 0)
@@ -172,10 +173,31 @@ func checkWatcher(t *testing.T, refreshKnownTablets bool) {
172173
require.NoError(t, ts.CreateTablet(context.Background(), tablet2), "CreateTablet failed for %v", tablet2.Alias)
173174
tw.loadTablets()
174175

176+
// Confirm second tablet triggers ListTablets + AddTablet calls.
175177
counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 0, "AddTablet": 1})
176178
checkChecksum(t, tw, 2762153755)
177179

178-
// Check the new tablet is returned by GetAllTablets().
180+
// Add a third tablet in a filtered keyspace to the topology.
181+
tablet3 := &topodatapb.Tablet{
182+
Alias: &topodatapb.TabletAlias{
183+
Cell: "aa",
184+
Uid: 3,
185+
},
186+
Hostname: "host3",
187+
PortMap: map[string]int32{
188+
"vt": 789,
189+
},
190+
Keyspace: "excluded",
191+
Shard: "shard",
192+
}
193+
require.NoError(t, ts.CreateTablet(context.Background(), tablet3), "CreateTablet failed for %v", tablet3.Alias)
194+
tw.loadTablets()
195+
196+
// Confirm filtered tablet did not trigger an AddTablet call.
197+
counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 0, "AddTablet": 0})
198+
checkChecksum(t, tw, 3177315266)
199+
200+
// Check the second tablet is returned by GetAllTablets(). This should not contain the filtered tablet.
179201
allTablets = fhc.GetAllTablets()
180202
key = TabletToMapKey(tablet2)
181203
assert.Len(t, allTablets, 2)
@@ -207,14 +229,14 @@ func checkWatcher(t *testing.T, refreshKnownTablets bool) {
207229
assert.Contains(t, allTablets, key)
208230
assert.True(t, proto.Equal(tablet, allTablets[key]))
209231
assert.NotContains(t, allTablets, origKey)
210-
checkChecksum(t, tw, 2762153755)
232+
checkChecksum(t, tw, 3177315266)
211233
} else {
212234
counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 0, "ReplaceTablet": 0})
213235
assert.Len(t, allTablets, 2)
214236
assert.Contains(t, allTablets, origKey)
215237
assert.True(t, proto.Equal(origTablet, allTablets[origKey]))
216238
assert.NotContains(t, allTablets, key)
217-
checkChecksum(t, tw, 2762153755)
239+
checkChecksum(t, tw, 3177315266)
218240
}
219241

220242
// Both tablets restart on different hosts.
@@ -269,7 +291,7 @@ func checkWatcher(t *testing.T, refreshKnownTablets bool) {
269291
require.Nil(t, err, "FixShardReplication failed")
270292
tw.loadTablets()
271293
counts = checkOpCounts(t, counts, map[string]int64{"ListTablets": 1, "GetTablet": 0, "RemoveTablet": 1})
272-
checkChecksum(t, tw, 789108290)
294+
checkChecksum(t, tw, 852159264)
273295

274296
allTablets = fhc.GetAllTablets()
275297
assert.Len(t, allTablets, 1)
@@ -280,8 +302,10 @@ func checkWatcher(t *testing.T, refreshKnownTablets bool) {
280302
assert.Contains(t, allTablets, key)
281303
assert.True(t, proto.Equal(tablet2, allTablets[key]))
282304

283-
// Remove the other and check that it is detected as being gone.
305+
// Remove the other tablets and check that it is detected as being gone.
306+
// Deleting the filtered tablet should not trigger a RemoveTablet call.
284307
require.NoError(t, ts.DeleteTablet(context.Background(), tablet2.Alias))
308+
require.NoError(t, ts.DeleteTablet(context.Background(), tablet3.Alias))
285309
_, err = topo.FixShardReplication(context.Background(), ts, logger, "aa", "keyspace", "shard")
286310
require.Nil(t, err, "FixShardReplication failed")
287311
tw.loadTablets()

go/vt/throttler/demo/throttler_demo.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -239,7 +239,7 @@ func newClient(ctx context.Context, primary *primary, replica *replica, ts *topo
239239
log.Fatal(err)
240240
}
241241

242-
healthCheck := discovery.NewHealthCheck(ctx, 5*time.Second, 1*time.Minute, ts, "cell1", "")
242+
healthCheck := discovery.NewHealthCheck(ctx, 5*time.Second, 1*time.Minute, ts, "cell1", "", nil)
243243
c := &client{
244244
primary: primary,
245245
healthCheck: healthCheck,

go/vt/vtgate/tabletgateway.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,11 @@ type TabletGateway struct {
101101
}
102102

103103
func createHealthCheck(ctx context.Context, retryDelay, timeout time.Duration, ts *topo.Server, cell, cellsToWatch string) discovery.HealthCheck {
104-
return discovery.NewHealthCheck(ctx, retryDelay, timeout, ts, cell, cellsToWatch)
104+
filters, err := discovery.NewVTGateHealthCheckFilters()
105+
if err != nil {
106+
log.Exit(err)
107+
}
108+
return discovery.NewHealthCheck(ctx, retryDelay, timeout, ts, cell, cellsToWatch, filters)
105109
}
106110

107111
// NewTabletGateway creates and returns a new TabletGateway

0 commit comments

Comments
 (0)