Skip to content

Commit 2493afc

Browse files
TestMultipleClients*: Poll for the memberlist status for 10 seconds
Closes #389 These tests are highly dependant on timing, which makes them flaky. Poll for a bit longer at the end to make sure that the memberlist was updated
1 parent 879ff5a commit 2493afc

File tree

1 file changed

+61
-41
lines changed

1 file changed

+61
-41
lines changed

kv/memberlist/memberlist_client_test.go

Lines changed: 61 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -613,7 +613,7 @@ func TestMultipleClientsWithMixedLabelsAndExpectFailure(t *testing.T) {
613613

614614
err := testMultipleClientsWithConfigGenerator(t, len(membersLabel), configGen)
615615
require.Error(t, err)
616-
require.Contains(t, err.Error(), fmt.Sprintf("expected to see at least %d updates", len(membersLabel)))
616+
require.Contains(t, err.Error(), "expected to see at least 2 members, got 1")
617617
}
618618

619619
func TestMultipleClientsWithMixedLabelsAndClusterLabelVerificationDisabled(t *testing.T) {
@@ -662,6 +662,8 @@ func TestMultipleClientsWithSameLabelWithClusterLabelVerification(t *testing.T)
662662
}
663663

664664
func testMultipleClientsWithConfigGenerator(t *testing.T, members int, configGen func(memberId int) KVConfig) error {
665+
t.Helper()
666+
665667
c := dataCodec{}
666668
const key = "ring"
667669
var clients []*Client
@@ -723,11 +725,10 @@ func testMultipleClientsWithConfigGenerator(t *testing.T, members int, configGen
723725
startTime := time.Now()
724726
firstKv := clients[0]
725727
ctx, cancel := context.WithTimeout(context.Background(), casInterval*3) // Watch for 3x cas intervals.
726-
updates := 0
728+
joinedMembers := 0
727729
firstKv.WatchKey(ctx, key, func(in interface{}) bool {
728-
updates++
729-
730730
r := in.(*data)
731+
joinedMembers = len(r.Members)
731732

732733
minTimestamp, maxTimestamp, avgTimestamp := getTimestamps(r.Members)
733734

@@ -740,12 +741,9 @@ func testMultipleClientsWithConfigGenerator(t *testing.T, members int, configGen
740741
})
741742
cancel() // make linter happy
742743

743-
t.Logf("Ring updates observed: %d", updates)
744-
745-
if updates < members {
746-
// in general, at least one update from each node. (although that's not necessarily true...
747-
// but typically we get more updates than that anyway)
748-
return fmt.Errorf("expected to see at least %d updates, got %d", members, updates)
744+
if joinedMembers <= 1 {
745+
// expect at least 2 members. Otherwise, this means that the ring has failed to sync.
746+
return fmt.Errorf("expected to see at least 2 members, got %d", joinedMembers)
749747
}
750748

751749
if err := getClientErr(); err != nil {
@@ -755,47 +753,69 @@ func testMultipleClientsWithConfigGenerator(t *testing.T, members int, configGen
755753
// Let's check all the clients to see if they have relatively up-to-date information
756754
// All of them should at least have all the clients
757755
// And same tokens.
758-
allTokens := []uint32(nil)
759-
760-
for i := 0; i < members; i++ {
761-
kv := clients[i]
756+
check := func() error {
757+
allTokens := []uint32(nil)
762758

763-
r := getData(t, kv, key)
764-
t.Logf("KV %d: number of known members: %d\n", i, len(r.Members))
765-
if len(r.Members) != members {
766-
return fmt.Errorf("Member %d has only %d members in the ring", i, len(r.Members))
767-
}
759+
for i := 0; i < members; i++ {
760+
kv := clients[i]
768761

769-
minTimestamp, maxTimestamp, avgTimestamp := getTimestamps(r.Members)
770-
for n, ing := range r.Members {
771-
if ing.State != ACTIVE {
772-
return fmt.Errorf("Member %d: invalid state of member %s in the ring: %v ", i, n, ing.State)
762+
r := getData(t, kv, key)
763+
t.Logf("KV %d: number of known members: %d\n", i, len(r.Members))
764+
if len(r.Members) != members {
765+
return fmt.Errorf("Member %d has only %d members in the ring", i, len(r.Members))
773766
}
774-
}
775-
now := time.Now()
776-
t.Logf("Member %d: oldest: %v, avg: %v, youngest: %v", i,
777-
now.Sub(time.Unix(minTimestamp, 0)).String(),
778-
now.Sub(time.Unix(avgTimestamp, 0)).String(),
779-
now.Sub(time.Unix(maxTimestamp, 0)).String())
780-
781-
tokens := r.getAllTokens()
782-
if allTokens == nil {
783-
allTokens = tokens
784-
t.Logf("Found tokens: %d", len(allTokens))
785-
} else {
786-
if len(allTokens) != len(tokens) {
787-
return fmt.Errorf("Member %d: Expected %d tokens, got %d", i, len(allTokens), len(tokens))
767+
768+
minTimestamp, maxTimestamp, avgTimestamp := getTimestamps(r.Members)
769+
for n, ing := range r.Members {
770+
if ing.State != ACTIVE {
771+
stateStr := "UNKNOWN"
772+
switch ing.State {
773+
case JOINING:
774+
stateStr = "JOINING"
775+
case LEFT:
776+
stateStr = "LEFT"
777+
}
778+
return fmt.Errorf("Member %d: invalid state of member %s in the ring: %s (%v) ", i, n, stateStr, ing.State)
779+
}
788780
}
781+
now := time.Now()
782+
t.Logf("Member %d: oldest: %v, avg: %v, youngest: %v", i,
783+
now.Sub(time.Unix(minTimestamp, 0)).String(),
784+
now.Sub(time.Unix(avgTimestamp, 0)).String(),
785+
now.Sub(time.Unix(maxTimestamp, 0)).String())
786+
787+
tokens := r.getAllTokens()
788+
if allTokens == nil {
789+
allTokens = tokens
790+
t.Logf("Found tokens: %d", len(allTokens))
791+
} else {
792+
if len(allTokens) != len(tokens) {
793+
return fmt.Errorf("Member %d: Expected %d tokens, got %d", i, len(allTokens), len(tokens))
794+
}
789795

790-
for ix, tok := range allTokens {
791-
if tok != tokens[ix] {
792-
return fmt.Errorf("Member %d: Tokens at position %d differ: %v, %v", i, ix, tok, tokens[ix])
796+
for ix, tok := range allTokens {
797+
if tok != tokens[ix] {
798+
return fmt.Errorf("Member %d: Tokens at position %d differ: %v, %v", i, ix, tok, tokens[ix])
799+
}
793800
}
794801
}
795802
}
803+
804+
return getClientErr()
796805
}
797806

798-
return getClientErr()
807+
// Try this for ~10 seconds. memberlist is eventually consistent, so we may need to wait a bit, especially with `-race`.
808+
for timeout := time.After(10 * time.Second); ; {
809+
select {
810+
case <-timeout:
811+
return check() // return last error
812+
default:
813+
if err := check(); err == nil {
814+
return nil // it passed
815+
}
816+
time.Sleep(100 * time.Millisecond)
817+
}
818+
}
799819
}
800820

801821
func TestJoinMembersWithRetryBackoff(t *testing.T) {

0 commit comments

Comments
 (0)