From 975d84e3ba8ba188f2d5c6d7b9d3f8ef46ce5949 Mon Sep 17 00:00:00 2001 From: Paul Banks Date: Tue, 24 Jan 2023 17:52:41 +0000 Subject: [PATCH] Make verifier tolerate non-identical bootstrap configuration entries --- verifier/store_test.go | 55 +++++++++++++++++++++++++++++++++++++++--- verifier/verifier.go | 7 ++++++ 2 files changed, 59 insertions(+), 3 deletions(-) diff --git a/verifier/store_test.go b/verifier/store_test.go index 9cf0736..c9fd05a 100644 --- a/verifier/store_test.go +++ b/verifier/store_test.go @@ -36,6 +36,21 @@ func TestStore(t *testing.T) { AssertReport("f1", LogRange{1234, 1234 + 5}, ""). Steps(), }, + { + name: "first index configuration is ignored", + steps: newTestSteps(1). + BootstrapNodes("leader", "f1"). + AppendN(5). + AssertCanRead("leader", LogRange{1, 7}). + AppendCheckpoint(). + AssertReport("leader", LogRange{1, 7}, "", func(t *testing.T, r *VerificationReport) { + require.Nil(t, r.SkippedRange) + }). + ReplicateTo("f1", 7, 0). + // Follower should report and match leader + AssertReport("f1", LogRange{1, 7}, ""). + Steps(), + }, { name: "leader WAL corruption", steps: newTestSteps(1234). @@ -167,6 +182,27 @@ func TestStore(t *testing.T) { ls := peers.logStore("leader") require.NoError(t, ls.StoreLogs(step.appendBatch)) + case len(step.bootstrapNodes) > 0: + // When we bootstrap we write a new configuration into log index 1 + // This actually happens independently on all servers in Raft and so + // the entry itself may not be byte-for-byte identical (although it + // should be logically identical). For example in Consul this comes + // from Serf gossip discovery and so the list of servers might be + // different on each server. That means validating the first log entry + // is likely to fail since it's legitimately different on each server. + // To simulate this, write a different entry to each server as + // "config". + for _, n := range step.bootstrapNodes { + // create the peer + ls := peers.logStore(n) + cfg := raft.Log{ + Index: 1, + Type: raft.LogConfiguration, + Data: []byte(fmt.Sprintf("config for %s", n)), + } + require.NoError(t, ls.StoreLog(&cfg)) + } + case step.replicateMax != 0: leader := peers.logStore("leader") follow := peers.logStore(step.targetNode) @@ -423,7 +459,7 @@ func assertCanRead(t *testing.T, s raft.LogStore, start, end uint64) { for idx := start; idx < end; idx++ { require.NoError(t, s.GetLog(idx, &log), "failed reading idx=%d", idx) require.Equal(t, int(idx), int(log.Index), "failed reading idx=%d, got idx=%d", idx, log.Index) - if !bytes.Equal(log.Data, []byte("CHECKPOINT")) { + if !bytes.Equal(log.Data, []byte("CHECKPOINT")) && log.Type != raft.LogConfiguration { require.Equal(t, fmt.Sprintf("LOG(%d)", idx), string(log.Data), "failed reading idx=%d", idx) } @@ -465,8 +501,9 @@ type testBuilder struct { } type testStep struct { - appendBatch []*raft.Log - checkPoint bool + appendBatch []*raft.Log + bootstrapNodes []string + checkPoint bool targetNode string @@ -560,6 +597,18 @@ func (b *testBuilder) AppendN(n int) *testBuilder { return b } +func (b *testBuilder) BootstrapNodes(nodes ...string) *testBuilder { + step := testStep{} + // Bootstrap config must be index 1 + step.bootstrapNodes = nodes + b.nextIndex = 2 + for _, n := range nodes { + b.peers[n] = 1 + } + b.steps = append(b.steps, step) + return b +} + func (b *testBuilder) AppendCheckpoint() *testBuilder { step := testStep{ appendBatch: []*raft.Log{{ diff --git a/verifier/verifier.go b/verifier/verifier.go index 96bc245..74cf336 100644 --- a/verifier/verifier.go +++ b/verifier/verifier.go @@ -210,6 +210,13 @@ func (s *LogStore) verify(report *VerificationReport) { } func checksumLog(sum uint64, log *raft.Log) uint64 { + // Special case for bootstrap config entries (index 1, type configuration) + // since these are not replicated by raft and so may not be byte-for-byte + // identical as long as they are logical the same on all peers. So just treat + // them all as identical to avoid false-positives on startup. + if log.Index == 1 && log.Type == raft.LogConfiguration { + return 0 + } sum = fnv1a.AddUint64(sum, log.Index) sum = fnv1a.AddUint64(sum, log.Term) sum = fnv1a.AddUint64(sum, uint64(log.Type))