From 1f2f425dff6360195e154d52a34c65e365dab981 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Peter=20=C5=A0tibran=C3=BD?= Date: Tue, 13 Aug 2024 16:21:23 +0200 Subject: [PATCH] Don't include read-only instances if they changed state outside of lookback. --- ring/ring.go | 8 +++++++- ring/ring_test.go | 41 ++++++++++++++++++++++++++++++----------- 2 files changed, 37 insertions(+), 12 deletions(-) diff --git a/ring/ring.go b/ring/ring.go index cf8adaf14..87296cab5 100644 --- a/ring/ring.go +++ b/ring/ring.go @@ -800,11 +800,17 @@ func (r *Ring) shuffleShard(identifier string, size int, lookbackPeriod time.Dur instanceID := info.InstanceID instance := r.ringDesc.Ingesters[instanceID] - // If the instance is read only without a lookback, do not include it in the shard. + // On write path (lookbackPeriod == 0), read only instances are excluded. if lookbackPeriod == 0 && instance.ReadOnly { continue } + // On read path (lookbackPeriod > 0), read only instances are only included if they have not changed read-only status in the lookback window. + // If ReadOnlyUpdatedTimestamp is not set, we include the instance, and extend the shard later. + if lookbackPeriod > 0 && instance.ReadOnly && (instance.ReadOnlyUpdatedTimestamp > 0 && instance.ReadOnlyUpdatedTimestamp < lookbackUntil) { + continue + } + // Include instance in the subring. shard[instanceID] = instance // If the lookback is enabled and this instance has been registered within the lookback period diff --git a/ring/ring_test.go b/ring/ring_test.go index 1ac2e0e17..f63a43a5a 100644 --- a/ring/ring_test.go +++ b/ring/ring_test.go @@ -1869,6 +1869,7 @@ func TestRing_ShuffleShardWithLookback(t *testing.T) { instanceDesc InstanceDesc shardSize int expected []string + readOnly bool readOnlyTime time.Time } @@ -1974,7 +1975,7 @@ func TestRing_ShuffleShardWithLookback(t *testing.T) { timeline: []event{ // instance 2 is included in addition to another instance {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now.Add(-2*lookbackPeriod))}, - {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-a", []uint32{userToken(userID, "zone-a", 1) + 1}, now.Add(-2*lookbackPeriod)), readOnlyTime: now.Add(lookbackPeriod / 2)}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-a", []uint32{userToken(userID, "zone-a", 1) + 1}, now.Add(-2*lookbackPeriod)), readOnly: true, readOnlyTime: now.Add(lookbackPeriod / 2)}, {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-a", []uint32{userToken(userID, "zone-a", 2) + 1}, now.Add(-2*lookbackPeriod))}, {what: test, shardSize: 2, expected: []string{"instance-1", "instance-2", "instance-3"}}, }, @@ -1983,9 +1984,17 @@ func TestRing_ShuffleShardWithLookback(t *testing.T) { timeline: []event{ // readOnlyTime is too old to matter {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now.Add(-2*lookbackPeriod))}, - {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-a", []uint32{userToken(userID, "zone-a", 1) + 1}, now.Add(-2*lookbackPeriod)), readOnlyTime: now.Add(-2 * lookbackPeriod)}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-a", []uint32{userToken(userID, "zone-a", 1) + 1}, now.Add(-2*lookbackPeriod)), readOnly: true, readOnlyTime: now.Add(-2 * lookbackPeriod)}, {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-a", []uint32{userToken(userID, "zone-a", 2) + 1}, now.Add(-2*lookbackPeriod))}, - {what: test, shardSize: 2, expected: []string{"instance-1", "instance-2"}}, + {what: test, shardSize: 2, expected: []string{"instance-1", "instance-3"}}, + }, + }, + "single zone, with read only instance, with unknown ReadOnlyUpdatedTimestamp": { + timeline: []event{ + {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-a", []uint32{userToken(userID, "zone-a", 1) + 1}, now.Add(-2*lookbackPeriod)), readOnly: true}, + {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-a", []uint32{userToken(userID, "zone-a", 2) + 1}, now.Add(-2*lookbackPeriod))}, + {what: test, shardSize: 2, expected: []string{"instance-1", "instance-2", "instance-3"}}, }, }, "multi zone, shard size = 3, recently bootstrapped cluster": { @@ -2088,11 +2097,11 @@ func TestRing_ShuffleShardWithLookback(t *testing.T) { timeline: []event{ {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now.Add(-2*lookbackPeriod))}, // instance 2 and 4 are included in addition to other instances in the same zone - {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-b", []uint32{userToken(userID, "zone-b", 1) + 1}, now.Add(-2*lookbackPeriod)), readOnlyTime: now.Add(lookbackPeriod / 2)}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-b", []uint32{userToken(userID, "zone-b", 1) + 1}, now.Add(-2*lookbackPeriod)), readOnly: true, readOnlyTime: now.Add(lookbackPeriod / 2)}, {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-b", []uint32{userToken(userID, "zone-b", 2) + 1}, now.Add(-2*lookbackPeriod))}, {what: add, instanceID: "instance-4", instanceDesc: generateRingInstanceWithInfo("instance-4", "zone-c", []uint32{userToken(userID, "zone-c", 3) + 1}, now.Add(-2*lookbackPeriod))}, {what: add, instanceID: "instance-5", instanceDesc: generateRingInstanceWithInfo("instance-5", "zone-c", []uint32{userToken(userID, "zone-c", 4) + 1}, now.Add(-2*lookbackPeriod))}, - {what: add, instanceID: "instance-6", instanceDesc: generateRingInstanceWithInfo("instance-6", "zone-c", []uint32{userToken(userID, "zone-c", 5) + 1}, now.Add(-2*lookbackPeriod)), readOnlyTime: now.Add(lookbackPeriod / 2)}, + {what: add, instanceID: "instance-6", instanceDesc: generateRingInstanceWithInfo("instance-6", "zone-c", []uint32{userToken(userID, "zone-c", 5) + 1}, now.Add(-2*lookbackPeriod)), readOnly: true, readOnlyTime: now.Add(lookbackPeriod / 2)}, {what: test, shardSize: 3, expected: []string{"instance-1", "instance-2", "instance-3", "instance-4", "instance-6"}}, }, }, @@ -2100,12 +2109,24 @@ func TestRing_ShuffleShardWithLookback(t *testing.T) { timeline: []event{ // readOnlyTime is too old to matter {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now.Add(-2*lookbackPeriod))}, - {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-b", []uint32{userToken(userID, "zone-b", 1) + 1}, now.Add(-2*lookbackPeriod)), readOnlyTime: now.Add(-2 * lookbackPeriod)}, + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-b", []uint32{userToken(userID, "zone-b", 1) + 1}, now.Add(-2*lookbackPeriod)), readOnly: true, readOnlyTime: now.Add(-2 * lookbackPeriod)}, {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-b", []uint32{userToken(userID, "zone-b", 2) + 1}, now.Add(-2*lookbackPeriod))}, {what: add, instanceID: "instance-4", instanceDesc: generateRingInstanceWithInfo("instance-4", "zone-c", []uint32{userToken(userID, "zone-c", 3) + 1}, now.Add(-2*lookbackPeriod))}, {what: add, instanceID: "instance-5", instanceDesc: generateRingInstanceWithInfo("instance-5", "zone-c", []uint32{userToken(userID, "zone-c", 4) + 1}, now.Add(-2*lookbackPeriod))}, {what: add, instanceID: "instance-6", instanceDesc: generateRingInstanceWithInfo("instance-6", "zone-c", []uint32{userToken(userID, "zone-c", 5) + 1}, now.Add(-2*lookbackPeriod))}, - {what: test, shardSize: 3, expected: []string{"instance-1", "instance-2", "instance-6"}}, + {what: test, shardSize: 3, expected: []string{"instance-1", "instance-3", "instance-6"}}, + }, + }, + "multi zone, with read only instance, with unknown ReadOnlyUpdatedTimestamp": { + timeline: []event{ + {what: add, instanceID: "instance-1", instanceDesc: generateRingInstanceWithInfo("instance-1", "zone-a", []uint32{userToken(userID, "zone-a", 0) + 1}, now.Add(-2*lookbackPeriod))}, + // instance 2 and 4 are included in addition to other instances in the same zone + {what: add, instanceID: "instance-2", instanceDesc: generateRingInstanceWithInfo("instance-2", "zone-b", []uint32{userToken(userID, "zone-b", 1) + 1}, now.Add(-2*lookbackPeriod)), readOnly: true}, + {what: add, instanceID: "instance-3", instanceDesc: generateRingInstanceWithInfo("instance-3", "zone-b", []uint32{userToken(userID, "zone-b", 2) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-4", instanceDesc: generateRingInstanceWithInfo("instance-4", "zone-c", []uint32{userToken(userID, "zone-c", 3) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-5", instanceDesc: generateRingInstanceWithInfo("instance-5", "zone-c", []uint32{userToken(userID, "zone-c", 4) + 1}, now.Add(-2*lookbackPeriod))}, + {what: add, instanceID: "instance-6", instanceDesc: generateRingInstanceWithInfo("instance-6", "zone-c", []uint32{userToken(userID, "zone-c", 5) + 1}, now.Add(-2*lookbackPeriod)), readOnly: true}, + {what: test, shardSize: 3, expected: []string{"instance-1", "instance-2", "instance-3", "instance-4", "instance-6"}}, }, }, } @@ -2132,10 +2153,8 @@ func TestRing_ShuffleShardWithLookback(t *testing.T) { for ix, event := range testData.timeline { switch event.what { case add: - if !event.readOnlyTime.IsZero() { - event.instanceDesc.ReadOnly = true - event.instanceDesc.ReadOnlyUpdatedTimestamp = event.readOnlyTime.Unix() - } + event.instanceDesc.ReadOnly = event.readOnly + event.instanceDesc.ReadOnlyUpdatedTimestamp = timeToUnixSecons(event.readOnlyTime) ringDesc.Ingesters[event.instanceID] = event.instanceDesc ring.ringTokens = ringDesc.GetTokens()