Skip to content

Commit aec7455

Browse files
committed
Limit the number of state_keys that we remember
See #17809 (comment)
1 parent 7e667fe commit aec7455

File tree

2 files changed

+186
-7
lines changed

2 files changed

+186
-7
lines changed

synapse/handlers/sliding_sync/__init__.py

Lines changed: 75 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@
1212
# <https://www.gnu.org/licenses/agpl-3.0.html>.
1313
#
1414

15+
import itertools
1516
import logging
1617
from itertools import chain
1718
from typing import TYPE_CHECKING, AbstractSet, Dict, List, Mapping, Optional, Set, Tuple
@@ -79,6 +80,15 @@
7980
["initial"],
8081
)
8182

83+
# Limit the number of state_keys we should remember sending down the connection for each
84+
# (room_id, user_id). We don't want to store and pull out too much data in the database.
85+
#
86+
# 100 is an arbitrary but small-ish number. The idea is that we probably won't send down
87+
# too many duplicate member state events for a given ongoing conversation if we keep 100
88+
# around. Most rooms don't have 100 members and it takes a while to cycle through 100
89+
# members.
90+
MAX_NUMBER_STATE_KEYS_TO_REMEMBER = 100
91+
8292

8393
class SlidingSyncHandler:
8494
def __init__(self, hs: "HomeServer"):
@@ -1445,13 +1455,42 @@ def _required_state_changes(
14451455
old_state_keys - request_state_keys
14461456
) & changed_state_keys
14471457

1448-
# Always update changes to include the newly added keys
1449-
changes[event_type] = request_state_keys | (
1450-
# Wildcard and lazy state keys are not sticky from previous requests
1458+
# Figure out which state keys we should remember sending down the connection
1459+
inheritable_previous_state_keys = (
1460+
# Retain the previous state_keys that we've sent down before.
1461+
# Wildcard and lazy state keys are not sticky from previous requests.
14511462
(old_state_keys - {StateValues.WILDCARD, StateValues.LAZY})
14521463
- invalidated_state_keys
14531464
)
14541465

1466+
# Always update changes to include the newly added keys, use the new requested
1467+
# set with whatever hasn't been invalidated from the previous set.
1468+
changes[event_type] = request_state_keys | inheritable_previous_state_keys
1469+
# Limit the number of state_keys we should remember sending down the connection
1470+
# for each (room_id, user_id). We don't want to store and pull out too much data
1471+
# in the database.
1472+
#
1473+
# Only remember up to (MAX_NUMBER_STATE_KEYS_TO_REMEMBER) state_keys
1474+
if len(changes[event_type]) > MAX_NUMBER_STATE_KEYS_TO_REMEMBER:
1475+
changes[event_type] = (
1476+
# Make sure the requested keys are still present
1477+
request_state_keys
1478+
|
1479+
# Fill the rest with previous state_keys. Ideally, we could sort these
1480+
# by recency but it's just a set so just pick an arbitrary subset (good
1481+
# enough).
1482+
set(
1483+
itertools.islice(
1484+
inheritable_previous_state_keys,
1485+
# Just taking the difference isn't perfect as there could be
1486+
# overlap in the keys between the requested and previous but we
1487+
# will decide to just take the easy route for now and avoid
1488+
# additional set operations to figure it out.
1489+
MAX_NUMBER_STATE_KEYS_TO_REMEMBER - len(request_state_keys),
1490+
)
1491+
)
1492+
)
1493+
14551494
if StateValues.WILDCARD in old_state_keys:
14561495
# We were previously fetching everything for this type, so we don't need to
14571496
# fetch anything new.
@@ -1502,14 +1541,43 @@ def _required_state_changes(
15021541
old_state_keys - request_state_keys
15031542
) & changed_state_keys
15041543

1544+
# We've expanded the set of state keys, ...
15051545
if request_state_keys - old_state_keys:
1506-
# We've expanded the set of state keys, use the new requested set
1507-
# with whatever hasn't been invalidated from the previous set.
1508-
changes[event_type] = request_state_keys | (
1509-
# Wildcard and lazy state keys are not sticky from previous requests
1546+
# Figure out which state keys we should remember sending down the connection
1547+
inheritable_previous_state_keys = (
1548+
# Retain the previous state_keys that we've sent down before.
1549+
# Wildcard and lazy state keys are not sticky from previous requests.
15101550
(old_state_keys - {StateValues.WILDCARD, StateValues.LAZY})
15111551
- invalidated_state_keys
15121552
)
1553+
1554+
# We've expanded the set of state keys, use the new requested set
1555+
# with whatever hasn't been invalidated from the previous set.
1556+
changes[event_type] = request_state_keys | inheritable_previous_state_keys
1557+
# Limit the number of state_keys we should remember sending down the connection
1558+
# for each (room_id, user_id). We don't want to store and pull out too much data
1559+
# in the database.
1560+
#
1561+
# Only remember up to (MAX_NUMBER_STATE_KEYS_TO_REMEMBER) state_keys
1562+
if len(changes[event_type]) > MAX_NUMBER_STATE_KEYS_TO_REMEMBER:
1563+
changes[event_type] = (
1564+
# Make sure the requested keys are still present
1565+
request_state_keys
1566+
|
1567+
# Fill the rest with previous state_keys. Ideally, we could sort
1568+
# these by recency but it's just a set so just pick an arbitrary
1569+
# subset (good enough).
1570+
set(
1571+
itertools.islice(
1572+
inheritable_previous_state_keys,
1573+
# Just taking the difference isn't perfect as there could be
1574+
# overlap in the keys between the requested and previous but we
1575+
# will decide to just take the easy route for now and avoid
1576+
# additional set operations to figure it out.
1577+
MAX_NUMBER_STATE_KEYS_TO_REMEMBER - len(request_state_keys),
1578+
)
1579+
)
1580+
)
15131581
continue
15141582

15151583
old_state_key_wildcard = StateValues.WILDCARD in old_state_keys

tests/handlers/test_sliding_sync.py

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@
3333
)
3434
from synapse.api.room_versions import RoomVersions
3535
from synapse.handlers.sliding_sync import (
36+
MAX_NUMBER_STATE_KEYS_TO_REMEMBER,
3637
RoomsForUserType,
3738
RoomSyncConfig,
3839
StateValues,
@@ -3319,6 +3320,32 @@ class RequiredStateChangesTestCase(unittest.TestCase):
33193320
),
33203321
),
33213322
),
3323+
(
3324+
"simple_retain_previous_state_keys",
3325+
"""Test adding a state key to the config and retaining a previously sent state_key""",
3326+
RequiredStateChangesTestParameters(
3327+
previous_required_state_map={"type": {"state_key1"}},
3328+
request_required_state_map={"type": {"state_key2", "state_key3"}},
3329+
state_deltas={("type", "state_key2"): "$event_id"},
3330+
expected_with_state_deltas=(
3331+
# We've added a key so we should persist the changed required state
3332+
# config.
3333+
#
3334+
# Retain `state_key1` from the `previous_required_state_map`
3335+
{"type": {"state_key1", "state_key2", "state_key3"}},
3336+
# We should see the new state_keys added
3337+
StateFilter.from_types(
3338+
[("type", "state_key2"), ("type", "state_key3")]
3339+
),
3340+
),
3341+
expected_without_state_deltas=(
3342+
{"type": {"state_key1", "state_key2", "state_key3"}},
3343+
StateFilter.from_types(
3344+
[("type", "state_key2"), ("type", "state_key3")]
3345+
),
3346+
),
3347+
),
3348+
),
33223349
(
33233350
"simple_remove_type",
33243351
"""
@@ -3894,6 +3921,14 @@ class RequiredStateChangesTestCase(unittest.TestCase):
38943921
# sent it before and send the new state. (if we were tracking
38953922
# that we sent any other state, we should still keep track
38963923
# that).
3924+
#
3925+
# This acts the same as the `simple_remove_type` test. It's
3926+
# possible that we could remember the specific `state_keys` that
3927+
# we have sent down before but this currently just acts the same
3928+
# as if a whole `type` was removed. Perhaps it's good that we
3929+
# "garbage collect" and forget what we've sent before for a
3930+
# given `type` when the client stops caring about a certain
3931+
# `type`.
38973932
{},
38983933
# We don't need to request anything more if they are requesting
38993934
# less state now
@@ -4133,3 +4168,79 @@ def test_xxx(
41334168
test_parameters.expected_with_state_deltas[1],
41344169
"added_state_filter does not match (with state_deltas)",
41354170
)
4171+
4172+
@parameterized.expand(
4173+
[
4174+
# Test with a normal arbitrary type (no special meaning)
4175+
("arbitrary_type", "type", set()),
4176+
# Test with membership
4177+
("membership", EventTypes.Member, set()),
4178+
# Test with lazy-loading room members
4179+
("lazy_loading_membership", EventTypes.Member, {StateValues.LAZY}),
4180+
]
4181+
)
4182+
def test_limit_retained_previous_state_keys(
4183+
self,
4184+
_test_label: str,
4185+
event_type: str,
4186+
extra_state_keys: Set[str],
4187+
) -> None:
4188+
"""
4189+
Test that we limit the number of state_keys that we remember but always include
4190+
the state_keys that we've just requested.
4191+
"""
4192+
previous_required_state_map = {
4193+
event_type: {
4194+
# Prefix the state_keys we've "prev_"iously sent so they are easier to
4195+
# identify in our assertions.
4196+
f"prev_state_key{i}"
4197+
for i in range(MAX_NUMBER_STATE_KEYS_TO_REMEMBER - 30)
4198+
}
4199+
| extra_state_keys
4200+
}
4201+
request_required_state_map = {
4202+
event_type: {f"state_key{i}" for i in range(50)} | extra_state_keys
4203+
}
4204+
4205+
# (function under test)
4206+
changed_required_state_map, added_state_filter = _required_state_changes(
4207+
user_id="@user:test",
4208+
prev_required_state_map=previous_required_state_map,
4209+
request_required_state_map=request_required_state_map,
4210+
state_deltas={},
4211+
)
4212+
assert changed_required_state_map is not None
4213+
4214+
# We should only remember up to the maximum number of state keys
4215+
self.assertGreaterEqual(
4216+
len(changed_required_state_map[event_type]),
4217+
# Most of the time this will be `MAX_NUMBER_STATE_KEYS_TO_REMEMBER` but
4218+
# because we are just naively selecting enough previous state_keys to fill
4219+
# the limit, there might be some overlap in what's added back which means we
4220+
# might have slightly less than the limit.
4221+
#
4222+
# `extra_state_keys` overlaps in the previous and requested
4223+
# `required_state_map` so we might see this this scenario.
4224+
MAX_NUMBER_STATE_KEYS_TO_REMEMBER - len(extra_state_keys),
4225+
)
4226+
4227+
# Should include all of the requested state
4228+
self.assertIncludes(
4229+
changed_required_state_map[event_type],
4230+
request_required_state_map[event_type],
4231+
)
4232+
# And the rest is filled with the previous state keys
4233+
#
4234+
# We can't assert the exact state_keys since we don't know the order so we just
4235+
# check that they all start with "prev_" and that we have the correct amount.
4236+
remaining_state_keys = (
4237+
changed_required_state_map[event_type]
4238+
- request_required_state_map[event_type]
4239+
)
4240+
self.assertGreater(
4241+
len(remaining_state_keys),
4242+
0,
4243+
)
4244+
assert all(
4245+
state_key.startswith("prev_") for state_key in remaining_state_keys
4246+
), "Remaining state_keys should be the previous state_keys"

0 commit comments

Comments
 (0)