Fix performance of check_state_groups_and_bump_deletion (#18141)
Regressed as part of #18107

This does two things:
1. Only check if the state groups have been deleted when calculating the
event context (as that's when we will insert them). This avoids lots of
checks for read operations.
2. Don't lock the `state_groups` rows when doing the check. The lock adds
overhead, and it doesn't prevent any races here (see the sketch below).
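
To illustrate the intent, here is a minimal sketch of the resulting pattern — not the actual Synapse API; the store class, method signature, and names below are simplified stand-ins. The existence check and deletion bump happen only on the write path, where an event is about to be persisted against a state group, and the check reports what is missing rather than taking row locks.

```python
# Illustrative sketch only: simplified, synchronous stand-ins for Synapse's
# _state_deletion_store; not the real API or signatures.
from typing import AbstractSet, Set


class FakeStateDeletionStore:
    """Tracks which state groups still exist and which have had their
    pending deletion bumped (delayed)."""

    def __init__(self, existing: Set[int]) -> None:
        self._existing = existing
        self.bumped: Set[int] = set()

    def check_state_groups_and_bump_deletion(
        self, state_groups: AbstractSet[int]
    ) -> AbstractSet[int]:
        # A plain existence check: no row locks are taken here (mirroring
        # point 2 above); we just bump the groups that are present and
        # report the ones that are missing.
        present = set(state_groups) & self._existing
        self.bumped |= present
        return set(state_groups) - present


def calculate_event_context(store: FakeStateDeletionStore, state_group: int) -> None:
    # Write path only (point 1 above): we're about to persist an event that
    # references this state group, so verify it still exists and delay any
    # pending deletion. Read-only paths skip this call entirely.
    missing = store.check_state_groups_and_bump_deletion({state_group})
    if missing:
        raise RuntimeError(f"Missing state group: {state_group}")


store = FakeStateDeletionStore(existing={1, 2, 3})
calculate_event_context(store, 2)   # group 2 exists: bumped, no error
assert store.bumped == {2}
```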
erikjohnston authored Feb 7, 2025
1 parent 553e988 commit dcf7b39
Showing 4 changed files with 46 additions and 17 deletions.
1 change: 1 addition & 0 deletions changelog.d/18141.bugfix
@@ -0,0 +1 @@
+Fix regression in performance of sending events due to superfluous reads and locks. Introduced in v1.124.0rc1.
32 changes: 22 additions & 10 deletions synapse/state/__init__.py
@@ -359,6 +359,28 @@ async def calculate_context_info(
                 await_full_state=False,
             )

+            # Ensure we still have the state groups we're relying on, and bump
+            # their usage time to avoid them being deleted from under us.
+            if entry.state_group:
+                missing_state_group = await self._state_deletion_store.check_state_groups_and_bump_deletion(
+                    {entry.state_group}
+                )
+                if missing_state_group:
+                    raise Exception(f"Missing state group: {entry.state_group}")
+            elif entry.prev_group:
+                # We only rely on the prev group when persisting the event if we
+                # don't have an `entry.state_group`.
+                missing_state_group = await self._state_deletion_store.check_state_groups_and_bump_deletion(
+                    {entry.prev_group}
+                )
+
+                if missing_state_group:
+                    # If we're missing the prev group then we can just clear the
+                    # entries, and rely on `entry._state` (which must exist if
+                    # `entry.state_group` is None)
+                    entry.prev_group = None
+                    entry.delta_ids = None
+
             state_group_before_event_prev_group = entry.prev_group
             deltas_to_state_group_before_event = entry.delta_ids
             state_ids_before_event = None
@@ -519,16 +541,6 @@ async def resolve_state_groups_for_events(
                 state_group_id
             )

-            if prev_group:
-                # Ensure that we still have the prev group, and ensure we don't
-                # delete it while we're persisting the event.
-                missing_state_group = await self._state_deletion_store.check_state_groups_and_bump_deletion(
-                    {prev_group}
-                )
-                if missing_state_group:
-                    prev_group = None
-                    delta_ids = None
-
             return _StateCacheEntry(
                 state=None,
                 state_group=state_group_id,
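The comments in the new block above lean on the fact that the prev group is only a delta optimisation: if it has been deleted, the code can drop `prev_group`/`delta_ids` and fall back to the entry's full state map. A hedged sketch of that fallback, using a hypothetical `Entry` dataclass rather than the real `_StateCacheEntry`:

```python
# Hypothetical Entry dataclass standing in for Synapse's _StateCacheEntry;
# the invariant assumed here (full state present whenever state_group is None)
# comes from the comment in the diff above.
from dataclasses import dataclass, field
from typing import Dict, Optional, Tuple

StateKey = Tuple[str, str]


@dataclass
class Entry:
    state_group: Optional[int] = None
    prev_group: Optional[int] = None
    delta_ids: Optional[Dict[StateKey, str]] = None
    state: Dict[StateKey, str] = field(default_factory=dict)


def drop_missing_prev_group(entry: Entry) -> None:
    # The prev group is just a delta representation; if it has been deleted
    # we clear it and rely on the full state map instead of raising.
    entry.prev_group = None
    entry.delta_ids = None


e = Entry(prev_group=42, delta_ids={("m.room.name", ""): "$some_event"})
drop_missing_prev_group(e)
assert e.prev_group is None and e.delta_ids is None
```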
26 changes: 21 additions & 5 deletions synapse/storage/databases/state/deletion.py
@@ -123,12 +123,28 @@ async def check_state_groups_and_bump_deletion(
             "check_state_groups_and_bump_deletion",
             self._check_state_groups_and_bump_deletion_txn,
             state_groups,
+            # We don't need to lock if we're just doing a quick check, as the
+            # lock doesn't prevent any races here.
+            lock=False,
         )

     def _check_state_groups_and_bump_deletion_txn(
-        self, txn: LoggingTransaction, state_groups: AbstractSet[int]
+        self, txn: LoggingTransaction, state_groups: AbstractSet[int], lock: bool = True
     ) -> Collection[int]:
-        existing_state_groups = self._get_existing_groups_with_lock(txn, state_groups)
+        """Checks to make sure that the state groups haven't been deleted, and
+        if they're pending deletion we delay it (allowing time for any event
+        that will use them to finish persisting).
+
+        The `lock` flag sets if we should lock the `state_group` rows we're
+        checking, which we should do when storing new groups.
+
+        Returns:
+            The state groups that are missing, if any.
+        """
+
+        existing_state_groups = self._get_existing_groups_with_lock(
+            txn, state_groups, lock=lock
+        )
+
         self._bump_deletion_txn(txn, existing_state_groups)

@@ -188,18 +204,18 @@ def _bump_deletion_txn(
         )

     def _get_existing_groups_with_lock(
-        self, txn: LoggingTransaction, state_groups: Collection[int]
+        self, txn: LoggingTransaction, state_groups: Collection[int], lock: bool = True
     ) -> AbstractSet[int]:
         """Return which of the given state groups are in the database, and locks
         those rows with `KEY SHARE` to ensure they don't get concurrently
-        deleted."""
+        deleted (if `lock` is true)."""
         clause, args = make_in_list_sql_clause(self.db_pool.engine, "id", state_groups)

         sql = f"""
             SELECT id FROM state_groups
             WHERE {clause}
         """
-        if isinstance(self.db_pool.engine, PostgresEngine):
+        if lock and isinstance(self.db_pool.engine, PostgresEngine):
             # On postgres we add a row level lock to the rows to ensure that we
             # conflict with any concurrent DELETEs. `FOR KEY SHARE` lock will
             # not conflict with other read
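A minimal standalone sketch of the lock flag's effect on the query (the helper below is assumed for illustration; the real code builds the `IN` clause with `make_in_list_sql_clause` and runs through the database pool): the `FOR KEY SHARE` suffix is only appended when `lock` is set and the engine is Postgres.

```python
# Standalone sketch (assumed helper, '?' placeholders for brevity); the real
# implementation uses make_in_list_sql_clause and Synapse's db pool.
from typing import Collection, List, Tuple


def build_existing_groups_query(
    state_groups: Collection[int], lock: bool, is_postgres: bool
) -> Tuple[str, List[int]]:
    placeholders = ", ".join("?" for _ in state_groups)
    sql = f"SELECT id FROM state_groups WHERE id IN ({placeholders})"
    if lock and is_postgres:
        # FOR KEY SHARE conflicts with a concurrent DELETE of these rows but
        # not with other readers, so it's only worth taking when we're about
        # to store new groups that reference them.
        sql += " FOR KEY SHARE"
    return sql, list(state_groups)


sql, _ = build_existing_groups_query({101, 102}, lock=False, is_postgres=True)
print(sql)  # plain SELECT: the quick existence check skips the row lock
sql, _ = build_existing_groups_query({101, 102}, lock=True, is_postgres=True)
print(sql)  # "... FOR KEY SHARE": used when storing new state groups
```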
4 changes: 2 additions & 2 deletions tests/rest/client/test_rooms.py
@@ -742,7 +742,7 @@ def test_post_room_no_keys(self) -> None:
         self.assertEqual(HTTPStatus.OK, channel.code, channel.result)
         self.assertTrue("room_id" in channel.json_body)
         assert channel.resource_usage is not None
-        self.assertEqual(36, channel.resource_usage.db_txn_count)
+        self.assertEqual(35, channel.resource_usage.db_txn_count)

     def test_post_room_initial_state(self) -> None:
         # POST with initial_state config key, expect new room id
@@ -755,7 +755,7 @@ def test_post_room_initial_state(self) -> None:
         self.assertEqual(HTTPStatus.OK, channel.code, channel.result)
         self.assertTrue("room_id" in channel.json_body)
         assert channel.resource_usage is not None
-        self.assertEqual(38, channel.resource_usage.db_txn_count)
+        self.assertEqual(37, channel.resource_usage.db_txn_count)

     def test_post_room_visibility_key(self) -> None:
         # POST with visibility config key, expect new room id
