diff --git a/changelog.d/9115.misc b/changelog.d/9115.misc new file mode 100644 index 0000000000..346741d982 --- /dev/null +++ b/changelog.d/9115.misc @@ -0,0 +1 @@ +Improve efficiency of large state resolutions. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index 6cfadc2b4e..a19d65ad23 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -49,6 +49,7 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage.background_updates import BackgroundUpdater from synapse.storage.engines import BaseDatabaseEngine, PostgresEngine, Sqlite3Engine from synapse.storage.types import Connection, Cursor +from synapse.storage.util.sequence import build_sequence_generator from synapse.types import Collection # python 3 does not have a maximum int value @@ -412,6 +413,16 @@ class DatabasePool: self._check_safe_to_upsert, ) + # We define this sequence here so that it can be referenced from both + # the DataStore and PersistEventStore. + def get_chain_id_txn(txn): + txn.execute("SELECT COALESCE(max(chain_id), 0) FROM event_auth_chains") + return txn.fetchone()[0] + + self.event_chain_id_gen = build_sequence_generator( + engine, get_chain_id_txn, "event_auth_chain_id" + ) + def is_running(self) -> bool: """Is the database pool currently running """ diff --git a/synapse/storage/databases/main/events.py b/synapse/storage/databases/main/events.py index e0fbcc58cf..3216b3f3c8 100644 --- a/synapse/storage/databases/main/events.py +++ b/synapse/storage/databases/main/events.py @@ -43,7 +43,6 @@ from synapse.storage._base import db_to_json, make_in_list_sql_clause from synapse.storage.database import DatabasePool, LoggingTransaction from synapse.storage.databases.main.search import SearchEntry from synapse.storage.util.id_generators import MultiWriterIdGenerator -from synapse.storage.util.sequence import build_sequence_generator from synapse.types import StateMap, get_domain_from_id from synapse.util import json_encoder from synapse.util.iterutils import batch_iter, sorted_topologically @@ -100,14 +99,6 @@ class PersistEventsStore: self._clock = hs.get_clock() self._instance_name = hs.get_instance_name() - def get_chain_id_txn(txn): - txn.execute("SELECT COALESCE(max(chain_id), 0) FROM event_auth_chains") - return txn.fetchone()[0] - - self._event_chain_id_gen = build_sequence_generator( - db.engine, get_chain_id_txn, "event_auth_chain_id" - ) - self._ephemeral_messages_enabled = hs.config.enable_ephemeral_messages self.is_mine_id = hs.is_mine_id @@ -479,12 +470,13 @@ class PersistEventsStore: event_to_room_id = {e.event_id: e.room_id for e in state_events.values()} self._add_chain_cover_index( - txn, event_to_room_id, event_to_types, event_to_auth_chain + txn, self.db_pool, event_to_room_id, event_to_types, event_to_auth_chain, ) + @staticmethod def _add_chain_cover_index( - self, txn, + db_pool: DatabasePool, event_to_room_id: Dict[str, str], event_to_types: Dict[str, Tuple[str, str]], event_to_auth_chain: Dict[str, List[str]], @@ -507,7 +499,7 @@ class PersistEventsStore: # We check if there are any events that need to be handled in the rooms # we're looking at. These should just be out of band memberships, where # we didn't have the auth chain when we first persisted. - rows = self.db_pool.simple_select_many_txn( + rows = db_pool.simple_select_many_txn( txn, table="event_auth_chain_to_calculate", keyvalues={}, @@ -523,7 +515,7 @@ class PersistEventsStore: # (We could pull out the auth events for all rows at once using # simple_select_many, but this case happens rarely and almost always # with a single row.) - auth_events = self.db_pool.simple_select_onecol_txn( + auth_events = db_pool.simple_select_onecol_txn( txn, "event_auth", keyvalues={"event_id": event_id}, retcol="auth_id", ) @@ -572,9 +564,7 @@ class PersistEventsStore: events_to_calc_chain_id_for.add(auth_id) - event_to_auth_chain[ - auth_id - ] = self.db_pool.simple_select_onecol_txn( + event_to_auth_chain[auth_id] = db_pool.simple_select_onecol_txn( txn, "event_auth", keyvalues={"event_id": auth_id}, @@ -606,7 +596,7 @@ class PersistEventsStore: room_id = event_to_room_id.get(event_id) if room_id: e_type, state_key = event_to_types[event_id] - self.db_pool.simple_insert_txn( + db_pool.simple_insert_txn( txn, table="event_auth_chain_to_calculate", values={ @@ -651,7 +641,7 @@ class PersistEventsStore: proposed_new_id = existing_chain_id[0] proposed_new_seq = existing_chain_id[1] + 1 if (proposed_new_id, proposed_new_seq) not in chains_tuples_allocated: - already_allocated = self.db_pool.simple_select_one_onecol_txn( + already_allocated = db_pool.simple_select_one_onecol_txn( txn, table="event_auth_chains", keyvalues={ @@ -672,14 +662,14 @@ class PersistEventsStore: ) if not new_chain_tuple: - new_chain_tuple = (self._event_chain_id_gen.get_next_id_txn(txn), 1) + new_chain_tuple = (db_pool.event_chain_id_gen.get_next_id_txn(txn), 1) chains_tuples_allocated.add(new_chain_tuple) chain_map[event_id] = new_chain_tuple new_chain_tuples[event_id] = new_chain_tuple - self.db_pool.simple_insert_many_txn( + db_pool.simple_insert_many_txn( txn, table="event_auth_chains", values=[ @@ -688,7 +678,7 @@ class PersistEventsStore: ], ) - self.db_pool.simple_delete_many_txn( + db_pool.simple_delete_many_txn( txn, table="event_auth_chain_to_calculate", keyvalues={}, @@ -721,7 +711,7 @@ class PersistEventsStore: # Step 1, fetch all existing links from all the chains we've seen # referenced. chain_links = _LinkMap() - rows = self.db_pool.simple_select_many_txn( + rows = db_pool.simple_select_many_txn( txn, table="event_auth_chain_links", column="origin_chain_id", @@ -785,7 +775,7 @@ class PersistEventsStore: (chain_id, sequence_number), (target_id, target_seq) ) - self.db_pool.simple_insert_many_txn( + db_pool.simple_insert_many_txn( txn, table="event_auth_chain_links", values=[ diff --git a/synapse/storage/databases/main/events_bg_updates.py b/synapse/storage/databases/main/events_bg_updates.py index 90a40a92b4..7128dc1742 100644 --- a/synapse/storage/databases/main/events_bg_updates.py +++ b/synapse/storage/databases/main/events_bg_updates.py @@ -21,6 +21,7 @@ from synapse.api.room_versions import KNOWN_ROOM_VERSIONS from synapse.events import make_event_from_dict from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import DatabasePool, make_tuple_comparison_clause +from synapse.storage.databases.main.events import PersistEventsStore from synapse.storage.types import Cursor from synapse.types import JsonDict @@ -833,8 +834,12 @@ class EventsBackgroundUpdatesStore(SQLBaseStore): # # Annoyingly we need to gut wrench into the persit event store so that # we can reuse the function to calculate the chain cover for rooms. - self.hs.get_datastores().persist_events._add_chain_cover_index( - txn, event_to_room_id, event_to_types, event_to_auth_chain, + PersistEventsStore._add_chain_cover_index( + txn, + self.db_pool, + event_to_room_id, + event_to_types, + event_to_auth_chain, ) return new_last_depth, new_last_stream, count diff --git a/synapse/storage/util/sequence.py b/synapse/storage/util/sequence.py index 4386b6101e..412df6b8ef 100644 --- a/synapse/storage/util/sequence.py +++ b/synapse/storage/util/sequence.py @@ -15,9 +15,8 @@ import abc import logging import threading -from typing import Callable, List, Optional +from typing import TYPE_CHECKING, Callable, List, Optional -from synapse.storage.database import LoggingDatabaseConnection from synapse.storage.engines import ( BaseDatabaseEngine, IncorrectDatabaseSetup, @@ -25,6 +24,9 @@ from synapse.storage.engines import ( ) from synapse.storage.types import Connection, Cursor +if TYPE_CHECKING: + from synapse.storage.database import LoggingDatabaseConnection + logger = logging.getLogger(__name__) @@ -55,7 +57,7 @@ class SequenceGenerator(metaclass=abc.ABCMeta): @abc.abstractmethod def check_consistency( self, - db_conn: LoggingDatabaseConnection, + db_conn: "LoggingDatabaseConnection", table: str, id_column: str, positive: bool = True, @@ -88,7 +90,7 @@ class PostgresSequenceGenerator(SequenceGenerator): def check_consistency( self, - db_conn: LoggingDatabaseConnection, + db_conn: "LoggingDatabaseConnection", table: str, id_column: str, positive: bool = True,