From 9b334b3f97057ac145622d2e4d0ad036ef27b468 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 11 Mar 2018 20:01:41 +0000 Subject: [PATCH 01/64] WIP experiment in lazyloading room members --- synapse/handlers/sync.py | 43 ++++++++++++++++++++++++++---------- synapse/storage/state.py | 47 +++++++++++++++++++++++++++++++++++----- 2 files changed, 73 insertions(+), 17 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 0f713ce038..809e9fece9 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -399,7 +399,7 @@ class SyncHandler(object): )) @defer.inlineCallbacks - def get_state_after_event(self, event): + def get_state_after_event(self, event, types=None): """ Get the room state after the given event @@ -409,14 +409,14 @@ class SyncHandler(object): Returns: A Deferred map from ((type, state_key)->Event) """ - state_ids = yield self.store.get_state_ids_for_event(event.event_id) + state_ids = yield self.store.get_state_ids_for_event(event.event_id, types) if event.is_state(): state_ids = state_ids.copy() state_ids[(event.type, event.state_key)] = event.event_id defer.returnValue(state_ids) @defer.inlineCallbacks - def get_state_at(self, room_id, stream_position): + def get_state_at(self, room_id, stream_position, types=None): """ Get the room state at a particular stream position Args: @@ -432,7 +432,7 @@ class SyncHandler(object): if last_events: last_event = last_events[-1] - state = yield self.get_state_after_event(last_event) + state = yield self.get_state_after_event(last_event, types) else: # no events in this room - so presumably no state @@ -441,7 +441,7 @@ class SyncHandler(object): @defer.inlineCallbacks def compute_state_delta(self, room_id, batch, sync_config, since_token, now_token, - full_state): + full_state, filter_members): """ Works out the differnce in state between the start of the timeline and the previous sync. @@ -454,6 +454,8 @@ class SyncHandler(object): be None. now_token(str): Token of the end of the current batch. full_state(bool): Whether to force returning the full state. + filter_members(bool): Whether to only return state for members + referenced in this timeline segment Returns: A deferred new event dictionary @@ -464,18 +466,35 @@ class SyncHandler(object): # TODO(mjark) Check for new redactions in the state events. with Measure(self.clock, "compute_state_delta"): + + types = None + if filter_members: + # We only request state for the members needed to display the + # timeline: + types = ( + (EventTypes.Member, state_key) + for state_key in set( + event.sender # FIXME: we also care about targets etc. + for event in batch.events + ) + ) + types.append((None, None)) # don't just filter to room members + + # TODO: we should opportunistically deduplicate these members too + # within the same sync series (based on an in-memory cache) + if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id + batch.events[-1].event_id, types=types ) state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id + batch.events[0].event_id, types=types ) else: current_state_ids = yield self.get_state_at( - room_id, stream_position=now_token + room_id, stream_position=now_token, types=types ) state_ids = current_state_ids @@ -493,15 +512,15 @@ class SyncHandler(object): ) elif batch.limited: state_at_previous_sync = yield self.get_state_at( - room_id, stream_position=since_token + room_id, stream_position=since_token, types=types ) current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id + batch.events[-1].event_id, types=types ) state_at_timeline_start = yield self.store.get_state_ids_for_event( - batch.events[0].event_id + batch.events[0].event_id, types=types ) timeline_state = { @@ -1325,7 +1344,7 @@ class SyncHandler(object): state = yield self.compute_state_delta( room_id, batch, sync_config, since_token, now_token, - full_state=full_state + full_state=full_state, filter_members=True ) if room_builder.rtype == "joined": diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 2b325e1c1f..da6bb685fa 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -198,8 +198,15 @@ class StateGroupWorkerStore(SQLBaseStore): def _get_state_groups_from_groups_txn(self, txn, groups, types=None): results = {group: {} for group in groups} + + include_other_types = False + if types is not None: - types = list(set(types)) # deduplicate types list + type_set = set(types) + if (None, None) in type_set: + include_other_types = True + type_set.remove((None, None)) + types = list(type_set) # deduplicate types list if isinstance(self.database_engine, PostgresEngine): # Temporarily disable sequential scans in this transaction. This is @@ -238,11 +245,21 @@ class StateGroupWorkerStore(SQLBaseStore): if types: clause_to_args = [ ( - "AND type = ? AND state_key = ?", - (etype, state_key) + "AND type = ? AND state_key = ?" if state_key is not None else "AND type = ?", + (etype, state_key) if state_key is not None else (etype) ) for etype, state_key in types ] + + if include_other_types: + # XXX: check whether this slows postgres down like a list of + # ORs does too? + clause_to_args.append( + ( + "AND type <> ? " * len(types), + [t for (t, _) in types] + ) + ) else: # If types is None we fetch all the state, and so just use an # empty where clause with no extra args. @@ -263,6 +280,10 @@ class StateGroupWorkerStore(SQLBaseStore): where_clause = "AND (%s)" % ( " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), ) + if include_other_types: + where_clause += " AND (%s)" % ( + " AND ".join(["type <> ?"] * len(types)), + ) else: where_clause = "" @@ -449,17 +470,27 @@ class StateGroupWorkerStore(SQLBaseStore): group: The state group to lookup types (list): List of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the - `type`. + `type`. Presence of type of `None` indicates that types not + in the list should not be filtered out. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) type_to_key = {} missing_types = set() + include_other_types = False + for typ, state_key in types: key = (typ, state_key) + + if typ is None: + include_other_types = True + next + if state_key is None: type_to_key[typ] = None + # XXX: why do we mark the type as missing from our cache just + # because we weren't filtering on a specific value of state_key? missing_types.add(key) else: if type_to_key.get(typ, object()) is not None: @@ -478,7 +509,7 @@ class StateGroupWorkerStore(SQLBaseStore): return True if state_key in valid_state_keys: return True - return False + return include_other_types got_all = is_all or not missing_types @@ -507,6 +538,12 @@ class StateGroupWorkerStore(SQLBaseStore): with matching types. `types` is a list of `(type, state_key)`, where a `state_key` of None matches all state_keys. If `types` is None then all events are returned. + + XXX: is it really true that `state_key` of None in `types` matches all + state_keys? it looks like _get-some_state_from_cache does the right thing, + but _get_state_groups_from_groups_txn treats ths None is turned into + 'AND state_key = NULL' or similar (at least until i just fixed it) --Matthew + I've filed this as https://github.com/matrix-org/synapse/issues/2969 """ if types: types = frozenset(types) From 87133652657c5073616419b0afc533eac6ae6750 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 11 Mar 2018 20:10:25 +0000 Subject: [PATCH 02/64] typos --- synapse/handlers/sync.py | 4 ++-- synapse/storage/state.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 809e9fece9..fa730ca760 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -471,13 +471,13 @@ class SyncHandler(object): if filter_members: # We only request state for the members needed to display the # timeline: - types = ( + types = [ (EventTypes.Member, state_key) for state_key in set( event.sender # FIXME: we also care about targets etc. for event in batch.events ) - ) + ] types.append((None, None)) # don't just filter to room members # TODO: we should opportunistically deduplicate these members too diff --git a/synapse/storage/state.py b/synapse/storage/state.py index da6bb685fa..0238200286 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -301,6 +301,8 @@ class StateGroupWorkerStore(SQLBaseStore): args = [next_group] if types: args.extend(i for typ in types for i in typ) + if include_other_types: + args.extend(typ for (typ, _) in types) txn.execute( "SELECT type, state_key, event_id FROM state_groups_state" From 97c0496cfa89b037d89fccd05dd03442b80e07fc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 12 Mar 2018 00:27:06 +0000 Subject: [PATCH 03/64] fix sqlite where clause --- synapse/storage/state.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 0238200286..b796d3c995 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -277,13 +277,14 @@ class StateGroupWorkerStore(SQLBaseStore): results[group][key] = event_id else: if types is not None: - where_clause = "AND (%s)" % ( + where_clause = "AND (%s" % ( " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), ) if include_other_types: - where_clause += " AND (%s)" % ( + where_clause += " OR (%s)" % ( " AND ".join(["type <> ?"] * len(types)), ) + where_clause += ")" else: where_clause = "" From fdedcd1f4ddeaa3ed5bfd3c05ab2977b4e8ed457 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 12 Mar 2018 01:39:06 +0000 Subject: [PATCH 04/64] correctly handle None state_keys and fix include_other_types thinko --- synapse/storage/state.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index b796d3c995..405e6b6770 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -276,15 +276,23 @@ class StateGroupWorkerStore(SQLBaseStore): key = (typ, state_key) results[group][key] = event_id else: + where_args = [] if types is not None: - where_clause = "AND (%s" % ( - " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), - ) + where_clause = "AND (" + for typ in types: + if typ[1] is None: + where_clause += "(type = ?) OR " + where_args.extend(typ[0]) + else: + where_clause += "(type = ? AND state_key = ?) OR " + where_args.extend([typ[0], typ[1]]) + if include_other_types: - where_clause += " OR (%s)" % ( + where_clause += "(%s) OR " % ( " AND ".join(["type <> ?"] * len(types)), ) - where_clause += ")" + where_args.extend(t for (t, _) in types) + where_clause += "0)" # 0 to terminate the last OR else: where_clause = "" @@ -301,9 +309,7 @@ class StateGroupWorkerStore(SQLBaseStore): # after we finish deduping state, which requires this func) args = [next_group] if types: - args.extend(i for typ in types for i in typ) - if include_other_types: - args.extend(typ for (typ, _) in types) + args.extend(where_args) txn.execute( "SELECT type, state_key, event_id FROM state_groups_state" @@ -507,12 +513,12 @@ class StateGroupWorkerStore(SQLBaseStore): def include(typ, state_key): valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: - return False + return include_other_types if valid_state_keys is None: return True if state_key in valid_state_keys: return True - return include_other_types + return False got_all = is_all or not missing_types From 1b1c13777154b5b0cf8bf8cf809381f889a2a82d Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 17:52:52 +0000 Subject: [PATCH 05/64] fix bug #2926 --- synapse/storage/state.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 2b325e1c1f..783cebb351 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -240,6 +240,10 @@ class StateGroupWorkerStore(SQLBaseStore): ( "AND type = ? AND state_key = ?", (etype, state_key) + ) if state_key is not None else + ( + "AND type = ?", + (etype) ) for etype, state_key in types ] @@ -259,10 +263,19 @@ class StateGroupWorkerStore(SQLBaseStore): key = (typ, state_key) results[group][key] = event_id else: + where_args = [] if types is not None: - where_clause = "AND (%s)" % ( - " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), - ) + where_clause = "AND (" + for typ in types: + if typ[1] is None: + where_clause += "(type = ?)" + where_args.extend(typ[0]) + else: + where_clause += "(type = ? AND state_key = ?)" + where_args.extend([typ[0], typ[1]]) + if typ != types[-1]: + where_clause += " OR " + where_clause += ")" else: where_clause = "" @@ -279,7 +292,7 @@ class StateGroupWorkerStore(SQLBaseStore): # after we finish deduping state, which requires this func) args = [next_group] if types: - args.extend(i for typ in types for i in typ) + args.extend(where_args) txn.execute( "SELECT type, state_key, event_id FROM state_groups_state" From 52f7e23c7276b2848aa5291d8b78875b7c32a658 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 18:07:55 +0000 Subject: [PATCH 06/64] PR feedbackz --- synapse/storage/state.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 783cebb351..77259a3141 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -240,10 +240,9 @@ class StateGroupWorkerStore(SQLBaseStore): ( "AND type = ? AND state_key = ?", (etype, state_key) - ) if state_key is not None else - ( + ) if state_key is not None else ( "AND type = ?", - (etype) + (etype,) ) for etype, state_key in types ] From b2aba9e43053f9f297671fce0051bfc18a8b655a Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 18:13:44 +0000 Subject: [PATCH 07/64] build where_clause sanely --- synapse/storage/state.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 77259a3141..82740266bf 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -263,18 +263,16 @@ class StateGroupWorkerStore(SQLBaseStore): results[group][key] = event_id else: where_args = [] + where_clauses = [] if types is not None: - where_clause = "AND (" for typ in types: if typ[1] is None: - where_clause += "(type = ?)" + where_clauses.append("(type = ?)") where_args.extend(typ[0]) else: - where_clause += "(type = ? AND state_key = ?)" + where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) - if typ != types[-1]: - where_clause += " OR " - where_clause += ")" + where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: where_clause = "" From 865377a70d9d7db27b89348d2ebbd394f701c490 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 19:45:36 +0000 Subject: [PATCH 08/64] disable optimisation for searching for state groups when type filter includes wildcards on state_key --- synapse/storage/state.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 82740266bf..39f73afaa2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -264,11 +264,13 @@ class StateGroupWorkerStore(SQLBaseStore): else: where_args = [] where_clauses = [] + wildcard_types = False if types is not None: for typ in types: if typ[1] is None: where_clauses.append("(type = ?)") where_args.extend(typ[0]) + wildcard_types = True else: where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) @@ -302,9 +304,17 @@ class StateGroupWorkerStore(SQLBaseStore): if (typ, state_key) not in results[group] ) - # If the lengths match then we must have all the types, - # so no need to go walk further down the tree. - if types is not None and len(results[group]) == len(types): + # If the number of entries inthe (type,state_key)->event_id dict + # matches the number of (type,state_keys) types we were searching + # for, then we must have found them all, so no need to go walk + # further down the tree... UNLESS our types filter contained + # wildcards (i.e. Nones) in which case we have to do an exhaustive + # search + if ( + types is not None and + not wildcard_types and + len(results[group]) == len(types) + ): break next_group = self._simple_select_one_onecol_txn( From afbf4d3dccb3d18276e4b119b0267490ca522b4b Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 19:48:04 +0000 Subject: [PATCH 09/64] typoe --- synapse/storage/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 39f73afaa2..ffa4246031 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -304,7 +304,7 @@ class StateGroupWorkerStore(SQLBaseStore): if (typ, state_key) not in results[group] ) - # If the number of entries inthe (type,state_key)->event_id dict + # If the number of entries in the (type,state_key)->event_id dict # matches the number of (type,state_keys) types we were searching # for, then we must have found them all, so no need to go walk # further down the tree... UNLESS our types filter contained From 14a9d2f73d50225f190f42e270cbf9ef7447bd8c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 22:03:42 +0000 Subject: [PATCH 10/64] ensure we always include the members for a given timeline block --- synapse/handlers/sync.py | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index fa730ca760..c754cfdeeb 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -468,6 +468,8 @@ class SyncHandler(object): with Measure(self.clock, "compute_state_delta"): types = None + member_state_ids = {} + if filter_members: # We only request state for the members needed to display the # timeline: @@ -492,6 +494,13 @@ class SyncHandler(object): state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types ) + + if filter_members: + member_state_ids = { + t: state_ids[t] + for t in state_ids if t[0] == EventTypes.member + } + else: current_state_ids = yield self.get_state_at( room_id, stream_position=now_token, types=types @@ -499,6 +508,12 @@ class SyncHandler(object): state_ids = current_state_ids + if filter_members: + member_state_ids = { + t: state_ids[t] + for t in state_ids if t[0] == EventTypes.member + } + timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -507,6 +522,7 @@ class SyncHandler(object): state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_ids, + timeline_start_members=member_state_ids, previous={}, current=current_state_ids, ) @@ -523,6 +539,12 @@ class SyncHandler(object): batch.events[0].event_id, types=types ) + if filter_members: + member_state_ids = { + t: state_at_timeline_start[t] + for t in state_ids if t[0] == EventTypes.member + } + timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -531,6 +553,7 @@ class SyncHandler(object): state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, + timeline_start_members=member_state_ids, previous=state_at_previous_sync, current=current_state_ids, ) @@ -1440,12 +1463,16 @@ def _action_has_highlight(actions): return False -def _calculate_state(timeline_contains, timeline_start, previous, current): +def _calculate_state(timeline_contains, timeline_start, timeline_start_members, + previous, current): """Works out what state to include in a sync response. Args: timeline_contains (dict): state in the timeline timeline_start (dict): state at the start of the timeline + timeline_start_members (dict): state at the start of the timeline + for room members who participate in this chunk of timeline. + Should always be a subset of timeline_start. previous (dict): state at the end of the previous sync (or empty dict if this is an initial sync) current (dict): state at the end of the timeline @@ -1464,11 +1491,12 @@ def _calculate_state(timeline_contains, timeline_start, previous, current): } c_ids = set(e for e in current.values()) - tc_ids = set(e for e in timeline_contains.values()) - p_ids = set(e for e in previous.values()) ts_ids = set(e for e in timeline_start.values()) + tsm_ids = set(e for e in timeline_start_members.values()) + p_ids = set(e for e in previous.values()) + tc_ids = set(e for e in timeline_contains.values()) - state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids + state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | tsm_ids return { event_id_to_key[e]: e for e in state_ids From f0f9a0605b1bddc1b01d1bbb6af93f00763b8496 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 22:12:15 +0000 Subject: [PATCH 11/64] remove comment now #2969 is fixed --- synapse/storage/state.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 4ab16e18b2..4291cde7ab 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -561,12 +561,6 @@ class StateGroupWorkerStore(SQLBaseStore): with matching types. `types` is a list of `(type, state_key)`, where a `state_key` of None matches all state_keys. If `types` is None then all events are returned. - - XXX: is it really true that `state_key` of None in `types` matches all - state_keys? it looks like _get-some_state_from_cache does the right thing, - but _get_state_groups_from_groups_txn treats ths None is turned into - 'AND state_key = NULL' or similar (at least until i just fixed it) --Matthew - I've filed this as https://github.com/matrix-org/synapse/issues/2969 """ if types: types = frozenset(types) From ccca02846d07124f537b0c475308f9a26bfb3fb1 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 22:31:41 +0000 Subject: [PATCH 12/64] make it work --- synapse/handlers/sync.py | 6 +++--- synapse/storage/state.py | 10 ++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index c754cfdeeb..c05e3d107f 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -498,7 +498,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.member + for t in state_ids if t[0] == EventTypes.Member } else: @@ -511,7 +511,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.member + for t in state_ids if t[0] == EventTypes.Member } timeline_state = { @@ -542,7 +542,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_at_timeline_start[t] - for t in state_ids if t[0] == EventTypes.member + for t in state_ids if t[0] == EventTypes.Member } timeline_state = { diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 4291cde7ab..9c9994c073 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -257,10 +257,11 @@ class StateGroupWorkerStore(SQLBaseStore): if include_other_types: # XXX: check whether this slows postgres down like a list of # ORs does too? + unique_types = set([ t for (t, _) in types ]) clause_to_args.append( ( - "AND type <> ? " * len(types), - [t for (t, _) in types] + "AND type <> ? " * len(unique_types), + list(unique_types) ) ) else: @@ -293,10 +294,11 @@ class StateGroupWorkerStore(SQLBaseStore): where_args.extend([typ[0], typ[1]]) if include_other_types: + unique_types = set([ t for (t, _) in types ]) where_clauses.append( - "(" + " AND ".join(["type <> ?"] * len(types)) + ")" + "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" ) - where_args.extend(t for (t, _) in types) + where_args.extend(list(unique_types)) where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: From c9d72e4571752554dfe01d755ae23f55c5f84ade Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 23:46:45 +0000 Subject: [PATCH 13/64] oops --- synapse/handlers/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index c05e3d107f..887624c431 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -542,7 +542,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_at_timeline_start[t] - for t in state_ids if t[0] == EventTypes.Member + for t in state_at_timeline_start if t[0] == EventTypes.Member } timeline_state = { From 4d0cfef6ee023bfe83113a0378321830ebde1619 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 14 Mar 2018 00:02:20 +0000 Subject: [PATCH 14/64] add copyright to nudge CI --- synapse/handlers/sync.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 887624c431..edbd2ae771 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015 - 2016 OpenMarket Ltd +# Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 9f77001e2747c36046e136c5d3c706c0aef54b15 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 14 Mar 2018 00:07:47 +0000 Subject: [PATCH 15/64] pep8 --- synapse/storage/state.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 9c9994c073..55159e64d0 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -257,7 +257,7 @@ class StateGroupWorkerStore(SQLBaseStore): if include_other_types: # XXX: check whether this slows postgres down like a list of # ORs does too? - unique_types = set([ t for (t, _) in types ]) + unique_types = set([t for (t, _) in types]) clause_to_args.append( ( "AND type <> ? " * len(unique_types), @@ -294,7 +294,7 @@ class StateGroupWorkerStore(SQLBaseStore): where_args.extend([typ[0], typ[1]]) if include_other_types: - unique_types = set([ t for (t, _) in types ]) + unique_types = set([t for (t, _) in types]) where_clauses.append( "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" ) From 3bc5bd2d22e6b53ec1f89760301df1517e71b53a Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 00:52:04 +0000 Subject: [PATCH 16/64] make incr syncs work --- synapse/handlers/sync.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index edbd2ae771..84c894ca4a 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -499,7 +499,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.Member + for t in state_ids if state_ids[t][0] == EventTypes.Member } else: @@ -512,7 +512,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.Member + for t in state_ids if state_ids[t][0] == EventTypes.Member } timeline_state = { @@ -543,7 +543,8 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_at_timeline_start[t] - for t in state_at_timeline_start if t[0] == EventTypes.Member + for t in state_at_timeline_start + if state_at_timeline_start[t][0] == EventTypes.Member } timeline_state = { From bf49d2dca8db6d82f09441a35cd3655c746b6b4d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Mar 2018 23:38:43 +0000 Subject: [PATCH 17/64] Replace some ujson with simplejson to make it work --- synapse/http/server.py | 3 ++- synapse/rest/client/v2_alpha/sync.py | 2 +- synapse/storage/events.py | 2 +- synapse/storage/events_worker.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 4b567215c8..3c7a0ef97a 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -38,6 +38,7 @@ import collections import logging import urllib import ujson +import simplejson logger = logging.getLogger(__name__) @@ -462,7 +463,7 @@ def respond_with_json(request, code, json_object, send_cors=False, json_bytes = encode_canonical_json(json_object) else: # ujson doesn't like frozen_dicts. - json_bytes = ujson.dumps(json_object, ensure_ascii=False) + json_bytes = simplejson.dumps(json_object) return respond_with_json_bytes( request, code, json_bytes, diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index a0a8e4b8e4..eb91c0b293 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -33,7 +33,7 @@ from ._base import set_timeline_upper_limit import itertools import logging -import ujson as json +import simplejson as json logger = logging.getLogger(__name__) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 3890878170..9fc65229fd 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -38,7 +38,7 @@ from functools import wraps import synapse.metrics import logging -import ujson as json +import simplejson as json # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 86c3b48ad4..2e23dd78ba 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -28,7 +28,7 @@ from synapse.api.errors import SynapseError from collections import namedtuple import logging -import ujson as json +import simplejson as json # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 From 5b3b3aada8952b53f82723227c9758ed47450a2e Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:17:34 +0000 Subject: [PATCH 18/64] simplify timeline_start_members --- synapse/handlers/sync.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 84c894ca4a..ffb4f7915e 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -496,12 +496,6 @@ class SyncHandler(object): batch.events[0].event_id, types=types ) - if filter_members: - member_state_ids = { - t: state_ids[t] - for t in state_ids if state_ids[t][0] == EventTypes.Member - } - else: current_state_ids = yield self.get_state_at( room_id, stream_position=now_token, types=types @@ -509,11 +503,13 @@ class SyncHandler(object): state_ids = current_state_ids - if filter_members: - member_state_ids = { - t: state_ids[t] - for t in state_ids if state_ids[t][0] == EventTypes.Member - } + if filter_members: + logger.info("Finding members from %r", state_ids) + member_state_ids = { + e: state_ids[e] + for e in state_ids if state_ids[e][0] == EventTypes.Member + } + logger.info("Found members %r", member_state_ids) timeline_state = { (event.type, event.state_key): event.event_id @@ -541,11 +537,14 @@ class SyncHandler(object): ) if filter_members: + logger.info("Finding members from %r", state_at_timeline_start) member_state_ids = { - t: state_at_timeline_start[t] - for t in state_at_timeline_start - if state_at_timeline_start[t][0] == EventTypes.Member + e: state_at_timeline_start[e] + for e in state_at_timeline_start + if state_at_timeline_start[e][0] == EventTypes.Member } + logger.info("Found members %r", member_state_ids) + timeline_state = { (event.type, event.state_key): event.event_id From f7dcc404f216383bfd62e4611c6a28c3f13576dc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:37:53 +0000 Subject: [PATCH 19/64] add state_ids for timeline entries --- synapse/handlers/sync.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index ffb4f7915e..9b7e598e74 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -474,6 +474,7 @@ class SyncHandler(object): if filter_members: # We only request state for the members needed to display the # timeline: + types = [ (EventTypes.Member, state_key) for state_key in set( @@ -481,11 +482,14 @@ class SyncHandler(object): for event in batch.events ) ] - types.append((None, None)) # don't just filter to room members - # TODO: we should opportunistically deduplicate these members too + # TODO: we should opportunistically deduplicate these members here # within the same sync series (based on an in-memory cache) + if not types: + filter_members = False + types.append((None, None)) # don't just filter to room members + if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( @@ -545,7 +549,6 @@ class SyncHandler(object): } logger.info("Found members %r", member_state_ids) - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -559,7 +562,14 @@ class SyncHandler(object): current=current_state_ids, ) else: - state_ids = {} + if filter_members: + # strip off the (None, None) and filter to just room members + types = types[:-1] + state_ids = yield self.store.get_state_ids_for_event( + batch.events[0].event_id, types=types + ) + else: + state_ids = {} state = {} if state_ids: From 4f0493c850d4611e8ada42c1de54a18e8dc15a37 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:43:37 +0000 Subject: [PATCH 20/64] fix tsm search again --- synapse/handlers/sync.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 9b7e598e74..4bf85a128f 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -510,8 +510,8 @@ class SyncHandler(object): if filter_members: logger.info("Finding members from %r", state_ids) member_state_ids = { - e: state_ids[e] - for e in state_ids if state_ids[e][0] == EventTypes.Member + t: state_ids[t] + for t in state_ids if t[0] == EventTypes.Member } logger.info("Found members %r", member_state_ids) @@ -543,9 +543,8 @@ class SyncHandler(object): if filter_members: logger.info("Finding members from %r", state_at_timeline_start) member_state_ids = { - e: state_at_timeline_start[e] - for e in state_at_timeline_start - if state_at_timeline_start[e][0] == EventTypes.Member + t: state_at_timeline_start[t] + for t in state_at_timeline_start if t[0] == EventTypes.Member } logger.info("Found members %r", member_state_ids) From fc5397fdf5acefd33bd3b808b6d8cc7c31b69b55 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:44:55 +0000 Subject: [PATCH 21/64] remove debug --- synapse/handlers/sync.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4bf85a128f..b7f42bd594 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -508,12 +508,10 @@ class SyncHandler(object): state_ids = current_state_ids if filter_members: - logger.info("Finding members from %r", state_ids) member_state_ids = { t: state_ids[t] for t in state_ids if t[0] == EventTypes.Member } - logger.info("Found members %r", member_state_ids) timeline_state = { (event.type, event.state_key): event.event_id @@ -541,12 +539,10 @@ class SyncHandler(object): ) if filter_members: - logger.info("Finding members from %r", state_at_timeline_start) member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member } - logger.info("Found members %r", member_state_ids) timeline_state = { (event.type, event.state_key): event.event_id From 0b56290f0b14025939515d01aa72f512f0c629dc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:45:49 +0000 Subject: [PATCH 22/64] remove stale import --- synapse/http/server.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 3c7a0ef97a..e64aa92729 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -37,7 +37,6 @@ from twisted.web.util import redirectTo import collections import logging import urllib -import ujson import simplejson logger = logging.getLogger(__name__) From 366f730bf697fe8fbb18a509ec1852987bc80410 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 18 Mar 2018 21:40:35 +0000 Subject: [PATCH 23/64] only get member state IDs for incremental syncs if we're filtering --- synapse/handlers/sync.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b7f42bd594..6b57afd97b 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -557,14 +557,14 @@ class SyncHandler(object): current=current_state_ids, ) else: + state_ids = {} if filter_members: # strip off the (None, None) and filter to just room members types = types[:-1] - state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types - ) - else: - state_ids = {} + if types: + state_ids = yield self.store.get_state_ids_for_event( + batch.events[0].event_id, types=types + ) state = {} if state_ids: From 478af0f72005708dbbed23e30c547c3d66c07c0e Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 19 Mar 2018 01:00:12 +0000 Subject: [PATCH 24/64] reshuffle todo & comments --- synapse/handlers/sync.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 6b57afd97b..76f5057377 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -483,11 +483,15 @@ class SyncHandler(object): ) ] - # TODO: we should opportunistically deduplicate these members here - # within the same sync series (based on an in-memory cache) + # We can't remove redundant member types at this stage as it has + # to be done based on event_id, and we don't have the member + # event ids until we've pulled them out of the DB. if not types: + # an optimisation to stop needlessly trying to calculate + # member_state_ids filter_members = False + types.append((None, None)) # don't just filter to room members if full_state: @@ -559,6 +563,10 @@ class SyncHandler(object): else: state_ids = {} if filter_members: + # TODO: filter out redundant members based on their mxids (not their + # event_ids) at this point. We know we can do it based on mxid as this + # is an non-gappy incremental sync. + # strip off the (None, None) and filter to just room members types = types[:-1] if types: From b2f22829475ccfe19e994aedddb8d04995018bf4 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 19 Mar 2018 01:15:13 +0000 Subject: [PATCH 25/64] make lazy_load_members configurable in filters --- synapse/api/filtering.py | 6 ++++++ synapse/handlers/sync.py | 18 +++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 83206348e5..339e4a31d6 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -260,6 +260,9 @@ class FilterCollection(object): def ephemeral_limit(self): return self._room_ephemeral_filter.limit() + def lazy_load_members(self): + return self._room_state_filter.lazy_load_members() + def filter_presence(self, events): return self._presence_filter.filter(events) @@ -416,6 +419,9 @@ class Filter(object): def limit(self): return self.filter_json.get("limit", 10) + def lazy_load_members(self): + return self.filter_json.get("lazy_load_members", False) + def _matches_wildcard(actual_value, filter_value): if filter_value.endswith("*"): diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 76f5057377..f521d22e91 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -442,7 +442,7 @@ class SyncHandler(object): @defer.inlineCallbacks def compute_state_delta(self, room_id, batch, sync_config, since_token, now_token, - full_state, filter_members): + full_state): """ Works out the differnce in state between the start of the timeline and the previous sync. @@ -455,7 +455,7 @@ class SyncHandler(object): be None. now_token(str): Token of the end of the current batch. full_state(bool): Whether to force returning the full state. - filter_members(bool): Whether to only return state for members + lazy_load_members(bool): Whether to only return state for members referenced in this timeline segment Returns: @@ -470,8 +470,9 @@ class SyncHandler(object): types = None member_state_ids = {} + lazy_load_members = sync_config.filter_collection.lazy_load_members() - if filter_members: + if lazy_load_members: # We only request state for the members needed to display the # timeline: @@ -490,7 +491,7 @@ class SyncHandler(object): if not types: # an optimisation to stop needlessly trying to calculate # member_state_ids - filter_members = False + lazy_load_members = False types.append((None, None)) # don't just filter to room members @@ -511,7 +512,7 @@ class SyncHandler(object): state_ids = current_state_ids - if filter_members: + if lazy_load_members: member_state_ids = { t: state_ids[t] for t in state_ids if t[0] == EventTypes.Member @@ -542,7 +543,7 @@ class SyncHandler(object): batch.events[0].event_id, types=types ) - if filter_members: + if lazy_load_members: member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member @@ -562,7 +563,7 @@ class SyncHandler(object): ) else: state_ids = {} - if filter_members: + if lazy_load_members: # TODO: filter out redundant members based on their mxids (not their # event_ids) at this point. We know we can do it based on mxid as this # is an non-gappy incremental sync. @@ -1380,8 +1381,7 @@ class SyncHandler(object): return state = yield self.compute_state_delta( - room_id, batch, sync_config, since_token, now_token, - full_state=full_state, filter_members=True + room_id, batch, sync_config, since_token, now_token, full_state=full_state ) if room_builder.rtype == "joined": From a6c8f7c875348ff8d63a7032c2f73a08551c516c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 01:09:55 +0100 Subject: [PATCH 26/64] add pydoc --- synapse/handlers/sync.py | 18 +++++++--- synapse/storage/state.py | 76 ++++++++++++++++++++++++++++------------ 2 files changed, 67 insertions(+), 27 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 05bf6d46dd..8e38078332 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -423,7 +423,11 @@ class SyncHandler(object): Args: event(synapse.events.EventBase): event of interest - + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A Deferred map from ((type, state_key)->Event) """ @@ -440,6 +444,11 @@ class SyncHandler(object): Args: room_id(str): room for which to get state stream_position(StreamToken): point at which to get state + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A Deferred map from ((type, state_key)->Event) @@ -472,8 +481,6 @@ class SyncHandler(object): be None. now_token(str): Token of the end of the current batch. full_state(bool): Whether to force returning the full state. - lazy_load_members(bool): Whether to only return state for members - referenced in this timeline segment Returns: A deferred new event dictionary @@ -496,7 +503,7 @@ class SyncHandler(object): types = [ (EventTypes.Member, state_key) for state_key in set( - event.sender # FIXME: we also care about targets etc. + event.sender # FIXME: we also care about invite targets etc. for event in batch.events ) ] @@ -1398,7 +1405,8 @@ class SyncHandler(object): return state = yield self.compute_state_delta( - room_id, batch, sync_config, since_token, now_token, full_state=full_state + room_id, batch, sync_config, since_token, now_token, + full_state=full_state ) if room_builder.rtype == "joined": diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 55159e64d0..63b6834202 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -182,7 +182,19 @@ class StateGroupWorkerStore(SQLBaseStore): @defer.inlineCallbacks def _get_state_groups_from_groups(self, groups, types): - """Returns dictionary state_group -> (dict of (type, state_key) -> event id) + """Returns the state groups for a given set of groups, filtering on + types of state events. + + Args: + groups(list[int]): list of state group IDs to query + types(list[str|None, str|None])|None: List of 2-tuples of the form + (`type`, `state_key`), where a `state_key` of `None` matches all + state_keys for the `type`. Presence of type of `None` indicates + that types not in the list should not be filtered out. If None, + all types are returned. + + Returns: + dictionary state_group -> (dict of (type, state_key) -> event id) """ results = {} @@ -204,6 +216,9 @@ class StateGroupWorkerStore(SQLBaseStore): if types is not None: type_set = set(types) if (None, None) in type_set: + # special case (None, None) to mean that other types should be + # returned - i.e. we were just filtering down the state keys + # for particular types. include_other_types = True type_set.remove((None, None)) types = list(type_set) # deduplicate types list @@ -360,10 +375,12 @@ class StateGroupWorkerStore(SQLBaseStore): that are in the `types` list. Args: - event_ids (list) - types (list): List of (type, state_key) tuples which are used to - filter the state fetched. `state_key` may be None, which matches - any `state_key` + event_ids (list[string]) + types (list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: deferred: A list of dicts corresponding to the event_ids given. @@ -399,9 +416,11 @@ class StateGroupWorkerStore(SQLBaseStore): Args: event_ids(list(str)): events whose state should be returned - types(list[(str, str)]|None): List of (type, state_key) tuples - which are used to filter the state fetched. May be None, which - matches any key + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A deferred dict from event_id -> (type, state_key) -> state_event @@ -427,9 +446,11 @@ class StateGroupWorkerStore(SQLBaseStore): Args: event_id(str): event whose state should be returned - types(list[(str, str)]|None): List of (type, state_key) tuples - which are used to filter the state fetched. May be None, which - matches any key + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A deferred dict from (type, state_key) -> state_event @@ -444,9 +465,11 @@ class StateGroupWorkerStore(SQLBaseStore): Args: event_id(str): event whose state should be returned - types(list[(str, str)]|None): List of (type, state_key) tuples - which are used to filter the state fetched. May be None, which - matches any key + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A deferred dict from (type, state_key) -> state_event @@ -492,11 +515,11 @@ class StateGroupWorkerStore(SQLBaseStore): missing state. Args: - group: The state group to lookup - types (list): List of 2-tuples of the form (`type`, `state_key`), - where a `state_key` of `None` matches all state_keys for the - `type`. Presence of type of `None` indicates that types not - in the list should not be filtered out. + group(int): The state group to lookup + types(list[str|None, str|None]): List of 2-tuples of the form + (`type`, `state_key`), where a `state_key` of `None` matches all + state_keys for the `type`. Presence of type of `None` indicates + that types not in the list should not be filtered out. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) @@ -560,9 +583,18 @@ class StateGroupWorkerStore(SQLBaseStore): @defer.inlineCallbacks def _get_state_for_groups(self, groups, types=None): """Given list of groups returns dict of group -> list of state events - with matching types. `types` is a list of `(type, state_key)`, where - a `state_key` of None matches all state_keys. If `types` is None then - all events are returned. + with matching types. + + Args: + groups(list[int]): list of groups whose state to query + types(list[str|None, str|None]|None): List of 2-tuples of the form + (`type`, `state_key`), where a `state_key` of `None` matches all + state_keys for the `type`. Presence of type of `None` indicates + that types not in the list should not be filtered out. If None, + all events are returned. + + Returns: + dict of group -> list of state events """ if types: types = frozenset(types) From 5e6b31f0da8ba0806de856e4a9823206ac4434f9 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 02:21:52 +0100 Subject: [PATCH 27/64] fix dumb typo --- tests/test_dns.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_dns.py b/tests/test_dns.py index af607d626f..3b360a0fc7 100644 --- a/tests/test_dns.py +++ b/tests/test_dns.py @@ -62,7 +62,7 @@ class DnsTestCase(unittest.TestCase): dns_client_mock = Mock() dns_client_mock.lookupService.return_value = defer.fail(error.DNSServerError()) - service_name = "test_service.examle.com" + service_name = "test_service.example.com" entry = Mock(spec_set=["expires"]) entry.expires = 0 @@ -87,7 +87,7 @@ class DnsTestCase(unittest.TestCase): dns_client_mock = Mock(spec_set=['lookupService']) dns_client_mock.lookupService = Mock(spec_set=[]) - service_name = "test_service.examle.com" + service_name = "test_service.example.com" entry = Mock(spec_set=["expires"]) entry.expires = 999999999 @@ -111,7 +111,7 @@ class DnsTestCase(unittest.TestCase): dns_client_mock.lookupService.return_value = defer.fail(error.DNSServerError()) - service_name = "test_service.examle.com" + service_name = "test_service.example.com" cache = {} @@ -126,7 +126,7 @@ class DnsTestCase(unittest.TestCase): dns_client_mock.lookupService.return_value = defer.fail(error.DNSNameError()) - service_name = "test_service.examle.com" + service_name = "test_service.example.com" cache = {} From b69ff33d9e26e9efe89c745e0fda4801db8bede0 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 02:22:27 +0100 Subject: [PATCH 28/64] disable CPUMetrics if no /proc/self/stat fixes build on macOS again --- synapse/metrics/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 371c300cac..6b5b0c9471 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -34,6 +34,7 @@ all_metrics = [] all_collectors = [] all_gauges = {} +HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") class RegistryProxy(object): @@ -99,6 +100,8 @@ class CPUMetrics(object): self.ticks_per_sec = ticks_per_sec def collect(self): + if not HAVE_PROC_SELF_STAT: + return with open("/proc/self/stat") as s: line = s.read() From 8df7bad839f04cbcabf066fe549df14879639ef5 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 02:32:15 +0100 Subject: [PATCH 29/64] pep8 --- synapse/metrics/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 6b5b0c9471..bfdbbc9a23 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -36,6 +36,7 @@ all_gauges = {} HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") + class RegistryProxy(object): def collect(self): From 9bbb9f5556496529478753d2123526ca6894535c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 04:26:10 +0100 Subject: [PATCH 30/64] add lazy_load_members to the filter json schema --- synapse/api/filtering.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 7158dd75e9..fd58961862 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -112,7 +112,10 @@ ROOM_EVENT_FILTER_SCHEMA = { }, "contains_url": { "type": "boolean" - } + }, + "lazy_load_members": { + "type": "boolean" + }, } } From 5f6122fe102f994e023d530cb6076730f31f619f Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 4 Jun 2018 00:08:52 +0300 Subject: [PATCH 31/64] more comments --- synapse/handlers/sync.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 8e38078332..7ab97b24a6 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -515,6 +515,9 @@ class SyncHandler(object): if not types: # an optimisation to stop needlessly trying to calculate # member_state_ids + # + # XXX: i can't remember what this trying to do. why would + # types ever be []? --matthew lazy_load_members = False types.append((None, None)) # don't just filter to room members @@ -568,6 +571,10 @@ class SyncHandler(object): ) if lazy_load_members: + # TODO: filter out redundant members based on their event_ids + # (not mxids) at this point. In practice, limited syncs are + # relatively rare so it's not a total disaster to send redundant + # members down at this point. member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member From 94700e55fa93653d678fb5e27322fde4c0a15f3d Mon Sep 17 00:00:00 2001 From: Michael Telatynski <7t3chguy@gmail.com> Date: Wed, 13 Jun 2018 10:31:01 +0100 Subject: [PATCH 32/64] if inviter_display_name == ""||None then default to inviter MXID to prevent email invite from "None" --- synapse/handlers/room_member.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/synapse/handlers/room_member.py b/synapse/handlers/room_member.py index b3f979b246..1b8dfa8254 100644 --- a/synapse/handlers/room_member.py +++ b/synapse/handlers/room_member.py @@ -565,6 +565,10 @@ class RoomMemberHandler(BaseHandler): inviter_display_name = member_event.content.get("displayname", "") inviter_avatar_url = member_event.content.get("avatar_url", "") + # if user has no display name, default to their MXID + if not inviter_display_name: + inviter_display_name = user.to_string() + canonical_room_alias = "" canonical_alias_event = room_state.get((EventTypes.CanonicalAlias, "")) if canonical_alias_event: From ff659161085d082e3071be6e9896bf63156e59d7 Mon Sep 17 00:00:00 2001 From: Sverre Moe Date: Sun, 1 Jul 2018 00:06:51 +0200 Subject: [PATCH 33/64] Add instructions for install on OpenSUSE and SLES --- README.rst | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/README.rst b/README.rst index 4fe54b0c90..b372842b5b 100644 --- a/README.rst +++ b/README.rst @@ -362,6 +362,19 @@ Synapse is in the Fedora repositories as ``matrix-synapse``:: Oleg Girko provides Fedora RPMs at https://obs.infoserver.lv/project/monitor/matrix-synapse +OpenSUSE +-------- + +Synapse is in the OpenSUSE repositories as ``matrix-synapse``:: + + sudo zypper install matrix-synapse + +SUSE Linux Enterprise Server +---------------------------- + +Unofficial package are built for SLES 15 in the openSUSE:Backports:SLE-15 repository at +https://download.opensuse.org/repositories/openSUSE:/Backports:/SLE-15/standard/ + ArchLinux --------- From 924eb34d9428a4163a03249abbb6f40d4baa29c6 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 18:32:02 +0100 Subject: [PATCH 34/64] add a filtered_types param to limit filtering to specific types --- synapse/handlers/sync.py | 65 ++++++++++++---------- synapse/storage/state.py | 113 +++++++++++++++++++++------------------ 2 files changed, 96 insertions(+), 82 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 0c21ac2c77..cb711b8758 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -417,38 +417,44 @@ class SyncHandler(object): )) @defer.inlineCallbacks - def get_state_after_event(self, event, types=None): + def get_state_after_event(self, event, types=None, filtered_types=None): """ Get the room state after the given event Args: event(synapse.events.EventBase): event of interest - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. + Returns: A Deferred map from ((type, state_key)->Event) """ - state_ids = yield self.store.get_state_ids_for_event(event.event_id, types) + state_ids = yield self.store.get_state_ids_for_event( + event.event_id, types, filtered_types=filtered_types + ) if event.is_state(): state_ids = state_ids.copy() state_ids[(event.type, event.state_key)] = event.event_id defer.returnValue(state_ids) @defer.inlineCallbacks - def get_state_at(self, room_id, stream_position, types=None): + def get_state_at(self, room_id, stream_position, types=None, filtered_types=None): """ Get the room state at a particular stream position Args: room_id(str): room for which to get state stream_position(StreamToken): point at which to get state - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. - May be None, which matches any key. + all events are returned of the given type. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A Deferred map from ((type, state_key)->Event) @@ -463,7 +469,9 @@ class SyncHandler(object): if last_events: last_event = last_events[-1] - state = yield self.get_state_after_event(last_event, types) + state = yield self.get_state_after_event( + last_event, types, filtered_types=filtered_types + ) else: # no events in this room - so presumably no state @@ -499,6 +507,7 @@ class SyncHandler(object): types = None member_state_ids = {} lazy_load_members = sync_config.filter_collection.lazy_load_members() + filtered_types = None if lazy_load_members: # We only request state for the members needed to display the @@ -516,29 +525,25 @@ class SyncHandler(object): # to be done based on event_id, and we don't have the member # event ids until we've pulled them out of the DB. - if not types: - # an optimisation to stop needlessly trying to calculate - # member_state_ids - # - # XXX: i can't remember what this trying to do. why would - # types ever be []? --matthew - lazy_load_members = False - - types.append((None, None)) # don't just filter to room members + # only apply the filtering to room members + filtered_types = [EventTypes.Member] if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id, types=types + batch.events[-1].event_id, types=types, + filtered_types=filtered_types ) state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types + batch.events[0].event_id, types=types, + filtered_types=filtered_types ) else: current_state_ids = yield self.get_state_at( - room_id, stream_position=now_token, types=types + room_id, stream_position=now_token, types=types, + filtered_types=filtered_types ) state_ids = current_state_ids @@ -563,15 +568,18 @@ class SyncHandler(object): ) elif batch.limited: state_at_previous_sync = yield self.get_state_at( - room_id, stream_position=since_token, types=types + room_id, stream_position=since_token, types=types, + filtered_types=filtered_types ) current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id, types=types + batch.events[-1].event_id, types=types, + filtered_types=filtered_types ) state_at_timeline_start = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types + batch.events[0].event_id, types=types, + filtered_types=filtered_types ) if lazy_load_members: @@ -603,11 +611,10 @@ class SyncHandler(object): # event_ids) at this point. We know we can do it based on mxid as this # is an non-gappy incremental sync. - # strip off the (None, None) and filter to just room members - types = types[:-1] if types: state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types + batch.events[0].event_id, types=types, + filtered_types=filtered_types ) state = {} diff --git a/synapse/storage/state.py b/synapse/storage/state.py index c5ff44fef7..ee531a2ce0 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -185,7 +185,7 @@ class StateGroupWorkerStore(SQLBaseStore): }) @defer.inlineCallbacks - def _get_state_groups_from_groups(self, groups, types): + def _get_state_groups_from_groups(self, groups, types, filtered_types=None): """Returns the state groups for a given set of groups, filtering on types of state events. @@ -193,9 +193,10 @@ class StateGroupWorkerStore(SQLBaseStore): groups(list[int]): list of state group IDs to query types(list[str|None, str|None])|None: List of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all - state_keys for the `type`. Presence of type of `None` indicates - that types not in the list should not be filtered out. If None, - all types are returned. + state_keys for the `type`. If None, all types are returned. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: dictionary state_group -> (dict of (type, state_key) -> event id) @@ -206,26 +207,21 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, + self._get_state_groups_from_groups_txn, chunk, types, filtered_types ) results.update(res) defer.returnValue(results) - def _get_state_groups_from_groups_txn(self, txn, groups, types=None): + def _get_state_groups_from_groups_txn( + self, txn, groups, types=None, filtered_types=None + ): results = {group: {} for group in groups} - include_other_types = False + include_other_types = False if filtered_types is None else True if types is not None: - type_set = set(types) - if (None, None) in type_set: - # special case (None, None) to mean that other types should be - # returned - i.e. we were just filtering down the state keys - # for particular types. - include_other_types = True - type_set.remove((None, None)) - types = list(type_set) # deduplicate types list + types = list(set(types)) # deduplicate types list if isinstance(self.database_engine, PostgresEngine): # Temporarily disable sequential scans in this transaction. This is @@ -276,7 +272,7 @@ class StateGroupWorkerStore(SQLBaseStore): if include_other_types: # XXX: check whether this slows postgres down like a list of # ORs does too? - unique_types = set([t for (t, _) in types]) + unique_types = set(filtered_types) clause_to_args.append( ( "AND type <> ? " * len(unique_types), @@ -313,7 +309,7 @@ class StateGroupWorkerStore(SQLBaseStore): where_args.extend([typ[0], typ[1]]) if include_other_types: - unique_types = set([t for (t, _) in types]) + unique_types = set(filtered_types) where_clauses.append( "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" ) @@ -373,18 +369,20 @@ class StateGroupWorkerStore(SQLBaseStore): return results @defer.inlineCallbacks - def get_state_for_events(self, event_ids, types): + def get_state_for_events(self, event_ids, types, filtered_types): """Given a list of event_ids and type tuples, return a list of state dicts for each event. The state dicts will only have the type/state_keys that are in the `types` list. Args: event_ids (list[string]) - types (list[(str|None, str|None)]|None): List of (type, state_key) tuples + types (list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: deferred: A list of dicts corresponding to the event_ids given. @@ -395,7 +393,7 @@ class StateGroupWorkerStore(SQLBaseStore): ) groups = set(itervalues(event_to_groups)) - group_to_state = yield self._get_state_for_groups(groups, types) + group_to_state = yield self._get_state_for_groups(groups, types, filtered_types) state_event_map = yield self.get_events( [ev_id for sd in itervalues(group_to_state) for ev_id in itervalues(sd)], @@ -414,17 +412,19 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue({event: event_to_state[event] for event in event_ids}) @defer.inlineCallbacks - def get_state_ids_for_events(self, event_ids, types=None): + def get_state_ids_for_events(self, event_ids, types=None, filtered_types=None): """ Get the state dicts corresponding to a list of events Args: event_ids(list(str)): events whose state should be returned - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A deferred dict from event_id -> (type, state_key) -> state_event @@ -434,7 +434,7 @@ class StateGroupWorkerStore(SQLBaseStore): ) groups = set(itervalues(event_to_groups)) - group_to_state = yield self._get_state_for_groups(groups, types) + group_to_state = yield self._get_state_for_groups(groups, types, filtered_types) event_to_state = { event_id: group_to_state[group] @@ -444,41 +444,45 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue({event: event_to_state[event] for event in event_ids}) @defer.inlineCallbacks - def get_state_for_event(self, event_id, types=None): + def get_state_for_event(self, event_id, types=None, filtered_types=None): """ Get the state dict corresponding to a particular event Args: event_id(str): event whose state should be returned - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A deferred dict from (type, state_key) -> state_event """ - state_map = yield self.get_state_for_events([event_id], types) + state_map = yield self.get_state_for_events([event_id], types, filtered_types) defer.returnValue(state_map[event_id]) @defer.inlineCallbacks - def get_state_ids_for_event(self, event_id, types=None): + def get_state_ids_for_event(self, event_id, types=None, filtered_types=None): """ Get the state dict corresponding to a particular event Args: event_id(str): event whose state should be returned - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A deferred dict from (type, state_key) -> state_event """ - state_map = yield self.get_state_ids_for_events([event_id], types) + state_map = yield self.get_state_ids_for_events([event_id], types, filtered_types) defer.returnValue(state_map[event_id]) @cached(max_entries=50000) @@ -509,7 +513,7 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue({row["event_id"]: row["state_group"] for row in rows}) - def _get_some_state_from_cache(self, group, types): + def _get_some_state_from_cache(self, group, types, filtered_types=None): """Checks if group is in cache. See `_get_state_for_groups` Returns 3-tuple (`state_dict`, `missing_types`, `got_all`). @@ -520,29 +524,30 @@ class StateGroupWorkerStore(SQLBaseStore): Args: group(int): The state group to lookup - types(list[str|None, str|None]): List of 2-tuples of the form + types(list[str, str|None]): List of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all - state_keys for the `type`. Presence of type of `None` indicates - that types not in the list should not be filtered out. + state_keys for the `type`. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) type_to_key = {} + + # tracks which of the requested types are missing from our cache missing_types = set() - include_other_types = False + include_other_types = True if filtered_types is None else False for typ, state_key in types: key = (typ, state_key) - if typ is None: - include_other_types = True - next - if state_key is None: type_to_key[typ] = None # XXX: why do we mark the type as missing from our cache just # because we weren't filtering on a specific value of state_key? + # is it because the cache doesn't handle wildcards? missing_types.add(key) else: if type_to_key.get(typ, object()) is not None: @@ -556,7 +561,7 @@ class StateGroupWorkerStore(SQLBaseStore): def include(typ, state_key): valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: - return include_other_types + return include_other_types and typ not in filtered_types if valid_state_keys is None: return True if state_key in valid_state_keys: @@ -585,21 +590,23 @@ class StateGroupWorkerStore(SQLBaseStore): return state_dict_ids, is_all @defer.inlineCallbacks - def _get_state_for_groups(self, groups, types=None): + def _get_state_for_groups(self, groups, types=None, filtered_types=None): """Gets the state at each of a list of state groups, optionally filtering by type/state_key Args: groups (iterable[int]): list of state groups for which we want to get the state. - types (None|iterable[(None|str, None|str)]): + types (None|iterable[(None, None|str)]): indicates the state type/keys required. If None, the whole state is fetched and returned. Otherwise, each entry should be a `(type, state_key)` tuple to include in the response. A `state_key` of None is a wildcard - meaning that we require all state with that type. A `type` of None - indicates that types not in the list should not be filtered out. + meaning that we require all state with that type. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: Deferred[dict[int, dict[(type, state_key), EventBase]]] @@ -612,7 +619,7 @@ class StateGroupWorkerStore(SQLBaseStore): if types is not None: for group in set(groups): state_dict_ids, _, got_all = self._get_some_state_from_cache( - group, types, + group, types, filtered_types ) results[group] = state_dict_ids @@ -645,7 +652,7 @@ class StateGroupWorkerStore(SQLBaseStore): types_to_fetch = types group_to_state_dict = yield self._get_state_groups_from_groups( - missing_groups, types_to_fetch, + missing_groups, types_to_fetch, filtered_types ) for group, group_state_dict in iteritems(group_to_state_dict): From bcaec2915ac74937171e27d507b8f9c0e39d3677 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 19:03:50 +0100 Subject: [PATCH 35/64] incorporate review --- synapse/handlers/sync.py | 44 ++++++++++++++++++++++++---------------- synapse/storage/state.py | 7 ++++--- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index cb711b8758..b597f94cf6 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -435,7 +435,7 @@ class SyncHandler(object): A Deferred map from ((type, state_key)->Event) """ state_ids = yield self.store.get_state_ids_for_event( - event.event_id, types, filtered_types=filtered_types + event.event_id, types, filtered_types=filtered_types, ) if event.is_state(): state_ids = state_ids.copy() @@ -470,7 +470,7 @@ class SyncHandler(object): if last_events: last_event = last_events[-1] state = yield self.get_state_after_event( - last_event, types, filtered_types=filtered_types + last_event, types, filtered_types=filtered_types, ) else: @@ -505,7 +505,6 @@ class SyncHandler(object): with Measure(self.clock, "compute_state_delta"): types = None - member_state_ids = {} lazy_load_members = sync_config.filter_collection.lazy_load_members() filtered_types = None @@ -521,10 +520,6 @@ class SyncHandler(object): ) ] - # We can't remove redundant member types at this stage as it has - # to be done based on event_id, and we don't have the member - # event ids until we've pulled them out of the DB. - # only apply the filtering to room members filtered_types = [EventTypes.Member] @@ -532,27 +527,32 @@ class SyncHandler(object): if batch: current_state_ids = yield self.store.get_state_ids_for_event( batch.events[-1].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) else: current_state_ids = yield self.get_state_at( room_id, stream_position=now_token, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state_ids = current_state_ids + # track the membership state events as of the beginning of this + # timeline sequence, so they can be filtered out of the state + # if we are lazy loading members. if lazy_load_members: member_state_ids = { t: state_ids[t] for t in state_ids if t[0] == EventTypes.Member } + else: + member_state_ids = {} timeline_state = { (event.type, event.state_key): event.event_id @@ -569,28 +569,38 @@ class SyncHandler(object): elif batch.limited: state_at_previous_sync = yield self.get_state_at( room_id, stream_position=since_token, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) current_state_ids = yield self.store.get_state_ids_for_event( batch.events[-1].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state_at_timeline_start = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) + # track the membership state events as of the beginning of this + # timeline sequence, so they can be filtered out of the state + # if we are lazy loading members. if lazy_load_members: - # TODO: filter out redundant members based on their event_ids - # (not mxids) at this point. In practice, limited syncs are + # TODO: optionally filter out redundant membership events at this + # point, to stop repeatedly sending members in every /sync as if + # the client isn't tracking them. + # When implement, this should filter using event_ids (not mxids). + # In practice, limited syncs are # relatively rare so it's not a total disaster to send redundant - # members down at this point. + # members down at this point. Redundant members are ones which + # repeatedly get sent down /sync because we don't know if the client + # is caching them or not. member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member } + else: + member_state_ids = {} timeline_state = { (event.type, event.state_key): event.event_id @@ -614,7 +624,7 @@ class SyncHandler(object): if types: state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state = {} diff --git a/synapse/storage/state.py b/synapse/storage/state.py index ee531a2ce0..75c6366e7a 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -545,9 +545,10 @@ class StateGroupWorkerStore(SQLBaseStore): if state_key is None: type_to_key[typ] = None - # XXX: why do we mark the type as missing from our cache just - # because we weren't filtering on a specific value of state_key? - # is it because the cache doesn't handle wildcards? + # we mark the type as missing from the cache because + # when the cache was populated it might have been done with a + # restricted set of state_keys, so the wildcard will not work + # and the cache may be incomplete. missing_types.add(key) else: if type_to_key.get(typ, object()) is not None: From 2f558300cc648e633342746dc7b42a36fcb6b32e Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 19:22:27 +0100 Subject: [PATCH 36/64] fix thinkos; unbreak tests --- synapse/storage/state.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 75c6366e7a..f09be7172d 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -369,7 +369,7 @@ class StateGroupWorkerStore(SQLBaseStore): return results @defer.inlineCallbacks - def get_state_for_events(self, event_ids, types, filtered_types): + def get_state_for_events(self, event_ids, types, filtered_types=None): """Given a list of event_ids and type tuples, return a list of state dicts for each event. The state dicts will only have the type/state_keys that are in the `types` list. @@ -538,7 +538,7 @@ class StateGroupWorkerStore(SQLBaseStore): # tracks which of the requested types are missing from our cache missing_types = set() - include_other_types = True if filtered_types is None else False + include_other_types = False if filtered_types is None else True for typ, state_key in types: key = (typ, state_key) From 1fa4f7e03e5cdaebeda0f0b4c49120c991f5bf57 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 20:19:32 +0100 Subject: [PATCH 37/64] first cut of a UT for testing state store (untested) --- tests/storage/test_state.py | 151 ++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 tests/storage/test_state.py diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py new file mode 100644 index 0000000000..acf36e077f --- /dev/null +++ b/tests/storage/test_state.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from synapse.api.constants import EventTypes, Membership +from synapse.types import RoomID, UserID + +import tests.unittest +import tests.utils + + +class StateStoreTestCase(tests.unittest.TestCase): + def __init__(self, *args, **kwargs): + super(StateStoreTestCase, self).__init__(*args, **kwargs) + self.store = None # type: synapse.storage.DataStore + + @defer.inlineCallbacks + def setUp(self): + hs = yield tests.utils.setup_test_homeserver() + + self.store = hs.get_datastore() + self.event_builder_factory = hs.get_event_builder_factory() + self.event_creation_handler = hs.get_event_creation_handler() + + self.u_alice = UserID.from_string("@alice:test") + self.u_bob = UserID.from_string("@bob:test") + + # User elsewhere on another host + self.u_charlie = UserID.from_string("@charlie:elsewhere") + + self.room = RoomID.from_string("!abc123:test") + + yield self.store.store_room( + self.room.to_string(), + room_creator_user_id="@creator:text", + is_public=True + ) + + @defer.inlineCallbacks + def inject_state_event(self, room, sender, typ, state_key, content): + builder = self.event_builder_factory.new({ + "type": typ, + "sender": sender.to_string(), + "state_key": state_key, + "room_id": room.to_string(), + "content": content, + }) + + event, context = yield self.event_creation_handler.create_new_client_event( + builder + ) + + yield self.store.persist_event(event, context) + + defer.returnValue(event) + + @defer.inlineCallbacks + def test_get_state_for_events(self): + + # this defaults to a linear DAG as each new injection defaults to whatever + # forward extremities are currently in the DB for this room. + (e1, c1) = yield self.inject_state_event( + self.room, self.u_alice, EventTypes.Create, '', {}, + ) + (e2, c2) = yield self.inject_state_event( + self.room, self.u_alice, EventTypes.Name, '', { + "name": "test room" + }, + ) + (e3, c3) = yield self.inject_state_event( + self.room, self.u_alice, EventTypes.Member, self.u_alice, { + "membership": Membership.JOIN + }, + ) + (e4, c4) = yield self.inject_state_event( + self.room, self.u_bob, EventTypes.Member, self.u_bob, { + "membership": Membership.JOIN + }, + ) + (e5, c5) = yield self.inject_state_event( + self.room, self.u_bob, EventTypes.Member, self.u_bob, { + "membership": Membership.LEAVE + }, + ) + + # check we get the full state as of the final event + state = yield self.store.get_state_for_events( + e5.event_id, None, filtered_types=None + ) + + self.assertDictEqual({ + (e1.type, e1.state_key): e1.event_id, + (e2.type, e2.state_key): e2.event_id, + (e3.type, e3.state_key): e3.event_id, + # e4 is overwritten by e5 + (e5.type, e5.state_key): e5.event_id, + }, state) + + # check we can filter to the m.room.name event (with a '' state key) + state = yield self.store.get_state_for_events( + e5.event_id, ((EventTypes.Name, '')), filtered_types=None + ) + + self.assertDictEqual({ + (e2.type, e2.state_key): e2.event_id, + }, state) + + # check we can filter to the m.room.name event (with a wildcard None state key) + state = yield self.store.get_state_for_events( + e5.event_id, ((EventTypes.Name, None)), filtered_types=None + ) + + self.assertDictEqual({ + (e2.type, e2.state_key): e2.event_id, + }, state) + + # check we can grab the m.room.member events (with a wildcard None state key) + state = yield self.store.get_state_for_events( + e5.event_id, ((EventTypes.Member, None)), filtered_types=None + ) + + self.assertDictEqual({ + (e3.type, e3.state_key): e3.event_id, + (e5.type, e5.state_key): e5.event_id, + }, state) + + # check we can use filter_types to grab a specific room member + # without filtering out the other event types + state = yield self.store.get_state_for_events( + e5.event_id, ((EventTypes.Member, self.u_alice)), + filtered_types=[EventTypes.Member], + ) + + self.assertDictEqual({ + (e1.type, e1.state_key): e3.event_id, + (e2.type, e2.state_key): e3.event_id, + (e3.type, e3.state_key): e5.event_id, + }, state) From 650daf56285c515d9c0875a9a3894a033337b0c9 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 20:49:44 +0100 Subject: [PATCH 38/64] make test work --- tests/storage/test_state.py | 83 ++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index acf36e077f..8924ba9f7f 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging + from twisted.internet import defer from synapse.api.constants import EventTypes, Membership @@ -21,6 +23,8 @@ from synapse.types import RoomID, UserID import tests.unittest import tests.utils +logger = logging.getLogger(__name__) + class StateStoreTestCase(tests.unittest.TestCase): def __init__(self, *args, **kwargs): @@ -38,9 +42,6 @@ class StateStoreTestCase(tests.unittest.TestCase): self.u_alice = UserID.from_string("@alice:test") self.u_bob = UserID.from_string("@bob:test") - # User elsewhere on another host - self.u_charlie = UserID.from_string("@charlie:elsewhere") - self.room = RoomID.from_string("!abc123:test") yield self.store.store_room( @@ -67,85 +68,93 @@ class StateStoreTestCase(tests.unittest.TestCase): defer.returnValue(event) + def assertStateMapEqual(self, s1, s2): + for t in s1: + # just compare event IDs for simplicity + self.assertEqual(s1[t].event_id, s2[t].event_id) + self.assertEqual(len(s1), len(s2)) + @defer.inlineCallbacks - def test_get_state_for_events(self): + def test_get_state_for_event(self): # this defaults to a linear DAG as each new injection defaults to whatever # forward extremities are currently in the DB for this room. - (e1, c1) = yield self.inject_state_event( + e1 = yield self.inject_state_event( self.room, self.u_alice, EventTypes.Create, '', {}, ) - (e2, c2) = yield self.inject_state_event( + e2 = yield self.inject_state_event( self.room, self.u_alice, EventTypes.Name, '', { "name": "test room" }, ) - (e3, c3) = yield self.inject_state_event( - self.room, self.u_alice, EventTypes.Member, self.u_alice, { + e3 = yield self.inject_state_event( + self.room, self.u_alice, EventTypes.Member, self.u_alice.to_string(), { "membership": Membership.JOIN }, ) - (e4, c4) = yield self.inject_state_event( - self.room, self.u_bob, EventTypes.Member, self.u_bob, { + e4 = yield self.inject_state_event( + self.room, self.u_bob, EventTypes.Member, self.u_bob.to_string(), { "membership": Membership.JOIN }, ) - (e5, c5) = yield self.inject_state_event( - self.room, self.u_bob, EventTypes.Member, self.u_bob, { + e5 = yield self.inject_state_event( + self.room, self.u_bob, EventTypes.Member, self.u_bob.to_string(), { "membership": Membership.LEAVE }, ) # check we get the full state as of the final event - state = yield self.store.get_state_for_events( + state = yield self.store.get_state_for_event( e5.event_id, None, filtered_types=None ) - self.assertDictEqual({ - (e1.type, e1.state_key): e1.event_id, - (e2.type, e2.state_key): e2.event_id, - (e3.type, e3.state_key): e3.event_id, + self.assertIsNotNone(e4) + + self.assertStateMapEqual({ + (e1.type, e1.state_key): e1, + (e2.type, e2.state_key): e2, + (e3.type, e3.state_key): e3, # e4 is overwritten by e5 - (e5.type, e5.state_key): e5.event_id, + (e5.type, e5.state_key): e5, }, state) # check we can filter to the m.room.name event (with a '' state key) - state = yield self.store.get_state_for_events( - e5.event_id, ((EventTypes.Name, '')), filtered_types=None + state = yield self.store.get_state_for_event( + e5.event_id, [(EventTypes.Name, '')], filtered_types=None ) - self.assertDictEqual({ - (e2.type, e2.state_key): e2.event_id, + self.assertStateMapEqual({ + (e2.type, e2.state_key): e2, }, state) # check we can filter to the m.room.name event (with a wildcard None state key) - state = yield self.store.get_state_for_events( - e5.event_id, ((EventTypes.Name, None)), filtered_types=None + state = yield self.store.get_state_for_event( + e5.event_id, [(EventTypes.Name, None)], filtered_types=None ) - self.assertDictEqual({ - (e2.type, e2.state_key): e2.event_id, + self.assertStateMapEqual({ + (e2.type, e2.state_key): e2, }, state) # check we can grab the m.room.member events (with a wildcard None state key) - state = yield self.store.get_state_for_events( - e5.event_id, ((EventTypes.Member, None)), filtered_types=None + state = yield self.store.get_state_for_event( + e5.event_id, [(EventTypes.Member, None)], filtered_types=None ) - self.assertDictEqual({ - (e3.type, e3.state_key): e3.event_id, - (e5.type, e5.state_key): e5.event_id, + self.assertStateMapEqual({ + (e3.type, e3.state_key): e3, + (e5.type, e5.state_key): e5, }, state) # check we can use filter_types to grab a specific room member # without filtering out the other event types - state = yield self.store.get_state_for_events( - e5.event_id, ((EventTypes.Member, self.u_alice)), + state = yield self.store.get_state_for_event( + e5.event_id, [(EventTypes.Member, self.u_alice.to_string())], filtered_types=[EventTypes.Member], ) - self.assertDictEqual({ - (e1.type, e1.state_key): e3.event_id, - (e2.type, e2.state_key): e3.event_id, - (e3.type, e3.state_key): e5.event_id, + self.assertStateMapEqual({ + (e1.type, e1.state_key): e1, + (e2.type, e2.state_key): e2, + (e3.type, e3.state_key): e3, }, state) From 254fb430d1662c93c56c2abbd6984e07fb04c36b Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 23 Jul 2018 19:21:20 +0100 Subject: [PATCH 39/64] incorporate review --- synapse/handlers/sync.py | 67 ++++++++++++++++------------------------ synapse/storage/state.py | 20 +++++------- 2 files changed, 35 insertions(+), 52 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b597f94cf6..5689ad2f58 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -543,17 +543,6 @@ class SyncHandler(object): state_ids = current_state_ids - # track the membership state events as of the beginning of this - # timeline sequence, so they can be filtered out of the state - # if we are lazy loading members. - if lazy_load_members: - member_state_ids = { - t: state_ids[t] - for t in state_ids if t[0] == EventTypes.Member - } - else: - member_state_ids = {} - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -562,9 +551,9 @@ class SyncHandler(object): state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_ids, - timeline_start_members=member_state_ids, previous={}, current=current_state_ids, + lazy_load_members=lazy_load_members, ) elif batch.limited: state_at_previous_sync = yield self.get_state_at( @@ -582,37 +571,27 @@ class SyncHandler(object): filtered_types=filtered_types, ) - # track the membership state events as of the beginning of this - # timeline sequence, so they can be filtered out of the state - # if we are lazy loading members. - if lazy_load_members: - # TODO: optionally filter out redundant membership events at this - # point, to stop repeatedly sending members in every /sync as if - # the client isn't tracking them. - # When implement, this should filter using event_ids (not mxids). - # In practice, limited syncs are - # relatively rare so it's not a total disaster to send redundant - # members down at this point. Redundant members are ones which - # repeatedly get sent down /sync because we don't know if the client - # is caching them or not. - member_state_ids = { - t: state_at_timeline_start[t] - for t in state_at_timeline_start if t[0] == EventTypes.Member - } - else: - member_state_ids = {} - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() } + # TODO: optionally filter out redundant membership events at this + # point, to stop repeatedly sending members in every /sync as if + # the client isn't tracking them. + # When implemented, this should filter using event_ids (not mxids). + # In practice, limited syncs are + # relatively rare so it's not a total disaster to send redundant + # members down at this point. Redundant members are ones which + # repeatedly get sent down /sync because we don't know if the client + # is caching them or not. + state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, - timeline_start_members=member_state_ids, previous=state_at_previous_sync, current=current_state_ids, + lazy_load_members=lazy_load_members, ) else: state_ids = {} @@ -1536,16 +1515,14 @@ def _action_has_highlight(actions): return False -def _calculate_state(timeline_contains, timeline_start, timeline_start_members, - previous, current): +def _calculate_state( + timeline_contains, timeline_start, previous, current, lazy_load_members, +): """Works out what state to include in a sync response. Args: timeline_contains (dict): state in the timeline timeline_start (dict): state at the start of the timeline - timeline_start_members (dict): state at the start of the timeline - for room members who participate in this chunk of timeline. - Should always be a subset of timeline_start. previous (dict): state at the end of the previous sync (or empty dict if this is an initial sync) current (dict): state at the end of the timeline @@ -1565,11 +1542,21 @@ def _calculate_state(timeline_contains, timeline_start, timeline_start_members, c_ids = set(e for e in current.values()) ts_ids = set(e for e in timeline_start.values()) - tsm_ids = set(e for e in timeline_start_members.values()) p_ids = set(e for e in previous.values()) tc_ids = set(e for e in timeline_contains.values()) - state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | tsm_ids + # track the membership events in the state as of the start of the timeline + # so we can add them back in to the state if we're lazyloading. We don't + # add them into state if they're already contained in the timeline. + if lazy_load_members: + ll_ids = set( + e for t, e in timeline_start.iteritems() + if t[0] == EventTypes.Member and e not in tc_ids + ) + else: + ll_ids = set() + + state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | ll_ids return { event_id_to_key[e]: e for e in state_ids diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f09be7172d..40ca8bd2a2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -191,10 +191,10 @@ class StateGroupWorkerStore(SQLBaseStore): Args: groups(list[int]): list of state group IDs to query - types(list[str|None, str|None])|None: List of 2-tuples of the form + types (Iterable[str, str|None]|None): list of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the `type`. If None, all types are returned. - filtered_types(list[str]|None): Only apply filtering via `types` to this + filtered_types(Iterable[str]|None): Only apply filtering via `types` to this list of event types. Other types of events are returned unfiltered. If None, `types` filtering is applied to all events. @@ -207,19 +207,17 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, filtered_types + self._get_state_groups_from_groups_txn, chunk, types, filtered_types, ) results.update(res) defer.returnValue(results) def _get_state_groups_from_groups_txn( - self, txn, groups, types=None, filtered_types=None + self, txn, groups, types=None, filtered_types=None, ): results = {group: {} for group in groups} - include_other_types = False if filtered_types is None else True - if types is not None: types = list(set(types)) # deduplicate types list @@ -269,7 +267,7 @@ class StateGroupWorkerStore(SQLBaseStore): for etype, state_key in types ] - if include_other_types: + if filtered_types is not None: # XXX: check whether this slows postgres down like a list of # ORs does too? unique_types = set(filtered_types) @@ -308,7 +306,7 @@ class StateGroupWorkerStore(SQLBaseStore): where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) - if include_other_types: + if filtered_types is not None: unique_types = set(filtered_types) where_clauses.append( "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" @@ -538,8 +536,6 @@ class StateGroupWorkerStore(SQLBaseStore): # tracks which of the requested types are missing from our cache missing_types = set() - include_other_types = False if filtered_types is None else True - for typ, state_key in types: key = (typ, state_key) @@ -562,7 +558,7 @@ class StateGroupWorkerStore(SQLBaseStore): def include(typ, state_key): valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: - return include_other_types and typ not in filtered_types + return filtered_types is not None and typ not in filtered_types if valid_state_keys is None: return True if state_key in valid_state_keys: @@ -598,7 +594,7 @@ class StateGroupWorkerStore(SQLBaseStore): Args: groups (iterable[int]): list of state groups for which we want to get the state. - types (None|iterable[(None, None|str)]): + types (None|iterable[(str, None|str)]): indicates the state type/keys required. If None, the whole state is fetched and returned. From 004a83b43a5c75ec6ca3378c8ef5a4c616e33e8c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 23 Jul 2018 22:32:35 +0100 Subject: [PATCH 40/64] changelog --- changelog.d/2970.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/2970.feature diff --git a/changelog.d/2970.feature b/changelog.d/2970.feature new file mode 100644 index 0000000000..5eb928563f --- /dev/null +++ b/changelog.d/2970.feature @@ -0,0 +1 @@ +add support for the lazy_loaded_members filter as per MSC1227 From efcdacad7d1b7f52f879179701c7e0d9b763511f Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 23 Jul 2018 22:41:05 +0100 Subject: [PATCH 41/64] handle case where types is [] on postgres correctly --- synapse/storage/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 40ca8bd2a2..f99d3871e4 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -255,7 +255,7 @@ class StateGroupWorkerStore(SQLBaseStore): # Turns out that postgres doesn't like doing a list of OR's and # is about 1000x slower, so we just issue a query for each specific # type seperately. - if types: + if types is not None: clause_to_args = [ ( "AND type = ? AND state_key = ?", From cd241d6bda01a761fbe1ca29727dacd918fb8975 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 12:39:40 +0100 Subject: [PATCH 42/64] incorporate more review --- synapse/handlers/sync.py | 12 +++++++++--- synapse/storage/state.py | 36 +++++++++--------------------------- tests/storage/test_state.py | 9 +++++++++ 3 files changed, 27 insertions(+), 30 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 5689ad2f58..e5a2329d73 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1526,6 +1526,9 @@ def _calculate_state( previous (dict): state at the end of the previous sync (or empty dict if this is an initial sync) current (dict): state at the end of the timeline + lazy_load_members (bool): whether to return members from timeline_start + or not. assumes that timeline_start has already been filtered to + include only the members the client needs to know about. Returns: dict @@ -1545,9 +1548,12 @@ def _calculate_state( p_ids = set(e for e in previous.values()) tc_ids = set(e for e in timeline_contains.values()) - # track the membership events in the state as of the start of the timeline - # so we can add them back in to the state if we're lazyloading. We don't - # add them into state if they're already contained in the timeline. + # If we are lazyloading room members, we explicitly add the membership events + # for the senders in the timeline into the state block returned by /sync, + # as we may not have sent them to the client before. We find these membership + # events by filtering them out of timeline_start, which has already been filtered + # to only include membership events for the senders in the timeline. + if lazy_load_members: ll_ids = set( e for t, e in timeline_start.iteritems() diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f99d3871e4..1413a6f910 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -185,7 +185,7 @@ class StateGroupWorkerStore(SQLBaseStore): }) @defer.inlineCallbacks - def _get_state_groups_from_groups(self, groups, types, filtered_types=None): + def _get_state_groups_from_groups(self, groups, types): """Returns the state groups for a given set of groups, filtering on types of state events. @@ -194,9 +194,6 @@ class StateGroupWorkerStore(SQLBaseStore): types (Iterable[str, str|None]|None): list of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the `type`. If None, all types are returned. - filtered_types(Iterable[str]|None): Only apply filtering via `types` to this - list of event types. Other types of events are returned unfiltered. - If None, `types` filtering is applied to all events. Returns: dictionary state_group -> (dict of (type, state_key) -> event id) @@ -207,14 +204,14 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, filtered_types, + self._get_state_groups_from_groups_txn, chunk, types, ) results.update(res) defer.returnValue(results) def _get_state_groups_from_groups_txn( - self, txn, groups, types=None, filtered_types=None, + self, txn, groups, types=None, ): results = {group: {} for group in groups} @@ -266,17 +263,6 @@ class StateGroupWorkerStore(SQLBaseStore): ) for etype, state_key in types ] - - if filtered_types is not None: - # XXX: check whether this slows postgres down like a list of - # ORs does too? - unique_types = set(filtered_types) - clause_to_args.append( - ( - "AND type <> ? " * len(unique_types), - list(unique_types) - ) - ) else: # If types is None we fetch all the state, and so just use an # empty where clause with no extra args. @@ -306,13 +292,6 @@ class StateGroupWorkerStore(SQLBaseStore): where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) - if filtered_types is not None: - unique_types = set(filtered_types) - where_clauses.append( - "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" - ) - where_args.extend(list(unique_types)) - where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: where_clause = "" @@ -643,13 +622,13 @@ class StateGroupWorkerStore(SQLBaseStore): # cache. Hence, if we are doing a wildcard lookup, populate the # cache fully so that we can do an efficient lookup next time. - if types and any(k is None for (t, k) in types): + if filtered_types or (types and any(k is None for (t, k) in types)): types_to_fetch = None else: types_to_fetch = types group_to_state_dict = yield self._get_state_groups_from_groups( - missing_groups, types_to_fetch, filtered_types + missing_groups, types_to_fetch ) for group, group_state_dict in iteritems(group_to_state_dict): @@ -659,7 +638,10 @@ class StateGroupWorkerStore(SQLBaseStore): if types: for k, v in iteritems(group_state_dict): (typ, _) = k - if k in types or (typ, None) in types: + if ( + (k in types or (typ, None) in types) or + (filtered_types and typ not in filtered_types) + ): state_dict[k] = v else: state_dict.update(group_state_dict) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index 8924ba9f7f..b2f314e9db 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -158,3 +158,12 @@ class StateStoreTestCase(tests.unittest.TestCase): (e2.type, e2.state_key): e2, (e3.type, e3.state_key): e3, }, state) + + state = yield self.store.get_state_for_event( + e5.event_id, [], filtered_types=[EventTypes.Member], + ) + + self.assertStateMapEqual({ + (e1.type, e1.state_key): e1, + (e2.type, e2.state_key): e2, + }, state) From eb1d911ab743e85154f7c4b2db8a954d152020dc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 13:40:49 +0100 Subject: [PATCH 43/64] rather than adding ll_ids, remove them from p_ids --- synapse/handlers/sync.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index e5a2329d73..1422843af8 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1553,16 +1553,17 @@ def _calculate_state( # as we may not have sent them to the client before. We find these membership # events by filtering them out of timeline_start, which has already been filtered # to only include membership events for the senders in the timeline. + # In practice, we can do this by removing them from the p_ids list. + # see https://github.com/matrix-org/synapse/pull/2970 + # /files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809 if lazy_load_members: - ll_ids = set( + p_ids.difference_update( e for t, e in timeline_start.iteritems() - if t[0] == EventTypes.Member and e not in tc_ids + if t[0] == EventTypes.Member ) - else: - ll_ids = set() - state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | ll_ids + state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids return { event_id_to_key[e]: e for e in state_ids From e22700c3dd929f9f0953b3d2b37e503eece82b38 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 13:59:07 +0100 Subject: [PATCH 44/64] consider non-filter_type types as wildcards, thus missing from the state-group-cache --- synapse/storage/state.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 1413a6f910..86f2c2e6b1 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -518,7 +518,10 @@ class StateGroupWorkerStore(SQLBaseStore): for typ, state_key in types: key = (typ, state_key) - if state_key is None: + if ( + state_key is None or + filtered_types is not None and typ not in filtered_types + ): type_to_key[typ] = None # we mark the type as missing from the cache because # when the cache was populated it might have been done with a From 1a01a5b964d3ea373355684a91b9f7fd95726fbc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 14:03:15 +0100 Subject: [PATCH 45/64] clarify comment on p_ids --- synapse/handlers/sync.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 1422843af8..4ced3144c8 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1553,7 +1553,8 @@ def _calculate_state( # as we may not have sent them to the client before. We find these membership # events by filtering them out of timeline_start, which has already been filtered # to only include membership events for the senders in the timeline. - # In practice, we can do this by removing them from the p_ids list. + # In practice, we can do this by removing them from the p_ids list, + # which is the list of relevant state we know we have already sent to the client. # see https://github.com/matrix-org/synapse/pull/2970 # /files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809 From 38eaa5280d49df8d0e52f93693a576b7e936bde5 Mon Sep 17 00:00:00 2001 From: Michael Telatynski <7t3chguy@gmail.com> Date: Tue, 24 Jul 2018 17:25:42 +0100 Subject: [PATCH 46/64] add changelog entry for PR#3391 Signed-off-by: Michael Telatynski <7t3chguy@gmail.com> --- changelog.d/3391.bugfix | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3391.bugfix diff --git a/changelog.d/3391.bugfix b/changelog.d/3391.bugfix new file mode 100644 index 0000000000..88eeb50df2 --- /dev/null +++ b/changelog.d/3391.bugfix @@ -0,0 +1 @@ +Default inviter_display_name to mxid for email invites \ No newline at end of file From cb5c37a57c387a74f6079008870cf024e674dfe5 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 20:34:45 +0100 Subject: [PATCH 47/64] handle the edge case for _get_some_state_from_cache where types is [] --- synapse/storage/state.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 86f2c2e6b1..989977c644 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -547,7 +547,13 @@ class StateGroupWorkerStore(SQLBaseStore): return True return False - got_all = is_all or not missing_types + if types == [] and filtered_types is not None: + # special wildcard case for empty type-list but an explicit filtered_types + # which means that we'll try to return all types which aren't in the + # filtered_types list. missing_types will always be empty, so we ignore it. + got_all = is_all + else: + got_all = is_all or not missing_types return { k: v for k, v in iteritems(state_dict_ids) From 7d9fb88617f475ac7e064c5cccc8d78dbd78d2a3 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 25 Jul 2018 16:15:33 +0100 Subject: [PATCH 48/64] incorporate more review. --- synapse/storage/state.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 989977c644..e38427bf9d 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -493,12 +493,6 @@ class StateGroupWorkerStore(SQLBaseStore): def _get_some_state_from_cache(self, group, types, filtered_types=None): """Checks if group is in cache. See `_get_state_for_groups` - Returns 3-tuple (`state_dict`, `missing_types`, `got_all`). - `missing_types` is the list of types that aren't in the cache for that - group. `got_all` is a bool indicating if we successfully retrieved all - requests state from the cache, if False we need to query the DB for the - missing state. - Args: group(int): The state group to lookup types(list[str, str|None]): List of 2-tuples of the form @@ -507,6 +501,11 @@ class StateGroupWorkerStore(SQLBaseStore): filtered_types(list[str]|None): Only apply filtering via `types` to this list of event types. Other types of events are returned unfiltered. If None, `types` filtering is applied to all events. + + Returns 2-tuple (`state_dict`, `got_all`). + `got_all` is a bool indicating if we successfully retrieved all + requests state from the cache, if False we need to query the DB for the + missing state. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) @@ -520,7 +519,7 @@ class StateGroupWorkerStore(SQLBaseStore): if ( state_key is None or - filtered_types is not None and typ not in filtered_types + (filtered_types is not None and typ not in filtered_types) ): type_to_key[typ] = None # we mark the type as missing from the cache because @@ -547,18 +546,17 @@ class StateGroupWorkerStore(SQLBaseStore): return True return False - if types == [] and filtered_types is not None: - # special wildcard case for empty type-list but an explicit filtered_types - # which means that we'll try to return all types which aren't in the - # filtered_types list. missing_types will always be empty, so we ignore it. - got_all = is_all - else: - got_all = is_all or not missing_types + got_all = is_all + if not got_all: + # the cache is incomplete. We may still have got all the results we need, if + # we don't have any wildcards in the match list. + if not missing_types and filtered_types is None: + got_all = True return { k: v for k, v in iteritems(state_dict_ids) if include(k[0], k[1]) - }, missing_types, got_all + }, got_all def _get_all_state_from_cache(self, group): """Checks if group is in cache. See `_get_state_for_groups` @@ -603,7 +601,7 @@ class StateGroupWorkerStore(SQLBaseStore): missing_groups = [] if types is not None: for group in set(groups): - state_dict_ids, _, got_all = self._get_some_state_from_cache( + state_dict_ids, got_all = self._get_some_state_from_cache( group, types, filtered_types ) results[group] = state_dict_ids From 0a7ee0ab8b0794c8633177edca8f839d34c6a42a Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 25 Jul 2018 16:10:34 +0100 Subject: [PATCH 49/64] add tests for _get_some_state_from_cache --- tests/storage/test_state.py | 150 ++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index b2f314e9db..b7797c45e8 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -159,6 +159,8 @@ class StateStoreTestCase(tests.unittest.TestCase): (e3.type, e3.state_key): e3, }, state) + # check that types=[], filtered_types=[EventTypes.Member] + # doesn't return all members state = yield self.store.get_state_for_event( e5.event_id, [], filtered_types=[EventTypes.Member], ) @@ -167,3 +169,151 @@ class StateStoreTestCase(tests.unittest.TestCase): (e1.type, e1.state_key): e1, (e2.type, e2.state_key): e2, }, state) + + ################################## + # _get_some_state_from_cache tests + ################################## + + room_id = self.room.to_string() + group_ids = yield self.store.get_state_groups_ids(room_id, [e5.event_id]) + group = group_ids.keys()[0] + + # test that _get_some_state_from_cache correctly filters out members with types=[] + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, True) + self.assertDictEqual({ + (e1.type, e1.state_key): e1.event_id, + (e2.type, e2.state_key): e2.event_id, + }, state_dict) + + # test that _get_some_state_from_cache correctly filters out members with types=wildcard + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [(EventTypes.Member, None)], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, True) + self.assertDictEqual({ + (e1.type, e1.state_key): e1.event_id, + (e2.type, e2.state_key): e2.event_id, + (e3.type, e3.state_key): e3.event_id, + # e4 is overwritten by e5 + (e5.type, e5.state_key): e5.event_id, + }, state_dict) + + # test that _get_some_state_from_cache correctly filters out members with types=specific + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [(EventTypes.Member, e5.state_key)], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, True) + self.assertDictEqual({ + (e1.type, e1.state_key): e1.event_id, + (e2.type, e2.state_key): e2.event_id, + (e5.type, e5.state_key): e5.event_id, + }, state_dict) + + # test that _get_some_state_from_cache correctly filters out members with types=specific + # and no filtered_types + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [(EventTypes.Member, e5.state_key)], filtered_types=None + ) + + self.assertEqual(is_all, True) + self.assertDictEqual({ + (e5.type, e5.state_key): e5.event_id, + }, state_dict) + + ####################################################### + # deliberately remove e2 (room name) from the _state_group_cache + + (is_all, known_absent, state_dict_ids) = self.store._state_group_cache.get(group) + + self.assertEqual(is_all, True) + self.assertEqual(known_absent, set()) + self.assertDictEqual(state_dict_ids, { + (e1.type, e1.state_key): e1.event_id, + (e2.type, e2.state_key): e2.event_id, + (e3.type, e3.state_key): e3.event_id, + # e4 is overwritten by e5 + (e5.type, e5.state_key): e5.event_id, + }) + + state_dict_ids.pop((e2.type, e2.state_key)) + self.store._state_group_cache.invalidate(group) + self.store._state_group_cache.update( + sequence=self.store._state_group_cache.sequence, + key=group, + value=state_dict_ids, + # list fetched keys so it knows it's partial + fetched_keys=( + (e1.type, e1.state_key), + (e3.type, e3.state_key), + (e5.type, e5.state_key), + ) + ) + + (is_all, known_absent, state_dict_ids) = self.store._state_group_cache.get(group) + + self.assertEqual(is_all, False) + self.assertEqual(known_absent, set([ + (e1.type, e1.state_key), + (e3.type, e3.state_key), + (e5.type, e5.state_key), + ])) + self.assertDictEqual(state_dict_ids, { + (e1.type, e1.state_key): e1.event_id, + (e3.type, e3.state_key): e3.event_id, + (e5.type, e5.state_key): e5.event_id, + }) + + ################################################### + # test that things work with a partial cache + + # test that _get_some_state_from_cache correctly filters out members with types=[] + room_id = self.room.to_string() + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, False) + self.assertDictEqual({ + (e1.type, e1.state_key): e1.event_id, + }, state_dict) + + # test that _get_some_state_from_cache correctly filters out members with types=wildcard + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [(EventTypes.Member, None)], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, False) + self.assertDictEqual({ + (e1.type, e1.state_key): e1.event_id, + (e3.type, e3.state_key): e3.event_id, + # e4 is overwritten by e5 + (e5.type, e5.state_key): e5.event_id, + }, state_dict) + + # test that _get_some_state_from_cache correctly filters out members with types=specific + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [(EventTypes.Member, e5.state_key)], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, False) + self.assertDictEqual({ + (e1.type, e1.state_key): e1.event_id, + (e5.type, e5.state_key): e5.event_id, + }, state_dict) + + # test that _get_some_state_from_cache correctly filters out members with types=specific + # and no filtered_types + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [(EventTypes.Member, e5.state_key)], filtered_types=None + ) + + self.assertEqual(is_all, True) + self.assertDictEqual({ + (e5.type, e5.state_key): e5.event_id, + }, state_dict) From 0620d27f4d4a7f5c228c519b489efc8bc0c0a7d0 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 25 Jul 2018 17:21:17 +0100 Subject: [PATCH 50/64] flake8 --- tests/storage/test_state.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index b7797c45e8..7a76d67b8c 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -170,15 +170,15 @@ class StateStoreTestCase(tests.unittest.TestCase): (e2.type, e2.state_key): e2, }, state) - ################################## - # _get_some_state_from_cache tests - ################################## + ####################################################### + # _get_some_state_from_cache tests against a full cache + ####################################################### room_id = self.room.to_string() group_ids = yield self.store.get_state_groups_ids(room_id, [e5.event_id]) group = group_ids.keys()[0] - # test that _get_some_state_from_cache correctly filters out members with types=[] + # test _get_some_state_from_cache correctly filters out members with types=[] (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [], filtered_types=[EventTypes.Member] ) @@ -189,7 +189,7 @@ class StateStoreTestCase(tests.unittest.TestCase): (e2.type, e2.state_key): e2.event_id, }, state_dict) - # test that _get_some_state_from_cache correctly filters out members with types=wildcard + # test _get_some_state_from_cache correctly filters in members with wildcard types (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [(EventTypes.Member, None)], filtered_types=[EventTypes.Member] ) @@ -203,7 +203,7 @@ class StateStoreTestCase(tests.unittest.TestCase): (e5.type, e5.state_key): e5.event_id, }, state_dict) - # test that _get_some_state_from_cache correctly filters out members with types=specific + # test _get_some_state_from_cache correctly filters in members with specific types (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [(EventTypes.Member, e5.state_key)], filtered_types=[EventTypes.Member] ) @@ -215,7 +215,7 @@ class StateStoreTestCase(tests.unittest.TestCase): (e5.type, e5.state_key): e5.event_id, }, state_dict) - # test that _get_some_state_from_cache correctly filters out members with types=specific + # test _get_some_state_from_cache correctly filters in members with specific types # and no filtered_types (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [(EventTypes.Member, e5.state_key)], filtered_types=None @@ -269,10 +269,10 @@ class StateStoreTestCase(tests.unittest.TestCase): (e5.type, e5.state_key): e5.event_id, }) - ################################################### + ############################################ # test that things work with a partial cache - # test that _get_some_state_from_cache correctly filters out members with types=[] + # test _get_some_state_from_cache correctly filters out members with types=[] room_id = self.room.to_string() (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [], filtered_types=[EventTypes.Member] @@ -283,7 +283,7 @@ class StateStoreTestCase(tests.unittest.TestCase): (e1.type, e1.state_key): e1.event_id, }, state_dict) - # test that _get_some_state_from_cache correctly filters out members with types=wildcard + # test _get_some_state_from_cache correctly filters in members wildcard types (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [(EventTypes.Member, None)], filtered_types=[EventTypes.Member] ) @@ -296,7 +296,7 @@ class StateStoreTestCase(tests.unittest.TestCase): (e5.type, e5.state_key): e5.event_id, }, state_dict) - # test that _get_some_state_from_cache correctly filters out members with types=specific + # test _get_some_state_from_cache correctly filters in members with specific types (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [(EventTypes.Member, e5.state_key)], filtered_types=[EventTypes.Member] ) @@ -307,7 +307,7 @@ class StateStoreTestCase(tests.unittest.TestCase): (e5.type, e5.state_key): e5.event_id, }, state_dict) - # test that _get_some_state_from_cache correctly filters out members with types=specific + # test _get_some_state_from_cache correctly filters in members with specific types # and no filtered_types (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [(EventTypes.Member, e5.state_key)], filtered_types=None From d8e65ed7e111243c08c0b87c9a49e7537c355074 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 25 Jul 2018 15:44:41 -0600 Subject: [PATCH 51/64] Fix a minor documentation typo in on_make_leave --- synapse/handlers/federation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 145c1a21d4..49068c06d9 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -1279,7 +1279,7 @@ class FederationHandler(BaseHandler): @log_function def on_make_leave_request(self, room_id, user_id): """ We've received a /make_leave/ request, so we create a partial - join event for the room and return that. We do *not* persist or + leave event for the room and return that. We do *not* persist or process it until the other server has signed it and sent it back. """ builder = self.event_builder_factory.new({ From 6185650f9cb0b9db13d90b8447c441ecccfe1701 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Wed, 25 Jul 2018 15:46:56 -0600 Subject: [PATCH 52/64] Create 3609.misc --- changelog.d/3609.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3609.misc diff --git a/changelog.d/3609.misc b/changelog.d/3609.misc new file mode 100644 index 0000000000..5b9566d076 --- /dev/null +++ b/changelog.d/3609.misc @@ -0,0 +1 @@ +Fix a documentation typo in on_make_leave_request From bc7944e6d2ea0076badd0eba414e1ba7020eb1e6 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 25 Jul 2018 23:36:31 +0100 Subject: [PATCH 53/64] switch missing_types to be a bool --- synapse/storage/state.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index e38427bf9d..b27b3ae144 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -511,8 +511,8 @@ class StateGroupWorkerStore(SQLBaseStore): type_to_key = {} - # tracks which of the requested types are missing from our cache - missing_types = set() + # tracks whether any of ourrequested types are missing from the cache + missing_types = False for typ, state_key in types: key = (typ, state_key) @@ -526,13 +526,13 @@ class StateGroupWorkerStore(SQLBaseStore): # when the cache was populated it might have been done with a # restricted set of state_keys, so the wildcard will not work # and the cache may be incomplete. - missing_types.add(key) + missing_types = True else: if type_to_key.get(typ, object()) is not None: type_to_key.setdefault(typ, set()).add(state_key) if key not in state_dict_ids and key not in known_absent: - missing_types.add(key) + missing_types = True sentinel = object() From 03751a64203b169cbf33b636b6d940ca6d414c31 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 11:44:26 +0100 Subject: [PATCH 54/64] Fix some looping_call calls which were broken in #3604 It turns out that looping_call does check the deferred returned by its callback, and (at least in the case of client_ips), we were relying on this, and I broke it in #3604. Update run_as_background_process to return the deferred, and make sure we return it to clock.looping_call. --- changelog.d/3610.feature | 1 + synapse/app/homeserver.py | 4 ++-- synapse/groups/attestations.py | 2 +- synapse/handlers/profile.py | 2 +- synapse/metrics/background_process_metrics.py | 10 ++++++++-- synapse/rest/media/v1/media_repository.py | 2 +- synapse/rest/media/v1/preview_url_resource.py | 2 +- synapse/storage/client_ips.py | 2 +- synapse/storage/devices.py | 2 +- synapse/storage/event_federation.py | 2 +- synapse/storage/event_push_actions.py | 4 ++-- synapse/storage/transactions.py | 4 +++- synapse/util/caches/expiringcache.py | 2 +- 13 files changed, 24 insertions(+), 15 deletions(-) create mode 100644 changelog.d/3610.feature diff --git a/changelog.d/3610.feature b/changelog.d/3610.feature new file mode 100644 index 0000000000..77a294cb9f --- /dev/null +++ b/changelog.d/3610.feature @@ -0,0 +1 @@ +Add metrics to track resource usage by background processes diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index b7e7718290..57b815d777 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -429,7 +429,7 @@ def run(hs): stats_process = [] def start_phone_stats_home(): - run_as_background_process("phone_stats_home", phone_stats_home) + return run_as_background_process("phone_stats_home", phone_stats_home) @defer.inlineCallbacks def phone_stats_home(): @@ -502,7 +502,7 @@ def run(hs): ) def generate_user_daily_visit_stats(): - run_as_background_process( + return run_as_background_process( "generate_user_daily_visits", hs.get_datastore().generate_user_daily_visits, ) diff --git a/synapse/groups/attestations.py b/synapse/groups/attestations.py index 4216af0a27..b04f4234ca 100644 --- a/synapse/groups/attestations.py +++ b/synapse/groups/attestations.py @@ -153,7 +153,7 @@ class GroupAttestionRenewer(object): defer.returnValue({}) def _start_renew_attestations(self): - run_as_background_process("renew_attestations", self._renew_attestations) + return run_as_background_process("renew_attestations", self._renew_attestations) @defer.inlineCallbacks def _renew_attestations(self): diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 43692b83a8..cb5c6d587e 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -256,7 +256,7 @@ class ProfileHandler(BaseHandler): ) def _start_update_remote_profile_cache(self): - run_as_background_process( + return run_as_background_process( "Update remote profile", self._update_remote_profile_cache, ) diff --git a/synapse/metrics/background_process_metrics.py b/synapse/metrics/background_process_metrics.py index 9d820e44a6..ce678d5f75 100644 --- a/synapse/metrics/background_process_metrics.py +++ b/synapse/metrics/background_process_metrics.py @@ -151,13 +151,19 @@ def run_as_background_process(desc, func, *args, **kwargs): This should be used to wrap processes which are fired off to run in the background, instead of being associated with a particular request. + It returns a Deferred which completes when the function completes, but it doesn't + follow the synapse logcontext rules, which makes it appropriate for passing to + clock.looping_call and friends (or for firing-and-forgetting in the middle of a + normal synapse inlineCallbacks function). + Args: desc (str): a description for this background process type func: a function, which may return a Deferred args: positional args for func kwargs: keyword args for func - Returns: None + Returns: Deferred which returns the result of func, but note that it does not + follow the synapse logcontext rules. """ @defer.inlineCallbacks def run(): @@ -176,4 +182,4 @@ def run_as_background_process(desc, func, *args, **kwargs): _background_processes[desc].remove(proc) with PreserveLoggingContext(): - run() + return run() diff --git a/synapse/rest/media/v1/media_repository.py b/synapse/rest/media/v1/media_repository.py index 5b13378caa..174ad20123 100644 --- a/synapse/rest/media/v1/media_repository.py +++ b/synapse/rest/media/v1/media_repository.py @@ -106,7 +106,7 @@ class MediaRepository(object): ) def _start_update_recently_accessed(self): - run_as_background_process( + return run_as_background_process( "update_recently_accessed_media", self._update_recently_accessed, ) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 4efd5339a4..27aa0def2f 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -373,7 +373,7 @@ class PreviewUrlResource(Resource): }) def _start_expire_url_cache_data(self): - run_as_background_process( + return run_as_background_process( "expire_url_cache_data", self._expire_url_cache_data, ) diff --git a/synapse/storage/client_ips.py b/synapse/storage/client_ips.py index 77ae10da3d..b8cefd43d6 100644 --- a/synapse/storage/client_ips.py +++ b/synapse/storage/client_ips.py @@ -102,7 +102,7 @@ class ClientIpStore(background_updates.BackgroundUpdateStore): to_update, ) - run_as_background_process( + return run_as_background_process( "update_client_ips", update, ) diff --git a/synapse/storage/devices.py b/synapse/storage/devices.py index 52dccb1507..c0943ecf91 100644 --- a/synapse/storage/devices.py +++ b/synapse/storage/devices.py @@ -712,7 +712,7 @@ class DeviceStore(SQLBaseStore): logger.info("Pruned %d device list outbound pokes", txn.rowcount) - run_as_background_process( + return run_as_background_process( "prune_old_outbound_device_pokes", self.runInteraction, "_prune_old_outbound_device_pokes", diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 65f2d19e20..f269ec6fb3 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -549,7 +549,7 @@ class EventFederationStore(EventFederationWorkerStore): sql, (self.stream_ordering_month_ago, self.stream_ordering_month_ago,) ) - run_as_background_process( + return run_as_background_process( "delete_old_forward_extrem_cache", self.runInteraction, "_delete_old_forward_extrem_cache", diff --git a/synapse/storage/event_push_actions.py b/synapse/storage/event_push_actions.py index 4f44b0ad47..6840320641 100644 --- a/synapse/storage/event_push_actions.py +++ b/synapse/storage/event_push_actions.py @@ -460,7 +460,7 @@ class EventPushActionsWorkerStore(SQLBaseStore): ) def _find_stream_orderings_for_times(self): - run_as_background_process( + return run_as_background_process( "event_push_action_stream_orderings", self.runInteraction, "_find_stream_orderings_for_times", @@ -790,7 +790,7 @@ class EventPushActionsStore(EventPushActionsWorkerStore): """, (room_id, user_id, stream_ordering)) def _start_rotate_notifs(self): - run_as_background_process("rotate_notifs", self._rotate_notifs) + return run_as_background_process("rotate_notifs", self._rotate_notifs) @defer.inlineCallbacks def _rotate_notifs(self): diff --git a/synapse/storage/transactions.py b/synapse/storage/transactions.py index b4b479d94c..428e7fa36e 100644 --- a/synapse/storage/transactions.py +++ b/synapse/storage/transactions.py @@ -273,7 +273,9 @@ class TransactionStore(SQLBaseStore): return self.cursor_to_dict(txn) def _start_cleanup_transactions(self): - run_as_background_process("cleanup_transactions", self._cleanup_transactions) + return run_as_background_process( + "cleanup_transactions", self._cleanup_transactions, + ) def _cleanup_transactions(self): now = self._clock.time_msec() diff --git a/synapse/util/caches/expiringcache.py b/synapse/util/caches/expiringcache.py index 465adc54a8..ce85b2ae11 100644 --- a/synapse/util/caches/expiringcache.py +++ b/synapse/util/caches/expiringcache.py @@ -64,7 +64,7 @@ class ExpiringCache(object): return def f(): - run_as_background_process( + return run_as_background_process( "prune_cache_%s" % self._cache_name, self._prune_cache, ) From 1b4d73fa520a301ec35ba417d14a4549a44f33e0 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 12:53:51 +0100 Subject: [PATCH 55/64] comment on event_edges --- synapse/storage/schema/full_schemas/16/event_edges.sql | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/storage/schema/full_schemas/16/event_edges.sql b/synapse/storage/schema/full_schemas/16/event_edges.sql index 52eec88357..6b5a5a88fa 100644 --- a/synapse/storage/schema/full_schemas/16/event_edges.sql +++ b/synapse/storage/schema/full_schemas/16/event_edges.sql @@ -37,7 +37,8 @@ CREATE TABLE IF NOT EXISTS event_edges( event_id TEXT NOT NULL, prev_event_id TEXT NOT NULL, room_id TEXT NOT NULL, - is_state BOOL NOT NULL, + is_state BOOL NOT NULL, -- true if this is a prev_state edge rather than a regular + -- event dag edge. UNIQUE (event_id, prev_event_id, room_id, is_state) ); From bd4b25f4d07e07a2da0382cfc59a5a262883c0fc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 13:19:08 +0100 Subject: [PATCH 56/64] Remove some redundant joins on event_edges.room_id We've long passed the point where it's possible to have the same event_id in different tables, so these join conditions are redundant: we can just join on event_id. event_edges is of non-trivial size, and the room_id column is wasteful, so let's stop reading from it. In future, we can stop writing to it, and then drop it. --- synapse/storage/event_federation.py | 13 ++++++------- synapse/storage/events.py | 1 - 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 65f2d19e20..801581dcf7 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -114,9 +114,9 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, sql = ( "SELECT b.event_id, MAX(e.depth) FROM events as e" " INNER JOIN event_edges as g" - " ON g.event_id = e.event_id AND g.room_id = e.room_id" + " ON g.event_id = e.event_id" " INNER JOIN event_backward_extremities as b" - " ON g.prev_event_id = b.event_id AND g.room_id = b.room_id" + " ON g.prev_event_id = b.event_id" " WHERE b.room_id = ? AND g.is_state is ?" " GROUP BY b.event_id" ) @@ -330,8 +330,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, "SELECT depth, prev_event_id FROM event_edges" " INNER JOIN events" " ON prev_event_id = events.event_id" - " AND event_edges.room_id = events.room_id" - " WHERE event_edges.room_id = ? AND event_edges.event_id = ?" + " WHERE event_edges.event_id = ?" " AND event_edges.is_state = ?" " LIMIT ?" ) @@ -365,7 +364,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, txn.execute( query, - (room_id, event_id, False, limit - len(event_results)) + (event_id, False, limit - len(event_results)) ) for row in txn: @@ -402,7 +401,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, query = ( "SELECT prev_event_id FROM event_edges " - "WHERE room_id = ? AND event_id = ? AND is_state = ? " + "WHERE event_id = ? AND is_state = ? " "LIMIT ?" ) @@ -411,7 +410,7 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, for event_id in front: txn.execute( query, - (room_id, event_id, False, limit - len(event_results)) + (event_id, False, limit - len(event_results)) ) for e_id, in txn: diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 200f5ec95f..cb10fdedc0 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -520,7 +520,6 @@ class EventsStore(EventsWorkerStore): iterable=list(new_latest_event_ids), retcols=["prev_event_id"], keyvalues={ - "room_id": room_id, "is_state": False, }, desc="_calculate_new_extremeties", From cf78eaebad818df8a6bbe039a28689860523d2f9 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 13:22:40 +0100 Subject: [PATCH 57/64] changelog --- changelog.d/3613.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3613.misc diff --git a/changelog.d/3613.misc b/changelog.d/3613.misc new file mode 100644 index 0000000000..d9378f6b49 --- /dev/null +++ b/changelog.d/3613.misc @@ -0,0 +1 @@ +Remove some redundant joins on event_edges.room_id From 5c1d301fd9e669b6704d54caeb1e8a3a223ba053 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 14:12:00 +0100 Subject: [PATCH 58/64] Stop populating events.content This field is no longer read from, so we should stop populating it. Once we're happy that this doesn't break everything, and a rollback is unlikely, we can think about dropping the column. --- synapse/storage/events.py | 1 - .../delta/50/make_event_content_nullable.py | 93 +++++++++++++++++++ 2 files changed, 93 insertions(+), 1 deletion(-) create mode 100644 synapse/storage/schema/delta/50/make_event_content_nullable.py diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 200f5ec95f..94515cd153 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -1189,7 +1189,6 @@ class EventsStore(EventsWorkerStore): "type": event.type, "processed": True, "outlier": event.internal_metadata.is_outlier(), - "content": encode_json(event.content).decode("UTF-8"), "origin_server_ts": int(event.origin_server_ts), "received_ts": self._clock.time_msec(), "sender": event.sender, diff --git a/synapse/storage/schema/delta/50/make_event_content_nullable.py b/synapse/storage/schema/delta/50/make_event_content_nullable.py new file mode 100644 index 0000000000..fa4a289514 --- /dev/null +++ b/synapse/storage/schema/delta/50/make_event_content_nullable.py @@ -0,0 +1,93 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +We want to stop populating 'event.content', so we need to make it nullable. + +If this has to be rolled back, then the following should populate the missing data: + +Postgres: + + UPDATE events SET content=(ej.json::json)->'content' FROM event_json ej + WHERE ej.event_id = events.event_id AND + stream_ordering < ( + SELECT stream_ordering FROM events WHERE content IS NOT NULL + ORDER BY stream_ordering LIMIT 1 + ); + + UPDATE events SET content=(ej.json::json)->'content' FROM event_json ej + WHERE ej.event_id = events.event_id AND + stream_ordering > ( + SELECT stream_ordering FROM events WHERE content IS NOT NULL + ORDER BY stream_ordering DESC LIMIT 1 + ); + +SQLite: + + UPDATE events SET content=( + SELECT json_extract(json,'$.content') FROM event_json ej + WHERE ej.event_id = events.event_id + ) + WHERE + stream_ordering < ( + SELECT stream_ordering FROM events WHERE content IS NOT NULL + ORDER BY stream_ordering LIMIT 1 + ) + OR stream_ordering > ( + SELECT stream_ordering FROM events WHERE content IS NOT NULL + ORDER BY stream_ordering DESC LIMIT 1 + ); + +""" + +import logging + +from synapse.storage.engines import PostgresEngine + +logger = logging.getLogger(__name__) + + +def run_create(cur, database_engine, *args, **kwargs): + if isinstance(database_engine, PostgresEngine): + cur.execute(""" + ALTER TABLE events ALTER COLUMN content DROP NOT NULL; + """) + return + + # sqlite is an arse about this. ref: https://www.sqlite.org/lang_altertable.html + cur.execute("PRAGMA schema_version") + (oldver,) = cur.fetchone() + + cur.execute("SELECT sql FROM sqlite_master WHERE tbl_name='events' AND type='table'") + (oldsql,) = cur.fetchone() + sql = oldsql.replace("content TEXT NOT NULL", "content TEXT") + if sql == oldsql: + raise Exception("Couldn't find null constraint to drop in %s" % oldsql) + + logger.info("Replacing definition of 'events' with: %s", sql) + + cur.execute("PRAGMA writable_schema=ON") + + cur.execute( + "UPDATE sqlite_master SET sql=? WHERE tbl_name='events' AND type='table'", + (sql, ), + ) + + cur.execute("PRAGMA schema_version=%i" % (oldver+1,)) + cur.execute("PRAGMA writable_schema=OFF") + + +def run_upgrade(*args, **kwargs): + pass From 51d7df19158de0f21e659625bf43716ff0a700bc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 14:54:04 +0100 Subject: [PATCH 59/64] Create the column nullable There's no real point in ever making the column non-nullable, and doing so breaks the sytests. --- .../delta/50/make_event_content_nullable.py | 15 +++++++-------- synapse/storage/schema/full_schemas/16/im.sql | 7 ++++++- 2 files changed, 13 insertions(+), 9 deletions(-) diff --git a/synapse/storage/schema/delta/50/make_event_content_nullable.py b/synapse/storage/schema/delta/50/make_event_content_nullable.py index fa4a289514..7d27342e39 100644 --- a/synapse/storage/schema/delta/50/make_event_content_nullable.py +++ b/synapse/storage/schema/delta/50/make_event_content_nullable.py @@ -60,6 +60,10 @@ logger = logging.getLogger(__name__) def run_create(cur, database_engine, *args, **kwargs): + pass + + +def run_upgrade(cur, database_engine, *args, **kwargs): if isinstance(database_engine, PostgresEngine): cur.execute(""" ALTER TABLE events ALTER COLUMN content DROP NOT NULL; @@ -67,27 +71,22 @@ def run_create(cur, database_engine, *args, **kwargs): return # sqlite is an arse about this. ref: https://www.sqlite.org/lang_altertable.html - cur.execute("PRAGMA schema_version") - (oldver,) = cur.fetchone() cur.execute("SELECT sql FROM sqlite_master WHERE tbl_name='events' AND type='table'") (oldsql,) = cur.fetchone() + sql = oldsql.replace("content TEXT NOT NULL", "content TEXT") if sql == oldsql: raise Exception("Couldn't find null constraint to drop in %s" % oldsql) logger.info("Replacing definition of 'events' with: %s", sql) + cur.execute("PRAGMA schema_version") + (oldver,) = cur.fetchone() cur.execute("PRAGMA writable_schema=ON") - cur.execute( "UPDATE sqlite_master SET sql=? WHERE tbl_name='events' AND type='table'", (sql, ), ) - cur.execute("PRAGMA schema_version=%i" % (oldver+1,)) cur.execute("PRAGMA writable_schema=OFF") - - -def run_upgrade(*args, **kwargs): - pass diff --git a/synapse/storage/schema/full_schemas/16/im.sql b/synapse/storage/schema/full_schemas/16/im.sql index ba5346806e..5f5cb8d01d 100644 --- a/synapse/storage/schema/full_schemas/16/im.sql +++ b/synapse/storage/schema/full_schemas/16/im.sql @@ -19,7 +19,12 @@ CREATE TABLE IF NOT EXISTS events( event_id TEXT NOT NULL, type TEXT NOT NULL, room_id TEXT NOT NULL, - content TEXT NOT NULL, + + -- 'content' used to be created NULLable, but as of delta 50 we drop that constraint. + -- the hack we use to drop the constraint doesn't work for an in-memory sqlite + -- database, which breaks the sytests. Hence, we no longer make it nullable. + content TEXT, + unrecognized_keys TEXT, processed BOOL NOT NULL, outlier BOOL NOT NULL, From 85531a06a27b0168b84d79fcc5102d2c204ba330 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 26 Jul 2018 14:55:29 +0100 Subject: [PATCH 60/64] changelog --- changelog.d/3614.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3614.misc diff --git a/changelog.d/3614.misc b/changelog.d/3614.misc new file mode 100644 index 0000000000..356f28471e --- /dev/null +++ b/changelog.d/3614.misc @@ -0,0 +1 @@ +Stop populating events.content From 7d32f0d745da6cbc2ec4bb28318488dfcab7b930 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Thu, 26 Jul 2018 14:41:59 -0600 Subject: [PATCH 61/64] Update the send_leave path to be an event_id It's still not used, however the parameter is an event ID not a transaction ID. --- synapse/federation/transport/server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index c9beca27c2..8574898f0c 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -404,10 +404,10 @@ class FederationMakeLeaveServlet(BaseFederationServlet): class FederationSendLeaveServlet(BaseFederationServlet): - PATH = "/send_leave/(?P[^/]*)/(?P[^/]*)" + PATH = "/send_leave/(?P[^/]*)/(?P[^/]*)" @defer.inlineCallbacks - def on_PUT(self, origin, content, query, room_id, txid): + def on_PUT(self, origin, content, query, room_id, event_id): content = yield self.handler.on_send_leave_request(origin, content) defer.returnValue((200, content)) From 49254d43a62e5ace114d8f41c3959063391a1b26 Mon Sep 17 00:00:00 2001 From: Travis Ralston Date: Thu, 26 Jul 2018 14:45:48 -0600 Subject: [PATCH 62/64] Create 3616.misc --- changelog.d/3616.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/3616.misc diff --git a/changelog.d/3616.misc b/changelog.d/3616.misc new file mode 100644 index 0000000000..04629faa36 --- /dev/null +++ b/changelog.d/3616.misc @@ -0,0 +1 @@ +Update the /send_leave path registration to use event_id rather than a transaction ID. From a75231b507e025eaaa4f06d8932c04fa4e942d48 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 26 Jul 2018 22:51:30 +0100 Subject: [PATCH 63/64] Deduplicate redundant lazy-loaded members (#3331) * attempt at deduplicating lazy-loaded members as per the proposal; we can deduplicate redundant lazy-loaded members which are sent in the same sync sequence. we do this heuristically rather than requiring the client to somehow tell us which members it has chosen to cache, by instead caching the last N members sent to a client, and not sending them again. For now we hardcode N to 100. Each cache for a given (user,device) tuple is in turn cached for up to X minutes (to avoid the caches building up). For now we hardcode X to 30. * add include_redundant_members filter option & make it work * remove stale todo * add tests for _get_some_state_from_cache * incorporate review --- changelog.d/3331.feature | 1 + synapse/api/filtering.py | 9 +++++ synapse/handlers/sync.py | 87 ++++++++++++++++++++++++++++------------ 3 files changed, 72 insertions(+), 25 deletions(-) create mode 100644 changelog.d/3331.feature diff --git a/changelog.d/3331.feature b/changelog.d/3331.feature new file mode 100644 index 0000000000..e574b9bcc3 --- /dev/null +++ b/changelog.d/3331.feature @@ -0,0 +1 @@ +add support for the include_redundant_members filter param as per MSC1227 diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 7e767b9bf5..186831e118 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -117,6 +117,9 @@ ROOM_EVENT_FILTER_SCHEMA = { "lazy_load_members": { "type": "boolean" }, + "include_redundant_members": { + "type": "boolean" + }, } } @@ -267,6 +270,9 @@ class FilterCollection(object): def lazy_load_members(self): return self._room_state_filter.lazy_load_members() + def include_redundant_members(self): + return self._room_state_filter.include_redundant_members() + def filter_presence(self, events): return self._presence_filter.filter(events) @@ -426,6 +432,9 @@ class Filter(object): def lazy_load_members(self): return self.filter_json.get("lazy_load_members", False) + def include_redundant_members(self): + return self.filter_json.get("include_redundant_members", False) + def _matches_wildcard(actual_value, filter_value): if filter_value.endswith("*"): diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4ced3144c8..dff1f67dcb 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -26,6 +26,8 @@ from synapse.api.constants import EventTypes, Membership from synapse.push.clientformat import format_push_rules_for_user from synapse.types import RoomStreamToken from synapse.util.async import concurrently_execute +from synapse.util.caches.expiringcache import ExpiringCache +from synapse.util.caches.lrucache import LruCache from synapse.util.caches.response_cache import ResponseCache from synapse.util.logcontext import LoggingContext from synapse.util.metrics import Measure, measure_func @@ -33,6 +35,14 @@ from synapse.visibility import filter_events_for_client logger = logging.getLogger(__name__) +# Store the cache that tracks which lazy-loaded members have been sent to a given +# client for no more than 30 minutes. +LAZY_LOADED_MEMBERS_CACHE_MAX_AGE = 30 * 60 * 1000 + +# Remember the last 100 members we sent to a client for the purposes of +# avoiding redundantly sending the same lazy-loaded members to the client +LAZY_LOADED_MEMBERS_CACHE_MAX_SIZE = 100 + SyncConfig = collections.namedtuple("SyncConfig", [ "user", @@ -182,6 +192,12 @@ class SyncHandler(object): self.response_cache = ResponseCache(hs, "sync") self.state = hs.get_state_handler() + # ExpiringCache((User, Device)) -> LruCache(state_key => event_id) + self.lazy_loaded_members_cache = ExpiringCache( + "lazy_loaded_members_cache", self.clock, + max_len=0, expiry_ms=LAZY_LOADED_MEMBERS_CACHE_MAX_AGE, + ) + def wait_for_sync_for_user(self, sync_config, since_token=None, timeout=0, full_state=False): """Get the sync for a client if we have new data for it now. Otherwise @@ -505,9 +521,13 @@ class SyncHandler(object): with Measure(self.clock, "compute_state_delta"): types = None - lazy_load_members = sync_config.filter_collection.lazy_load_members() filtered_types = None + lazy_load_members = sync_config.filter_collection.lazy_load_members() + include_redundant_members = ( + sync_config.filter_collection.include_redundant_members() + ) + if lazy_load_members: # We only request state for the members needed to display the # timeline: @@ -523,6 +543,11 @@ class SyncHandler(object): # only apply the filtering to room members filtered_types = [EventTypes.Member] + timeline_state = { + (event.type, event.state_key): event.event_id + for event in batch.events if event.is_state() + } + if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( @@ -543,11 +568,6 @@ class SyncHandler(object): state_ids = current_state_ids - timeline_state = { - (event.type, event.state_key): event.event_id - for event in batch.events if event.is_state() - } - state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_ids, @@ -571,21 +591,6 @@ class SyncHandler(object): filtered_types=filtered_types, ) - timeline_state = { - (event.type, event.state_key): event.event_id - for event in batch.events if event.is_state() - } - - # TODO: optionally filter out redundant membership events at this - # point, to stop repeatedly sending members in every /sync as if - # the client isn't tracking them. - # When implemented, this should filter using event_ids (not mxids). - # In practice, limited syncs are - # relatively rare so it's not a total disaster to send redundant - # members down at this point. Redundant members are ones which - # repeatedly get sent down /sync because we don't know if the client - # is caching them or not. - state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, @@ -596,16 +601,48 @@ class SyncHandler(object): else: state_ids = {} if lazy_load_members: - # TODO: filter out redundant members based on their mxids (not their - # event_ids) at this point. We know we can do it based on mxid as this - # is an non-gappy incremental sync. - if types: state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, filtered_types=filtered_types, ) + if lazy_load_members and not include_redundant_members: + cache_key = (sync_config.user.to_string(), sync_config.device_id) + cache = self.lazy_loaded_members_cache.get(cache_key) + if cache is None: + logger.debug("creating LruCache for %r", cache_key) + cache = LruCache(LAZY_LOADED_MEMBERS_CACHE_MAX_SIZE) + self.lazy_loaded_members_cache[cache_key] = cache + else: + logger.debug("found LruCache for %r", cache_key) + + # if it's a new sync sequence, then assume the client has had + # amnesia and doesn't want any recent lazy-loaded members + # de-duplicated. + if since_token is None: + logger.debug("clearing LruCache for %r", cache_key) + cache.clear() + else: + # only send members which aren't in our LruCache (either + # because they're new to this client or have been pushed out + # of the cache) + logger.debug("filtering state from %r...", state_ids) + state_ids = { + t: event_id + for t, event_id in state_ids.iteritems() + if cache.get(t[1]) != event_id + } + logger.debug("...to %r", state_ids) + + # add any member IDs we are about to send into our LruCache + for t, event_id in itertools.chain( + state_ids.items(), + timeline_state.items(), + ): + if t[0] == EventTypes.Member: + cache.set(t[1], event_id) + state = {} if state_ids: state = yield self.store.get_events(list(state_ids.values())) From e9b2d047f68b74e231609ce40978f4452ac9e22f Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 27 Jul 2018 15:12:50 +0100 Subject: [PATCH 64/64] make /context lazyload & filter aware (#3567) make /context lazyload & filter aware. --- changelog.d/3567.feature | 1 + synapse/handlers/room.py | 24 +++++++++++++++++++++--- synapse/handlers/search.py | 2 +- synapse/rest/client/v1/room.py | 9 +++++++++ synapse/storage/stream.py | 14 +++++++++++--- 5 files changed, 43 insertions(+), 7 deletions(-) create mode 100644 changelog.d/3567.feature diff --git a/changelog.d/3567.feature b/changelog.d/3567.feature new file mode 100644 index 0000000000..c74c1f57a9 --- /dev/null +++ b/changelog.d/3567.feature @@ -0,0 +1 @@ +make the /context API filter & lazy-load aware as per MSC1227 diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 003b848c00..7b7804d9b2 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -15,6 +15,7 @@ # limitations under the License. """Contains functions for performing events on rooms.""" +import itertools import logging import math import string @@ -401,7 +402,7 @@ class RoomContextHandler(object): self.store = hs.get_datastore() @defer.inlineCallbacks - def get_event_context(self, user, room_id, event_id, limit): + def get_event_context(self, user, room_id, event_id, limit, event_filter): """Retrieves events, pagination tokens and state around a given event in a room. @@ -411,6 +412,8 @@ class RoomContextHandler(object): event_id (str) limit (int): The maximum number of events to return in total (excluding state). + event_filter (Filter|None): the filter to apply to the events returned + (excluding the target event_id) Returns: dict, or None if the event isn't found @@ -443,7 +446,7 @@ class RoomContextHandler(object): ) results = yield self.store.get_events_around( - room_id, event_id, before_limit, after_limit + room_id, event_id, before_limit, after_limit, event_filter ) results["events_before"] = yield filter_evts(results["events_before"]) @@ -455,8 +458,23 @@ class RoomContextHandler(object): else: last_event_id = event_id + types = None + filtered_types = None + if event_filter and event_filter.lazy_load_members(): + members = set(ev.sender for ev in itertools.chain( + results["events_before"], + (results["event"],), + results["events_after"], + )) + filtered_types = [EventTypes.Member] + types = [(EventTypes.Member, member) for member in members] + + # XXX: why do we return the state as of the last event rather than the + # first? Shouldn't we be consistent with /sync? + # https://github.com/matrix-org/matrix-doc/issues/687 + state = yield self.store.get_state_for_events( - [last_event_id], None + [last_event_id], types, filtered_types=filtered_types, ) results["state"] = list(state[last_event_id].values()) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 69ae9731d5..c464adbd0b 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -287,7 +287,7 @@ class SearchHandler(BaseHandler): contexts = {} for event in allowed_events: res = yield self.store.get_events_around( - event.room_id, event.event_id, before_limit, after_limit + event.room_id, event.event_id, before_limit, after_limit, ) logger.info( diff --git a/synapse/rest/client/v1/room.py b/synapse/rest/client/v1/room.py index b7bd878c90..13c331550b 100644 --- a/synapse/rest/client/v1/room.py +++ b/synapse/rest/client/v1/room.py @@ -531,11 +531,20 @@ class RoomEventContextServlet(ClientV1RestServlet): limit = parse_integer(request, "limit", default=10) + # picking the API shape for symmetry with /messages + filter_bytes = parse_string(request, "filter") + if filter_bytes: + filter_json = urlparse.unquote(filter_bytes).decode("UTF-8") + event_filter = Filter(json.loads(filter_json)) + else: + event_filter = None + results = yield self.room_context_handler.get_event_context( requester.user, room_id, event_id, limit, + event_filter, ) if not results: diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 66856342f0..25d0097b58 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -527,7 +527,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): ) @defer.inlineCallbacks - def get_events_around(self, room_id, event_id, before_limit, after_limit): + def get_events_around( + self, room_id, event_id, before_limit, after_limit, event_filter=None, + ): """Retrieve events and pagination tokens around a given event in a room. @@ -536,6 +538,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): event_id (str) before_limit (int) after_limit (int) + event_filter (Filter|None) Returns: dict @@ -543,7 +546,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): results = yield self.runInteraction( "get_events_around", self._get_events_around_txn, - room_id, event_id, before_limit, after_limit + room_id, event_id, before_limit, after_limit, event_filter, ) events_before = yield self._get_events( @@ -563,7 +566,9 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): "end": results["after"]["token"], }) - def _get_events_around_txn(self, txn, room_id, event_id, before_limit, after_limit): + def _get_events_around_txn( + self, txn, room_id, event_id, before_limit, after_limit, event_filter, + ): """Retrieves event_ids and pagination tokens around a given event in a room. @@ -572,6 +577,7 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): event_id (str) before_limit (int) after_limit (int) + event_filter (Filter|None) Returns: dict @@ -601,11 +607,13 @@ class StreamWorkerStore(EventsWorkerStore, SQLBaseStore): rows, start_token = self._paginate_room_events_txn( txn, room_id, before_token, direction='b', limit=before_limit, + event_filter=event_filter, ) events_before = [r.event_id for r in rows] rows, end_token = self._paginate_room_events_txn( txn, room_id, after_token, direction='f', limit=after_limit, + event_filter=event_filter, ) events_after = [r.event_id for r in rows]