From 9b334b3f97057ac145622d2e4d0ad036ef27b468 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 11 Mar 2018 20:01:41 +0000 Subject: [PATCH 01/48] WIP experiment in lazyloading room members --- synapse/handlers/sync.py | 43 ++++++++++++++++++++++++++---------- synapse/storage/state.py | 47 +++++++++++++++++++++++++++++++++++----- 2 files changed, 73 insertions(+), 17 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 0f713ce038..809e9fece9 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -399,7 +399,7 @@ class SyncHandler(object): )) @defer.inlineCallbacks - def get_state_after_event(self, event): + def get_state_after_event(self, event, types=None): """ Get the room state after the given event @@ -409,14 +409,14 @@ class SyncHandler(object): Returns: A Deferred map from ((type, state_key)->Event) """ - state_ids = yield self.store.get_state_ids_for_event(event.event_id) + state_ids = yield self.store.get_state_ids_for_event(event.event_id, types) if event.is_state(): state_ids = state_ids.copy() state_ids[(event.type, event.state_key)] = event.event_id defer.returnValue(state_ids) @defer.inlineCallbacks - def get_state_at(self, room_id, stream_position): + def get_state_at(self, room_id, stream_position, types=None): """ Get the room state at a particular stream position Args: @@ -432,7 +432,7 @@ class SyncHandler(object): if last_events: last_event = last_events[-1] - state = yield self.get_state_after_event(last_event) + state = yield self.get_state_after_event(last_event, types) else: # no events in this room - so presumably no state @@ -441,7 +441,7 @@ class SyncHandler(object): @defer.inlineCallbacks def compute_state_delta(self, room_id, batch, sync_config, since_token, now_token, - full_state): + full_state, filter_members): """ Works out the differnce in state between the start of the timeline and the previous sync. @@ -454,6 +454,8 @@ class SyncHandler(object): be None. now_token(str): Token of the end of the current batch. full_state(bool): Whether to force returning the full state. + filter_members(bool): Whether to only return state for members + referenced in this timeline segment Returns: A deferred new event dictionary @@ -464,18 +466,35 @@ class SyncHandler(object): # TODO(mjark) Check for new redactions in the state events. with Measure(self.clock, "compute_state_delta"): + + types = None + if filter_members: + # We only request state for the members needed to display the + # timeline: + types = ( + (EventTypes.Member, state_key) + for state_key in set( + event.sender # FIXME: we also care about targets etc. + for event in batch.events + ) + ) + types.append((None, None)) # don't just filter to room members + + # TODO: we should opportunistically deduplicate these members too + # within the same sync series (based on an in-memory cache) + if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id + batch.events[-1].event_id, types=types ) state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id + batch.events[0].event_id, types=types ) else: current_state_ids = yield self.get_state_at( - room_id, stream_position=now_token + room_id, stream_position=now_token, types=types ) state_ids = current_state_ids @@ -493,15 +512,15 @@ class SyncHandler(object): ) elif batch.limited: state_at_previous_sync = yield self.get_state_at( - room_id, stream_position=since_token + room_id, stream_position=since_token, types=types ) current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id + batch.events[-1].event_id, types=types ) state_at_timeline_start = yield self.store.get_state_ids_for_event( - batch.events[0].event_id + batch.events[0].event_id, types=types ) timeline_state = { @@ -1325,7 +1344,7 @@ class SyncHandler(object): state = yield self.compute_state_delta( room_id, batch, sync_config, since_token, now_token, - full_state=full_state + full_state=full_state, filter_members=True ) if room_builder.rtype == "joined": diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 2b325e1c1f..da6bb685fa 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -198,8 +198,15 @@ class StateGroupWorkerStore(SQLBaseStore): def _get_state_groups_from_groups_txn(self, txn, groups, types=None): results = {group: {} for group in groups} + + include_other_types = False + if types is not None: - types = list(set(types)) # deduplicate types list + type_set = set(types) + if (None, None) in type_set: + include_other_types = True + type_set.remove((None, None)) + types = list(type_set) # deduplicate types list if isinstance(self.database_engine, PostgresEngine): # Temporarily disable sequential scans in this transaction. This is @@ -238,11 +245,21 @@ class StateGroupWorkerStore(SQLBaseStore): if types: clause_to_args = [ ( - "AND type = ? AND state_key = ?", - (etype, state_key) + "AND type = ? AND state_key = ?" if state_key is not None else "AND type = ?", + (etype, state_key) if state_key is not None else (etype) ) for etype, state_key in types ] + + if include_other_types: + # XXX: check whether this slows postgres down like a list of + # ORs does too? + clause_to_args.append( + ( + "AND type <> ? " * len(types), + [t for (t, _) in types] + ) + ) else: # If types is None we fetch all the state, and so just use an # empty where clause with no extra args. @@ -263,6 +280,10 @@ class StateGroupWorkerStore(SQLBaseStore): where_clause = "AND (%s)" % ( " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), ) + if include_other_types: + where_clause += " AND (%s)" % ( + " AND ".join(["type <> ?"] * len(types)), + ) else: where_clause = "" @@ -449,17 +470,27 @@ class StateGroupWorkerStore(SQLBaseStore): group: The state group to lookup types (list): List of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the - `type`. + `type`. Presence of type of `None` indicates that types not + in the list should not be filtered out. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) type_to_key = {} missing_types = set() + include_other_types = False + for typ, state_key in types: key = (typ, state_key) + + if typ is None: + include_other_types = True + next + if state_key is None: type_to_key[typ] = None + # XXX: why do we mark the type as missing from our cache just + # because we weren't filtering on a specific value of state_key? missing_types.add(key) else: if type_to_key.get(typ, object()) is not None: @@ -478,7 +509,7 @@ class StateGroupWorkerStore(SQLBaseStore): return True if state_key in valid_state_keys: return True - return False + return include_other_types got_all = is_all or not missing_types @@ -507,6 +538,12 @@ class StateGroupWorkerStore(SQLBaseStore): with matching types. `types` is a list of `(type, state_key)`, where a `state_key` of None matches all state_keys. If `types` is None then all events are returned. + + XXX: is it really true that `state_key` of None in `types` matches all + state_keys? it looks like _get-some_state_from_cache does the right thing, + but _get_state_groups_from_groups_txn treats ths None is turned into + 'AND state_key = NULL' or similar (at least until i just fixed it) --Matthew + I've filed this as https://github.com/matrix-org/synapse/issues/2969 """ if types: types = frozenset(types) From 87133652657c5073616419b0afc533eac6ae6750 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 11 Mar 2018 20:10:25 +0000 Subject: [PATCH 02/48] typos --- synapse/handlers/sync.py | 4 ++-- synapse/storage/state.py | 2 ++ 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 809e9fece9..fa730ca760 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -471,13 +471,13 @@ class SyncHandler(object): if filter_members: # We only request state for the members needed to display the # timeline: - types = ( + types = [ (EventTypes.Member, state_key) for state_key in set( event.sender # FIXME: we also care about targets etc. for event in batch.events ) - ) + ] types.append((None, None)) # don't just filter to room members # TODO: we should opportunistically deduplicate these members too diff --git a/synapse/storage/state.py b/synapse/storage/state.py index da6bb685fa..0238200286 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -301,6 +301,8 @@ class StateGroupWorkerStore(SQLBaseStore): args = [next_group] if types: args.extend(i for typ in types for i in typ) + if include_other_types: + args.extend(typ for (typ, _) in types) txn.execute( "SELECT type, state_key, event_id FROM state_groups_state" From 97c0496cfa89b037d89fccd05dd03442b80e07fc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 12 Mar 2018 00:27:06 +0000 Subject: [PATCH 03/48] fix sqlite where clause --- synapse/storage/state.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 0238200286..b796d3c995 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -277,13 +277,14 @@ class StateGroupWorkerStore(SQLBaseStore): results[group][key] = event_id else: if types is not None: - where_clause = "AND (%s)" % ( + where_clause = "AND (%s" % ( " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), ) if include_other_types: - where_clause += " AND (%s)" % ( + where_clause += " OR (%s)" % ( " AND ".join(["type <> ?"] * len(types)), ) + where_clause += ")" else: where_clause = "" From fdedcd1f4ddeaa3ed5bfd3c05ab2977b4e8ed457 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 12 Mar 2018 01:39:06 +0000 Subject: [PATCH 04/48] correctly handle None state_keys and fix include_other_types thinko --- synapse/storage/state.py | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index b796d3c995..405e6b6770 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -276,15 +276,23 @@ class StateGroupWorkerStore(SQLBaseStore): key = (typ, state_key) results[group][key] = event_id else: + where_args = [] if types is not None: - where_clause = "AND (%s" % ( - " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), - ) + where_clause = "AND (" + for typ in types: + if typ[1] is None: + where_clause += "(type = ?) OR " + where_args.extend(typ[0]) + else: + where_clause += "(type = ? AND state_key = ?) OR " + where_args.extend([typ[0], typ[1]]) + if include_other_types: - where_clause += " OR (%s)" % ( + where_clause += "(%s) OR " % ( " AND ".join(["type <> ?"] * len(types)), ) - where_clause += ")" + where_args.extend(t for (t, _) in types) + where_clause += "0)" # 0 to terminate the last OR else: where_clause = "" @@ -301,9 +309,7 @@ class StateGroupWorkerStore(SQLBaseStore): # after we finish deduping state, which requires this func) args = [next_group] if types: - args.extend(i for typ in types for i in typ) - if include_other_types: - args.extend(typ for (typ, _) in types) + args.extend(where_args) txn.execute( "SELECT type, state_key, event_id FROM state_groups_state" @@ -507,12 +513,12 @@ class StateGroupWorkerStore(SQLBaseStore): def include(typ, state_key): valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: - return False + return include_other_types if valid_state_keys is None: return True if state_key in valid_state_keys: return True - return include_other_types + return False got_all = is_all or not missing_types From 1b1c13777154b5b0cf8bf8cf809381f889a2a82d Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 17:52:52 +0000 Subject: [PATCH 05/48] fix bug #2926 --- synapse/storage/state.py | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 2b325e1c1f..783cebb351 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -240,6 +240,10 @@ class StateGroupWorkerStore(SQLBaseStore): ( "AND type = ? AND state_key = ?", (etype, state_key) + ) if state_key is not None else + ( + "AND type = ?", + (etype) ) for etype, state_key in types ] @@ -259,10 +263,19 @@ class StateGroupWorkerStore(SQLBaseStore): key = (typ, state_key) results[group][key] = event_id else: + where_args = [] if types is not None: - where_clause = "AND (%s)" % ( - " OR ".join(["(type = ? AND state_key = ?)"] * len(types)), - ) + where_clause = "AND (" + for typ in types: + if typ[1] is None: + where_clause += "(type = ?)" + where_args.extend(typ[0]) + else: + where_clause += "(type = ? AND state_key = ?)" + where_args.extend([typ[0], typ[1]]) + if typ != types[-1]: + where_clause += " OR " + where_clause += ")" else: where_clause = "" @@ -279,7 +292,7 @@ class StateGroupWorkerStore(SQLBaseStore): # after we finish deduping state, which requires this func) args = [next_group] if types: - args.extend(i for typ in types for i in typ) + args.extend(where_args) txn.execute( "SELECT type, state_key, event_id FROM state_groups_state" From 52f7e23c7276b2848aa5291d8b78875b7c32a658 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 18:07:55 +0000 Subject: [PATCH 06/48] PR feedbackz --- synapse/storage/state.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 783cebb351..77259a3141 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -240,10 +240,9 @@ class StateGroupWorkerStore(SQLBaseStore): ( "AND type = ? AND state_key = ?", (etype, state_key) - ) if state_key is not None else - ( + ) if state_key is not None else ( "AND type = ?", - (etype) + (etype,) ) for etype, state_key in types ] From b2aba9e43053f9f297671fce0051bfc18a8b655a Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 18:13:44 +0000 Subject: [PATCH 07/48] build where_clause sanely --- synapse/storage/state.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 77259a3141..82740266bf 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -263,18 +263,16 @@ class StateGroupWorkerStore(SQLBaseStore): results[group][key] = event_id else: where_args = [] + where_clauses = [] if types is not None: - where_clause = "AND (" for typ in types: if typ[1] is None: - where_clause += "(type = ?)" + where_clauses.append("(type = ?)") where_args.extend(typ[0]) else: - where_clause += "(type = ? AND state_key = ?)" + where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) - if typ != types[-1]: - where_clause += " OR " - where_clause += ")" + where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: where_clause = "" From 865377a70d9d7db27b89348d2ebbd394f701c490 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 19:45:36 +0000 Subject: [PATCH 08/48] disable optimisation for searching for state groups when type filter includes wildcards on state_key --- synapse/storage/state.py | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 82740266bf..39f73afaa2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -264,11 +264,13 @@ class StateGroupWorkerStore(SQLBaseStore): else: where_args = [] where_clauses = [] + wildcard_types = False if types is not None: for typ in types: if typ[1] is None: where_clauses.append("(type = ?)") where_args.extend(typ[0]) + wildcard_types = True else: where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) @@ -302,9 +304,17 @@ class StateGroupWorkerStore(SQLBaseStore): if (typ, state_key) not in results[group] ) - # If the lengths match then we must have all the types, - # so no need to go walk further down the tree. - if types is not None and len(results[group]) == len(types): + # If the number of entries inthe (type,state_key)->event_id dict + # matches the number of (type,state_keys) types we were searching + # for, then we must have found them all, so no need to go walk + # further down the tree... UNLESS our types filter contained + # wildcards (i.e. Nones) in which case we have to do an exhaustive + # search + if ( + types is not None and + not wildcard_types and + len(results[group]) == len(types) + ): break next_group = self._simple_select_one_onecol_txn( From afbf4d3dccb3d18276e4b119b0267490ca522b4b Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 19:48:04 +0000 Subject: [PATCH 09/48] typoe --- synapse/storage/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 39f73afaa2..ffa4246031 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -304,7 +304,7 @@ class StateGroupWorkerStore(SQLBaseStore): if (typ, state_key) not in results[group] ) - # If the number of entries inthe (type,state_key)->event_id dict + # If the number of entries in the (type,state_key)->event_id dict # matches the number of (type,state_keys) types we were searching # for, then we must have found them all, so no need to go walk # further down the tree... UNLESS our types filter contained From 14a9d2f73d50225f190f42e270cbf9ef7447bd8c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 22:03:42 +0000 Subject: [PATCH 10/48] ensure we always include the members for a given timeline block --- synapse/handlers/sync.py | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index fa730ca760..c754cfdeeb 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -468,6 +468,8 @@ class SyncHandler(object): with Measure(self.clock, "compute_state_delta"): types = None + member_state_ids = {} + if filter_members: # We only request state for the members needed to display the # timeline: @@ -492,6 +494,13 @@ class SyncHandler(object): state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types ) + + if filter_members: + member_state_ids = { + t: state_ids[t] + for t in state_ids if t[0] == EventTypes.member + } + else: current_state_ids = yield self.get_state_at( room_id, stream_position=now_token, types=types @@ -499,6 +508,12 @@ class SyncHandler(object): state_ids = current_state_ids + if filter_members: + member_state_ids = { + t: state_ids[t] + for t in state_ids if t[0] == EventTypes.member + } + timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -507,6 +522,7 @@ class SyncHandler(object): state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_ids, + timeline_start_members=member_state_ids, previous={}, current=current_state_ids, ) @@ -523,6 +539,12 @@ class SyncHandler(object): batch.events[0].event_id, types=types ) + if filter_members: + member_state_ids = { + t: state_at_timeline_start[t] + for t in state_ids if t[0] == EventTypes.member + } + timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -531,6 +553,7 @@ class SyncHandler(object): state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, + timeline_start_members=member_state_ids, previous=state_at_previous_sync, current=current_state_ids, ) @@ -1440,12 +1463,16 @@ def _action_has_highlight(actions): return False -def _calculate_state(timeline_contains, timeline_start, previous, current): +def _calculate_state(timeline_contains, timeline_start, timeline_start_members, + previous, current): """Works out what state to include in a sync response. Args: timeline_contains (dict): state in the timeline timeline_start (dict): state at the start of the timeline + timeline_start_members (dict): state at the start of the timeline + for room members who participate in this chunk of timeline. + Should always be a subset of timeline_start. previous (dict): state at the end of the previous sync (or empty dict if this is an initial sync) current (dict): state at the end of the timeline @@ -1464,11 +1491,12 @@ def _calculate_state(timeline_contains, timeline_start, previous, current): } c_ids = set(e for e in current.values()) - tc_ids = set(e for e in timeline_contains.values()) - p_ids = set(e for e in previous.values()) ts_ids = set(e for e in timeline_start.values()) + tsm_ids = set(e for e in timeline_start_members.values()) + p_ids = set(e for e in previous.values()) + tc_ids = set(e for e in timeline_contains.values()) - state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids + state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | tsm_ids return { event_id_to_key[e]: e for e in state_ids From f0f9a0605b1bddc1b01d1bbb6af93f00763b8496 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 22:12:15 +0000 Subject: [PATCH 11/48] remove comment now #2969 is fixed --- synapse/storage/state.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 4ab16e18b2..4291cde7ab 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -561,12 +561,6 @@ class StateGroupWorkerStore(SQLBaseStore): with matching types. `types` is a list of `(type, state_key)`, where a `state_key` of None matches all state_keys. If `types` is None then all events are returned. - - XXX: is it really true that `state_key` of None in `types` matches all - state_keys? it looks like _get-some_state_from_cache does the right thing, - but _get_state_groups_from_groups_txn treats ths None is turned into - 'AND state_key = NULL' or similar (at least until i just fixed it) --Matthew - I've filed this as https://github.com/matrix-org/synapse/issues/2969 """ if types: types = frozenset(types) From ccca02846d07124f537b0c475308f9a26bfb3fb1 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 22:31:41 +0000 Subject: [PATCH 12/48] make it work --- synapse/handlers/sync.py | 6 +++--- synapse/storage/state.py | 10 ++++++---- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index c754cfdeeb..c05e3d107f 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -498,7 +498,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.member + for t in state_ids if t[0] == EventTypes.Member } else: @@ -511,7 +511,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.member + for t in state_ids if t[0] == EventTypes.Member } timeline_state = { @@ -542,7 +542,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_at_timeline_start[t] - for t in state_ids if t[0] == EventTypes.member + for t in state_ids if t[0] == EventTypes.Member } timeline_state = { diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 4291cde7ab..9c9994c073 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -257,10 +257,11 @@ class StateGroupWorkerStore(SQLBaseStore): if include_other_types: # XXX: check whether this slows postgres down like a list of # ORs does too? + unique_types = set([ t for (t, _) in types ]) clause_to_args.append( ( - "AND type <> ? " * len(types), - [t for (t, _) in types] + "AND type <> ? " * len(unique_types), + list(unique_types) ) ) else: @@ -293,10 +294,11 @@ class StateGroupWorkerStore(SQLBaseStore): where_args.extend([typ[0], typ[1]]) if include_other_types: + unique_types = set([ t for (t, _) in types ]) where_clauses.append( - "(" + " AND ".join(["type <> ?"] * len(types)) + ")" + "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" ) - where_args.extend(t for (t, _) in types) + where_args.extend(list(unique_types)) where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: From c9d72e4571752554dfe01d755ae23f55c5f84ade Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 13 Mar 2018 23:46:45 +0000 Subject: [PATCH 13/48] oops --- synapse/handlers/sync.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index c05e3d107f..887624c431 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -542,7 +542,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_at_timeline_start[t] - for t in state_ids if t[0] == EventTypes.Member + for t in state_at_timeline_start if t[0] == EventTypes.Member } timeline_state = { From 4d0cfef6ee023bfe83113a0378321830ebde1619 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 14 Mar 2018 00:02:20 +0000 Subject: [PATCH 14/48] add copyright to nudge CI --- synapse/handlers/sync.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 887624c431..edbd2ae771 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1,5 +1,6 @@ # -*- coding: utf-8 -*- -# Copyright 2015 - 2016 OpenMarket Ltd +# Copyright 2015, 2016 OpenMarket Ltd +# Copyright 2018 New Vector Ltd # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. From 9f77001e2747c36046e136c5d3c706c0aef54b15 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 14 Mar 2018 00:07:47 +0000 Subject: [PATCH 15/48] pep8 --- synapse/storage/state.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 9c9994c073..55159e64d0 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -257,7 +257,7 @@ class StateGroupWorkerStore(SQLBaseStore): if include_other_types: # XXX: check whether this slows postgres down like a list of # ORs does too? - unique_types = set([ t for (t, _) in types ]) + unique_types = set([t for (t, _) in types]) clause_to_args.append( ( "AND type <> ? " * len(unique_types), @@ -294,7 +294,7 @@ class StateGroupWorkerStore(SQLBaseStore): where_args.extend([typ[0], typ[1]]) if include_other_types: - unique_types = set([ t for (t, _) in types ]) + unique_types = set([t for (t, _) in types]) where_clauses.append( "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" ) From 3bc5bd2d22e6b53ec1f89760301df1517e71b53a Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 00:52:04 +0000 Subject: [PATCH 16/48] make incr syncs work --- synapse/handlers/sync.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index edbd2ae771..84c894ca4a 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -499,7 +499,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.Member + for t in state_ids if state_ids[t][0] == EventTypes.Member } else: @@ -512,7 +512,7 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_ids[t] - for t in state_ids if t[0] == EventTypes.Member + for t in state_ids if state_ids[t][0] == EventTypes.Member } timeline_state = { @@ -543,7 +543,8 @@ class SyncHandler(object): if filter_members: member_state_ids = { t: state_at_timeline_start[t] - for t in state_at_timeline_start if t[0] == EventTypes.Member + for t in state_at_timeline_start + if state_at_timeline_start[t][0] == EventTypes.Member } timeline_state = { From bf49d2dca8db6d82f09441a35cd3655c746b6b4d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 15 Mar 2018 23:38:43 +0000 Subject: [PATCH 17/48] Replace some ujson with simplejson to make it work --- synapse/http/server.py | 3 ++- synapse/rest/client/v2_alpha/sync.py | 2 +- synapse/storage/events.py | 2 +- synapse/storage/events_worker.py | 2 +- 4 files changed, 5 insertions(+), 4 deletions(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 4b567215c8..3c7a0ef97a 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -38,6 +38,7 @@ import collections import logging import urllib import ujson +import simplejson logger = logging.getLogger(__name__) @@ -462,7 +463,7 @@ def respond_with_json(request, code, json_object, send_cors=False, json_bytes = encode_canonical_json(json_object) else: # ujson doesn't like frozen_dicts. - json_bytes = ujson.dumps(json_object, ensure_ascii=False) + json_bytes = simplejson.dumps(json_object) return respond_with_json_bytes( request, code, json_bytes, diff --git a/synapse/rest/client/v2_alpha/sync.py b/synapse/rest/client/v2_alpha/sync.py index a0a8e4b8e4..eb91c0b293 100644 --- a/synapse/rest/client/v2_alpha/sync.py +++ b/synapse/rest/client/v2_alpha/sync.py @@ -33,7 +33,7 @@ from ._base import set_timeline_upper_limit import itertools import logging -import ujson as json +import simplejson as json logger = logging.getLogger(__name__) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 3890878170..9fc65229fd 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -38,7 +38,7 @@ from functools import wraps import synapse.metrics import logging -import ujson as json +import simplejson as json # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 diff --git a/synapse/storage/events_worker.py b/synapse/storage/events_worker.py index 86c3b48ad4..2e23dd78ba 100644 --- a/synapse/storage/events_worker.py +++ b/synapse/storage/events_worker.py @@ -28,7 +28,7 @@ from synapse.api.errors import SynapseError from collections import namedtuple import logging -import ujson as json +import simplejson as json # these are only included to make the type annotations work from synapse.events import EventBase # noqa: F401 From 5b3b3aada8952b53f82723227c9758ed47450a2e Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:17:34 +0000 Subject: [PATCH 18/48] simplify timeline_start_members --- synapse/handlers/sync.py | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 84c894ca4a..ffb4f7915e 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -496,12 +496,6 @@ class SyncHandler(object): batch.events[0].event_id, types=types ) - if filter_members: - member_state_ids = { - t: state_ids[t] - for t in state_ids if state_ids[t][0] == EventTypes.Member - } - else: current_state_ids = yield self.get_state_at( room_id, stream_position=now_token, types=types @@ -509,11 +503,13 @@ class SyncHandler(object): state_ids = current_state_ids - if filter_members: - member_state_ids = { - t: state_ids[t] - for t in state_ids if state_ids[t][0] == EventTypes.Member - } + if filter_members: + logger.info("Finding members from %r", state_ids) + member_state_ids = { + e: state_ids[e] + for e in state_ids if state_ids[e][0] == EventTypes.Member + } + logger.info("Found members %r", member_state_ids) timeline_state = { (event.type, event.state_key): event.event_id @@ -541,11 +537,14 @@ class SyncHandler(object): ) if filter_members: + logger.info("Finding members from %r", state_at_timeline_start) member_state_ids = { - t: state_at_timeline_start[t] - for t in state_at_timeline_start - if state_at_timeline_start[t][0] == EventTypes.Member + e: state_at_timeline_start[e] + for e in state_at_timeline_start + if state_at_timeline_start[e][0] == EventTypes.Member } + logger.info("Found members %r", member_state_ids) + timeline_state = { (event.type, event.state_key): event.event_id From f7dcc404f216383bfd62e4611c6a28c3f13576dc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:37:53 +0000 Subject: [PATCH 19/48] add state_ids for timeline entries --- synapse/handlers/sync.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index ffb4f7915e..9b7e598e74 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -474,6 +474,7 @@ class SyncHandler(object): if filter_members: # We only request state for the members needed to display the # timeline: + types = [ (EventTypes.Member, state_key) for state_key in set( @@ -481,11 +482,14 @@ class SyncHandler(object): for event in batch.events ) ] - types.append((None, None)) # don't just filter to room members - # TODO: we should opportunistically deduplicate these members too + # TODO: we should opportunistically deduplicate these members here # within the same sync series (based on an in-memory cache) + if not types: + filter_members = False + types.append((None, None)) # don't just filter to room members + if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( @@ -545,7 +549,6 @@ class SyncHandler(object): } logger.info("Found members %r", member_state_ids) - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -559,7 +562,14 @@ class SyncHandler(object): current=current_state_ids, ) else: - state_ids = {} + if filter_members: + # strip off the (None, None) and filter to just room members + types = types[:-1] + state_ids = yield self.store.get_state_ids_for_event( + batch.events[0].event_id, types=types + ) + else: + state_ids = {} state = {} if state_ids: From 4f0493c850d4611e8ada42c1de54a18e8dc15a37 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:43:37 +0000 Subject: [PATCH 20/48] fix tsm search again --- synapse/handlers/sync.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 9b7e598e74..4bf85a128f 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -510,8 +510,8 @@ class SyncHandler(object): if filter_members: logger.info("Finding members from %r", state_ids) member_state_ids = { - e: state_ids[e] - for e in state_ids if state_ids[e][0] == EventTypes.Member + t: state_ids[t] + for t in state_ids if t[0] == EventTypes.Member } logger.info("Found members %r", member_state_ids) @@ -543,9 +543,8 @@ class SyncHandler(object): if filter_members: logger.info("Finding members from %r", state_at_timeline_start) member_state_ids = { - e: state_at_timeline_start[e] - for e in state_at_timeline_start - if state_at_timeline_start[e][0] == EventTypes.Member + t: state_at_timeline_start[t] + for t in state_at_timeline_start if t[0] == EventTypes.Member } logger.info("Found members %r", member_state_ids) From fc5397fdf5acefd33bd3b808b6d8cc7c31b69b55 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:44:55 +0000 Subject: [PATCH 21/48] remove debug --- synapse/handlers/sync.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 4bf85a128f..b7f42bd594 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -508,12 +508,10 @@ class SyncHandler(object): state_ids = current_state_ids if filter_members: - logger.info("Finding members from %r", state_ids) member_state_ids = { t: state_ids[t] for t in state_ids if t[0] == EventTypes.Member } - logger.info("Found members %r", member_state_ids) timeline_state = { (event.type, event.state_key): event.event_id @@ -541,12 +539,10 @@ class SyncHandler(object): ) if filter_members: - logger.info("Finding members from %r", state_at_timeline_start) member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member } - logger.info("Found members %r", member_state_ids) timeline_state = { (event.type, event.state_key): event.event_id From 0b56290f0b14025939515d01aa72f512f0c629dc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Fri, 16 Mar 2018 01:45:49 +0000 Subject: [PATCH 22/48] remove stale import --- synapse/http/server.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/http/server.py b/synapse/http/server.py index 3c7a0ef97a..e64aa92729 100644 --- a/synapse/http/server.py +++ b/synapse/http/server.py @@ -37,7 +37,6 @@ from twisted.web.util import redirectTo import collections import logging import urllib -import ujson import simplejson logger = logging.getLogger(__name__) From 366f730bf697fe8fbb18a509ec1852987bc80410 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Sun, 18 Mar 2018 21:40:35 +0000 Subject: [PATCH 23/48] only get member state IDs for incremental syncs if we're filtering --- synapse/handlers/sync.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b7f42bd594..6b57afd97b 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -557,14 +557,14 @@ class SyncHandler(object): current=current_state_ids, ) else: + state_ids = {} if filter_members: # strip off the (None, None) and filter to just room members types = types[:-1] - state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types - ) - else: - state_ids = {} + if types: + state_ids = yield self.store.get_state_ids_for_event( + batch.events[0].event_id, types=types + ) state = {} if state_ids: From 478af0f72005708dbbed23e30c547c3d66c07c0e Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 19 Mar 2018 01:00:12 +0000 Subject: [PATCH 24/48] reshuffle todo & comments --- synapse/handlers/sync.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 6b57afd97b..76f5057377 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -483,11 +483,15 @@ class SyncHandler(object): ) ] - # TODO: we should opportunistically deduplicate these members here - # within the same sync series (based on an in-memory cache) + # We can't remove redundant member types at this stage as it has + # to be done based on event_id, and we don't have the member + # event ids until we've pulled them out of the DB. if not types: + # an optimisation to stop needlessly trying to calculate + # member_state_ids filter_members = False + types.append((None, None)) # don't just filter to room members if full_state: @@ -559,6 +563,10 @@ class SyncHandler(object): else: state_ids = {} if filter_members: + # TODO: filter out redundant members based on their mxids (not their + # event_ids) at this point. We know we can do it based on mxid as this + # is an non-gappy incremental sync. + # strip off the (None, None) and filter to just room members types = types[:-1] if types: From b2f22829475ccfe19e994aedddb8d04995018bf4 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 19 Mar 2018 01:15:13 +0000 Subject: [PATCH 25/48] make lazy_load_members configurable in filters --- synapse/api/filtering.py | 6 ++++++ synapse/handlers/sync.py | 18 +++++++++--------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 83206348e5..339e4a31d6 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -260,6 +260,9 @@ class FilterCollection(object): def ephemeral_limit(self): return self._room_ephemeral_filter.limit() + def lazy_load_members(self): + return self._room_state_filter.lazy_load_members() + def filter_presence(self, events): return self._presence_filter.filter(events) @@ -416,6 +419,9 @@ class Filter(object): def limit(self): return self.filter_json.get("limit", 10) + def lazy_load_members(self): + return self.filter_json.get("lazy_load_members", False) + def _matches_wildcard(actual_value, filter_value): if filter_value.endswith("*"): diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 76f5057377..f521d22e91 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -442,7 +442,7 @@ class SyncHandler(object): @defer.inlineCallbacks def compute_state_delta(self, room_id, batch, sync_config, since_token, now_token, - full_state, filter_members): + full_state): """ Works out the differnce in state between the start of the timeline and the previous sync. @@ -455,7 +455,7 @@ class SyncHandler(object): be None. now_token(str): Token of the end of the current batch. full_state(bool): Whether to force returning the full state. - filter_members(bool): Whether to only return state for members + lazy_load_members(bool): Whether to only return state for members referenced in this timeline segment Returns: @@ -470,8 +470,9 @@ class SyncHandler(object): types = None member_state_ids = {} + lazy_load_members = sync_config.filter_collection.lazy_load_members() - if filter_members: + if lazy_load_members: # We only request state for the members needed to display the # timeline: @@ -490,7 +491,7 @@ class SyncHandler(object): if not types: # an optimisation to stop needlessly trying to calculate # member_state_ids - filter_members = False + lazy_load_members = False types.append((None, None)) # don't just filter to room members @@ -511,7 +512,7 @@ class SyncHandler(object): state_ids = current_state_ids - if filter_members: + if lazy_load_members: member_state_ids = { t: state_ids[t] for t in state_ids if t[0] == EventTypes.Member @@ -542,7 +543,7 @@ class SyncHandler(object): batch.events[0].event_id, types=types ) - if filter_members: + if lazy_load_members: member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member @@ -562,7 +563,7 @@ class SyncHandler(object): ) else: state_ids = {} - if filter_members: + if lazy_load_members: # TODO: filter out redundant members based on their mxids (not their # event_ids) at this point. We know we can do it based on mxid as this # is an non-gappy incremental sync. @@ -1380,8 +1381,7 @@ class SyncHandler(object): return state = yield self.compute_state_delta( - room_id, batch, sync_config, since_token, now_token, - full_state=full_state, filter_members=True + room_id, batch, sync_config, since_token, now_token, full_state=full_state ) if room_builder.rtype == "joined": From a6c8f7c875348ff8d63a7032c2f73a08551c516c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 01:09:55 +0100 Subject: [PATCH 26/48] add pydoc --- synapse/handlers/sync.py | 18 +++++++--- synapse/storage/state.py | 76 ++++++++++++++++++++++++++++------------ 2 files changed, 67 insertions(+), 27 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 05bf6d46dd..8e38078332 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -423,7 +423,11 @@ class SyncHandler(object): Args: event(synapse.events.EventBase): event of interest - + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A Deferred map from ((type, state_key)->Event) """ @@ -440,6 +444,11 @@ class SyncHandler(object): Args: room_id(str): room for which to get state stream_position(StreamToken): point at which to get state + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A Deferred map from ((type, state_key)->Event) @@ -472,8 +481,6 @@ class SyncHandler(object): be None. now_token(str): Token of the end of the current batch. full_state(bool): Whether to force returning the full state. - lazy_load_members(bool): Whether to only return state for members - referenced in this timeline segment Returns: A deferred new event dictionary @@ -496,7 +503,7 @@ class SyncHandler(object): types = [ (EventTypes.Member, state_key) for state_key in set( - event.sender # FIXME: we also care about targets etc. + event.sender # FIXME: we also care about invite targets etc. for event in batch.events ) ] @@ -1398,7 +1405,8 @@ class SyncHandler(object): return state = yield self.compute_state_delta( - room_id, batch, sync_config, since_token, now_token, full_state=full_state + room_id, batch, sync_config, since_token, now_token, + full_state=full_state ) if room_builder.rtype == "joined": diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 55159e64d0..63b6834202 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -182,7 +182,19 @@ class StateGroupWorkerStore(SQLBaseStore): @defer.inlineCallbacks def _get_state_groups_from_groups(self, groups, types): - """Returns dictionary state_group -> (dict of (type, state_key) -> event id) + """Returns the state groups for a given set of groups, filtering on + types of state events. + + Args: + groups(list[int]): list of state group IDs to query + types(list[str|None, str|None])|None: List of 2-tuples of the form + (`type`, `state_key`), where a `state_key` of `None` matches all + state_keys for the `type`. Presence of type of `None` indicates + that types not in the list should not be filtered out. If None, + all types are returned. + + Returns: + dictionary state_group -> (dict of (type, state_key) -> event id) """ results = {} @@ -204,6 +216,9 @@ class StateGroupWorkerStore(SQLBaseStore): if types is not None: type_set = set(types) if (None, None) in type_set: + # special case (None, None) to mean that other types should be + # returned - i.e. we were just filtering down the state keys + # for particular types. include_other_types = True type_set.remove((None, None)) types = list(type_set) # deduplicate types list @@ -360,10 +375,12 @@ class StateGroupWorkerStore(SQLBaseStore): that are in the `types` list. Args: - event_ids (list) - types (list): List of (type, state_key) tuples which are used to - filter the state fetched. `state_key` may be None, which matches - any `state_key` + event_ids (list[string]) + types (list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: deferred: A list of dicts corresponding to the event_ids given. @@ -399,9 +416,11 @@ class StateGroupWorkerStore(SQLBaseStore): Args: event_ids(list(str)): events whose state should be returned - types(list[(str, str)]|None): List of (type, state_key) tuples - which are used to filter the state fetched. May be None, which - matches any key + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A deferred dict from event_id -> (type, state_key) -> state_event @@ -427,9 +446,11 @@ class StateGroupWorkerStore(SQLBaseStore): Args: event_id(str): event whose state should be returned - types(list[(str, str)]|None): List of (type, state_key) tuples - which are used to filter the state fetched. May be None, which - matches any key + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A deferred dict from (type, state_key) -> state_event @@ -444,9 +465,11 @@ class StateGroupWorkerStore(SQLBaseStore): Args: event_id(str): event whose state should be returned - types(list[(str, str)]|None): List of (type, state_key) tuples - which are used to filter the state fetched. May be None, which - matches any key + types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + which are used to filter the state fetched. If `state_key` is None, + all events are returned of the given type. Presence of type of `None` + indicates that types not in the list should not be filtered out. + May be None, which matches any key. Returns: A deferred dict from (type, state_key) -> state_event @@ -492,11 +515,11 @@ class StateGroupWorkerStore(SQLBaseStore): missing state. Args: - group: The state group to lookup - types (list): List of 2-tuples of the form (`type`, `state_key`), - where a `state_key` of `None` matches all state_keys for the - `type`. Presence of type of `None` indicates that types not - in the list should not be filtered out. + group(int): The state group to lookup + types(list[str|None, str|None]): List of 2-tuples of the form + (`type`, `state_key`), where a `state_key` of `None` matches all + state_keys for the `type`. Presence of type of `None` indicates + that types not in the list should not be filtered out. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) @@ -560,9 +583,18 @@ class StateGroupWorkerStore(SQLBaseStore): @defer.inlineCallbacks def _get_state_for_groups(self, groups, types=None): """Given list of groups returns dict of group -> list of state events - with matching types. `types` is a list of `(type, state_key)`, where - a `state_key` of None matches all state_keys. If `types` is None then - all events are returned. + with matching types. + + Args: + groups(list[int]): list of groups whose state to query + types(list[str|None, str|None]|None): List of 2-tuples of the form + (`type`, `state_key`), where a `state_key` of `None` matches all + state_keys for the `type`. Presence of type of `None` indicates + that types not in the list should not be filtered out. If None, + all events are returned. + + Returns: + dict of group -> list of state events """ if types: types = frozenset(types) From 5e6b31f0da8ba0806de856e4a9823206ac4434f9 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 02:21:52 +0100 Subject: [PATCH 27/48] fix dumb typo --- tests/test_dns.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_dns.py b/tests/test_dns.py index af607d626f..3b360a0fc7 100644 --- a/tests/test_dns.py +++ b/tests/test_dns.py @@ -62,7 +62,7 @@ class DnsTestCase(unittest.TestCase): dns_client_mock = Mock() dns_client_mock.lookupService.return_value = defer.fail(error.DNSServerError()) - service_name = "test_service.examle.com" + service_name = "test_service.example.com" entry = Mock(spec_set=["expires"]) entry.expires = 0 @@ -87,7 +87,7 @@ class DnsTestCase(unittest.TestCase): dns_client_mock = Mock(spec_set=['lookupService']) dns_client_mock.lookupService = Mock(spec_set=[]) - service_name = "test_service.examle.com" + service_name = "test_service.example.com" entry = Mock(spec_set=["expires"]) entry.expires = 999999999 @@ -111,7 +111,7 @@ class DnsTestCase(unittest.TestCase): dns_client_mock.lookupService.return_value = defer.fail(error.DNSServerError()) - service_name = "test_service.examle.com" + service_name = "test_service.example.com" cache = {} @@ -126,7 +126,7 @@ class DnsTestCase(unittest.TestCase): dns_client_mock.lookupService.return_value = defer.fail(error.DNSNameError()) - service_name = "test_service.examle.com" + service_name = "test_service.example.com" cache = {} From b69ff33d9e26e9efe89c745e0fda4801db8bede0 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 02:22:27 +0100 Subject: [PATCH 28/48] disable CPUMetrics if no /proc/self/stat fixes build on macOS again --- synapse/metrics/__init__.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 371c300cac..6b5b0c9471 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -34,6 +34,7 @@ all_metrics = [] all_collectors = [] all_gauges = {} +HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") class RegistryProxy(object): @@ -99,6 +100,8 @@ class CPUMetrics(object): self.ticks_per_sec = ticks_per_sec def collect(self): + if not HAVE_PROC_SELF_STAT: + return with open("/proc/self/stat") as s: line = s.read() From 8df7bad839f04cbcabf066fe549df14879639ef5 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 02:32:15 +0100 Subject: [PATCH 29/48] pep8 --- synapse/metrics/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index 6b5b0c9471..bfdbbc9a23 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -36,6 +36,7 @@ all_gauges = {} HAVE_PROC_SELF_STAT = os.path.exists("/proc/self/stat") + class RegistryProxy(object): def collect(self): From 9bbb9f5556496529478753d2123526ca6894535c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 29 May 2018 04:26:10 +0100 Subject: [PATCH 30/48] add lazy_load_members to the filter json schema --- synapse/api/filtering.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 7158dd75e9..fd58961862 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -112,7 +112,10 @@ ROOM_EVENT_FILTER_SCHEMA = { }, "contains_url": { "type": "boolean" - } + }, + "lazy_load_members": { + "type": "boolean" + }, } } From 5f6122fe102f994e023d530cb6076730f31f619f Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 4 Jun 2018 00:08:52 +0300 Subject: [PATCH 31/48] more comments --- synapse/handlers/sync.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 8e38078332..7ab97b24a6 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -515,6 +515,9 @@ class SyncHandler(object): if not types: # an optimisation to stop needlessly trying to calculate # member_state_ids + # + # XXX: i can't remember what this trying to do. why would + # types ever be []? --matthew lazy_load_members = False types.append((None, None)) # don't just filter to room members @@ -568,6 +571,10 @@ class SyncHandler(object): ) if lazy_load_members: + # TODO: filter out redundant members based on their event_ids + # (not mxids) at this point. In practice, limited syncs are + # relatively rare so it's not a total disaster to send redundant + # members down at this point. member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member From 924eb34d9428a4163a03249abbb6f40d4baa29c6 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 18:32:02 +0100 Subject: [PATCH 32/48] add a filtered_types param to limit filtering to specific types --- synapse/handlers/sync.py | 65 ++++++++++++---------- synapse/storage/state.py | 113 +++++++++++++++++++++------------------ 2 files changed, 96 insertions(+), 82 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 0c21ac2c77..cb711b8758 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -417,38 +417,44 @@ class SyncHandler(object): )) @defer.inlineCallbacks - def get_state_after_event(self, event, types=None): + def get_state_after_event(self, event, types=None, filtered_types=None): """ Get the room state after the given event Args: event(synapse.events.EventBase): event of interest - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. + Returns: A Deferred map from ((type, state_key)->Event) """ - state_ids = yield self.store.get_state_ids_for_event(event.event_id, types) + state_ids = yield self.store.get_state_ids_for_event( + event.event_id, types, filtered_types=filtered_types + ) if event.is_state(): state_ids = state_ids.copy() state_ids[(event.type, event.state_key)] = event.event_id defer.returnValue(state_ids) @defer.inlineCallbacks - def get_state_at(self, room_id, stream_position, types=None): + def get_state_at(self, room_id, stream_position, types=None, filtered_types=None): """ Get the room state at a particular stream position Args: room_id(str): room for which to get state stream_position(StreamToken): point at which to get state - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. - May be None, which matches any key. + all events are returned of the given type. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A Deferred map from ((type, state_key)->Event) @@ -463,7 +469,9 @@ class SyncHandler(object): if last_events: last_event = last_events[-1] - state = yield self.get_state_after_event(last_event, types) + state = yield self.get_state_after_event( + last_event, types, filtered_types=filtered_types + ) else: # no events in this room - so presumably no state @@ -499,6 +507,7 @@ class SyncHandler(object): types = None member_state_ids = {} lazy_load_members = sync_config.filter_collection.lazy_load_members() + filtered_types = None if lazy_load_members: # We only request state for the members needed to display the @@ -516,29 +525,25 @@ class SyncHandler(object): # to be done based on event_id, and we don't have the member # event ids until we've pulled them out of the DB. - if not types: - # an optimisation to stop needlessly trying to calculate - # member_state_ids - # - # XXX: i can't remember what this trying to do. why would - # types ever be []? --matthew - lazy_load_members = False - - types.append((None, None)) # don't just filter to room members + # only apply the filtering to room members + filtered_types = [EventTypes.Member] if full_state: if batch: current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id, types=types + batch.events[-1].event_id, types=types, + filtered_types=filtered_types ) state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types + batch.events[0].event_id, types=types, + filtered_types=filtered_types ) else: current_state_ids = yield self.get_state_at( - room_id, stream_position=now_token, types=types + room_id, stream_position=now_token, types=types, + filtered_types=filtered_types ) state_ids = current_state_ids @@ -563,15 +568,18 @@ class SyncHandler(object): ) elif batch.limited: state_at_previous_sync = yield self.get_state_at( - room_id, stream_position=since_token, types=types + room_id, stream_position=since_token, types=types, + filtered_types=filtered_types ) current_state_ids = yield self.store.get_state_ids_for_event( - batch.events[-1].event_id, types=types + batch.events[-1].event_id, types=types, + filtered_types=filtered_types ) state_at_timeline_start = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types + batch.events[0].event_id, types=types, + filtered_types=filtered_types ) if lazy_load_members: @@ -603,11 +611,10 @@ class SyncHandler(object): # event_ids) at this point. We know we can do it based on mxid as this # is an non-gappy incremental sync. - # strip off the (None, None) and filter to just room members - types = types[:-1] if types: state_ids = yield self.store.get_state_ids_for_event( - batch.events[0].event_id, types=types + batch.events[0].event_id, types=types, + filtered_types=filtered_types ) state = {} diff --git a/synapse/storage/state.py b/synapse/storage/state.py index c5ff44fef7..ee531a2ce0 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -185,7 +185,7 @@ class StateGroupWorkerStore(SQLBaseStore): }) @defer.inlineCallbacks - def _get_state_groups_from_groups(self, groups, types): + def _get_state_groups_from_groups(self, groups, types, filtered_types=None): """Returns the state groups for a given set of groups, filtering on types of state events. @@ -193,9 +193,10 @@ class StateGroupWorkerStore(SQLBaseStore): groups(list[int]): list of state group IDs to query types(list[str|None, str|None])|None: List of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all - state_keys for the `type`. Presence of type of `None` indicates - that types not in the list should not be filtered out. If None, - all types are returned. + state_keys for the `type`. If None, all types are returned. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: dictionary state_group -> (dict of (type, state_key) -> event id) @@ -206,26 +207,21 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, + self._get_state_groups_from_groups_txn, chunk, types, filtered_types ) results.update(res) defer.returnValue(results) - def _get_state_groups_from_groups_txn(self, txn, groups, types=None): + def _get_state_groups_from_groups_txn( + self, txn, groups, types=None, filtered_types=None + ): results = {group: {} for group in groups} - include_other_types = False + include_other_types = False if filtered_types is None else True if types is not None: - type_set = set(types) - if (None, None) in type_set: - # special case (None, None) to mean that other types should be - # returned - i.e. we were just filtering down the state keys - # for particular types. - include_other_types = True - type_set.remove((None, None)) - types = list(type_set) # deduplicate types list + types = list(set(types)) # deduplicate types list if isinstance(self.database_engine, PostgresEngine): # Temporarily disable sequential scans in this transaction. This is @@ -276,7 +272,7 @@ class StateGroupWorkerStore(SQLBaseStore): if include_other_types: # XXX: check whether this slows postgres down like a list of # ORs does too? - unique_types = set([t for (t, _) in types]) + unique_types = set(filtered_types) clause_to_args.append( ( "AND type <> ? " * len(unique_types), @@ -313,7 +309,7 @@ class StateGroupWorkerStore(SQLBaseStore): where_args.extend([typ[0], typ[1]]) if include_other_types: - unique_types = set([t for (t, _) in types]) + unique_types = set(filtered_types) where_clauses.append( "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" ) @@ -373,18 +369,20 @@ class StateGroupWorkerStore(SQLBaseStore): return results @defer.inlineCallbacks - def get_state_for_events(self, event_ids, types): + def get_state_for_events(self, event_ids, types, filtered_types): """Given a list of event_ids and type tuples, return a list of state dicts for each event. The state dicts will only have the type/state_keys that are in the `types` list. Args: event_ids (list[string]) - types (list[(str|None, str|None)]|None): List of (type, state_key) tuples + types (list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: deferred: A list of dicts corresponding to the event_ids given. @@ -395,7 +393,7 @@ class StateGroupWorkerStore(SQLBaseStore): ) groups = set(itervalues(event_to_groups)) - group_to_state = yield self._get_state_for_groups(groups, types) + group_to_state = yield self._get_state_for_groups(groups, types, filtered_types) state_event_map = yield self.get_events( [ev_id for sd in itervalues(group_to_state) for ev_id in itervalues(sd)], @@ -414,17 +412,19 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue({event: event_to_state[event] for event in event_ids}) @defer.inlineCallbacks - def get_state_ids_for_events(self, event_ids, types=None): + def get_state_ids_for_events(self, event_ids, types=None, filtered_types=None): """ Get the state dicts corresponding to a list of events Args: event_ids(list(str)): events whose state should be returned - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A deferred dict from event_id -> (type, state_key) -> state_event @@ -434,7 +434,7 @@ class StateGroupWorkerStore(SQLBaseStore): ) groups = set(itervalues(event_to_groups)) - group_to_state = yield self._get_state_for_groups(groups, types) + group_to_state = yield self._get_state_for_groups(groups, types, filtered_types) event_to_state = { event_id: group_to_state[group] @@ -444,41 +444,45 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue({event: event_to_state[event] for event in event_ids}) @defer.inlineCallbacks - def get_state_for_event(self, event_id, types=None): + def get_state_for_event(self, event_id, types=None, filtered_types=None): """ Get the state dict corresponding to a particular event Args: event_id(str): event whose state should be returned - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A deferred dict from (type, state_key) -> state_event """ - state_map = yield self.get_state_for_events([event_id], types) + state_map = yield self.get_state_for_events([event_id], types, filtered_types) defer.returnValue(state_map[event_id]) @defer.inlineCallbacks - def get_state_ids_for_event(self, event_id, types=None): + def get_state_ids_for_event(self, event_id, types=None, filtered_types=None): """ Get the state dict corresponding to a particular event Args: event_id(str): event whose state should be returned - types(list[(str|None, str|None)]|None): List of (type, state_key) tuples + types(list[(str, str|None)]|None): List of (type, state_key) tuples which are used to filter the state fetched. If `state_key` is None, - all events are returned of the given type. Presence of type of `None` - indicates that types not in the list should not be filtered out. + all events are returned of the given type. May be None, which matches any key. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: A deferred dict from (type, state_key) -> state_event """ - state_map = yield self.get_state_ids_for_events([event_id], types) + state_map = yield self.get_state_ids_for_events([event_id], types, filtered_types) defer.returnValue(state_map[event_id]) @cached(max_entries=50000) @@ -509,7 +513,7 @@ class StateGroupWorkerStore(SQLBaseStore): defer.returnValue({row["event_id"]: row["state_group"] for row in rows}) - def _get_some_state_from_cache(self, group, types): + def _get_some_state_from_cache(self, group, types, filtered_types=None): """Checks if group is in cache. See `_get_state_for_groups` Returns 3-tuple (`state_dict`, `missing_types`, `got_all`). @@ -520,29 +524,30 @@ class StateGroupWorkerStore(SQLBaseStore): Args: group(int): The state group to lookup - types(list[str|None, str|None]): List of 2-tuples of the form + types(list[str, str|None]): List of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all - state_keys for the `type`. Presence of type of `None` indicates - that types not in the list should not be filtered out. + state_keys for the `type`. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) type_to_key = {} + + # tracks which of the requested types are missing from our cache missing_types = set() - include_other_types = False + include_other_types = True if filtered_types is None else False for typ, state_key in types: key = (typ, state_key) - if typ is None: - include_other_types = True - next - if state_key is None: type_to_key[typ] = None # XXX: why do we mark the type as missing from our cache just # because we weren't filtering on a specific value of state_key? + # is it because the cache doesn't handle wildcards? missing_types.add(key) else: if type_to_key.get(typ, object()) is not None: @@ -556,7 +561,7 @@ class StateGroupWorkerStore(SQLBaseStore): def include(typ, state_key): valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: - return include_other_types + return include_other_types and typ not in filtered_types if valid_state_keys is None: return True if state_key in valid_state_keys: @@ -585,21 +590,23 @@ class StateGroupWorkerStore(SQLBaseStore): return state_dict_ids, is_all @defer.inlineCallbacks - def _get_state_for_groups(self, groups, types=None): + def _get_state_for_groups(self, groups, types=None, filtered_types=None): """Gets the state at each of a list of state groups, optionally filtering by type/state_key Args: groups (iterable[int]): list of state groups for which we want to get the state. - types (None|iterable[(None|str, None|str)]): + types (None|iterable[(None, None|str)]): indicates the state type/keys required. If None, the whole state is fetched and returned. Otherwise, each entry should be a `(type, state_key)` tuple to include in the response. A `state_key` of None is a wildcard - meaning that we require all state with that type. A `type` of None - indicates that types not in the list should not be filtered out. + meaning that we require all state with that type. + filtered_types(list[str]|None): Only apply filtering via `types` to this + list of event types. Other types of events are returned unfiltered. + If None, `types` filtering is applied to all events. Returns: Deferred[dict[int, dict[(type, state_key), EventBase]]] @@ -612,7 +619,7 @@ class StateGroupWorkerStore(SQLBaseStore): if types is not None: for group in set(groups): state_dict_ids, _, got_all = self._get_some_state_from_cache( - group, types, + group, types, filtered_types ) results[group] = state_dict_ids @@ -645,7 +652,7 @@ class StateGroupWorkerStore(SQLBaseStore): types_to_fetch = types group_to_state_dict = yield self._get_state_groups_from_groups( - missing_groups, types_to_fetch, + missing_groups, types_to_fetch, filtered_types ) for group, group_state_dict in iteritems(group_to_state_dict): From bcaec2915ac74937171e27d507b8f9c0e39d3677 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 19:03:50 +0100 Subject: [PATCH 33/48] incorporate review --- synapse/handlers/sync.py | 44 ++++++++++++++++++++++++---------------- synapse/storage/state.py | 7 ++++--- 2 files changed, 31 insertions(+), 20 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index cb711b8758..b597f94cf6 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -435,7 +435,7 @@ class SyncHandler(object): A Deferred map from ((type, state_key)->Event) """ state_ids = yield self.store.get_state_ids_for_event( - event.event_id, types, filtered_types=filtered_types + event.event_id, types, filtered_types=filtered_types, ) if event.is_state(): state_ids = state_ids.copy() @@ -470,7 +470,7 @@ class SyncHandler(object): if last_events: last_event = last_events[-1] state = yield self.get_state_after_event( - last_event, types, filtered_types=filtered_types + last_event, types, filtered_types=filtered_types, ) else: @@ -505,7 +505,6 @@ class SyncHandler(object): with Measure(self.clock, "compute_state_delta"): types = None - member_state_ids = {} lazy_load_members = sync_config.filter_collection.lazy_load_members() filtered_types = None @@ -521,10 +520,6 @@ class SyncHandler(object): ) ] - # We can't remove redundant member types at this stage as it has - # to be done based on event_id, and we don't have the member - # event ids until we've pulled them out of the DB. - # only apply the filtering to room members filtered_types = [EventTypes.Member] @@ -532,27 +527,32 @@ class SyncHandler(object): if batch: current_state_ids = yield self.store.get_state_ids_for_event( batch.events[-1].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) else: current_state_ids = yield self.get_state_at( room_id, stream_position=now_token, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state_ids = current_state_ids + # track the membership state events as of the beginning of this + # timeline sequence, so they can be filtered out of the state + # if we are lazy loading members. if lazy_load_members: member_state_ids = { t: state_ids[t] for t in state_ids if t[0] == EventTypes.Member } + else: + member_state_ids = {} timeline_state = { (event.type, event.state_key): event.event_id @@ -569,28 +569,38 @@ class SyncHandler(object): elif batch.limited: state_at_previous_sync = yield self.get_state_at( room_id, stream_position=since_token, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) current_state_ids = yield self.store.get_state_ids_for_event( batch.events[-1].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state_at_timeline_start = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) + # track the membership state events as of the beginning of this + # timeline sequence, so they can be filtered out of the state + # if we are lazy loading members. if lazy_load_members: - # TODO: filter out redundant members based on their event_ids - # (not mxids) at this point. In practice, limited syncs are + # TODO: optionally filter out redundant membership events at this + # point, to stop repeatedly sending members in every /sync as if + # the client isn't tracking them. + # When implement, this should filter using event_ids (not mxids). + # In practice, limited syncs are # relatively rare so it's not a total disaster to send redundant - # members down at this point. + # members down at this point. Redundant members are ones which + # repeatedly get sent down /sync because we don't know if the client + # is caching them or not. member_state_ids = { t: state_at_timeline_start[t] for t in state_at_timeline_start if t[0] == EventTypes.Member } + else: + member_state_ids = {} timeline_state = { (event.type, event.state_key): event.event_id @@ -614,7 +624,7 @@ class SyncHandler(object): if types: state_ids = yield self.store.get_state_ids_for_event( batch.events[0].event_id, types=types, - filtered_types=filtered_types + filtered_types=filtered_types, ) state = {} diff --git a/synapse/storage/state.py b/synapse/storage/state.py index ee531a2ce0..75c6366e7a 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -545,9 +545,10 @@ class StateGroupWorkerStore(SQLBaseStore): if state_key is None: type_to_key[typ] = None - # XXX: why do we mark the type as missing from our cache just - # because we weren't filtering on a specific value of state_key? - # is it because the cache doesn't handle wildcards? + # we mark the type as missing from the cache because + # when the cache was populated it might have been done with a + # restricted set of state_keys, so the wildcard will not work + # and the cache may be incomplete. missing_types.add(key) else: if type_to_key.get(typ, object()) is not None: From 2f558300cc648e633342746dc7b42a36fcb6b32e Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 19:22:27 +0100 Subject: [PATCH 34/48] fix thinkos; unbreak tests --- synapse/storage/state.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 75c6366e7a..f09be7172d 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -369,7 +369,7 @@ class StateGroupWorkerStore(SQLBaseStore): return results @defer.inlineCallbacks - def get_state_for_events(self, event_ids, types, filtered_types): + def get_state_for_events(self, event_ids, types, filtered_types=None): """Given a list of event_ids and type tuples, return a list of state dicts for each event. The state dicts will only have the type/state_keys that are in the `types` list. @@ -538,7 +538,7 @@ class StateGroupWorkerStore(SQLBaseStore): # tracks which of the requested types are missing from our cache missing_types = set() - include_other_types = True if filtered_types is None else False + include_other_types = False if filtered_types is None else True for typ, state_key in types: key = (typ, state_key) From 1fa4f7e03e5cdaebeda0f0b4c49120c991f5bf57 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 20:19:32 +0100 Subject: [PATCH 35/48] first cut of a UT for testing state store (untested) --- tests/storage/test_state.py | 151 ++++++++++++++++++++++++++++++++++++ 1 file changed, 151 insertions(+) create mode 100644 tests/storage/test_state.py diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py new file mode 100644 index 0000000000..acf36e077f --- /dev/null +++ b/tests/storage/test_state.py @@ -0,0 +1,151 @@ +# -*- coding: utf-8 -*- +# Copyright 2018 New Vector Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from synapse.api.constants import EventTypes, Membership +from synapse.types import RoomID, UserID + +import tests.unittest +import tests.utils + + +class StateStoreTestCase(tests.unittest.TestCase): + def __init__(self, *args, **kwargs): + super(StateStoreTestCase, self).__init__(*args, **kwargs) + self.store = None # type: synapse.storage.DataStore + + @defer.inlineCallbacks + def setUp(self): + hs = yield tests.utils.setup_test_homeserver() + + self.store = hs.get_datastore() + self.event_builder_factory = hs.get_event_builder_factory() + self.event_creation_handler = hs.get_event_creation_handler() + + self.u_alice = UserID.from_string("@alice:test") + self.u_bob = UserID.from_string("@bob:test") + + # User elsewhere on another host + self.u_charlie = UserID.from_string("@charlie:elsewhere") + + self.room = RoomID.from_string("!abc123:test") + + yield self.store.store_room( + self.room.to_string(), + room_creator_user_id="@creator:text", + is_public=True + ) + + @defer.inlineCallbacks + def inject_state_event(self, room, sender, typ, state_key, content): + builder = self.event_builder_factory.new({ + "type": typ, + "sender": sender.to_string(), + "state_key": state_key, + "room_id": room.to_string(), + "content": content, + }) + + event, context = yield self.event_creation_handler.create_new_client_event( + builder + ) + + yield self.store.persist_event(event, context) + + defer.returnValue(event) + + @defer.inlineCallbacks + def test_get_state_for_events(self): + + # this defaults to a linear DAG as each new injection defaults to whatever + # forward extremities are currently in the DB for this room. + (e1, c1) = yield self.inject_state_event( + self.room, self.u_alice, EventTypes.Create, '', {}, + ) + (e2, c2) = yield self.inject_state_event( + self.room, self.u_alice, EventTypes.Name, '', { + "name": "test room" + }, + ) + (e3, c3) = yield self.inject_state_event( + self.room, self.u_alice, EventTypes.Member, self.u_alice, { + "membership": Membership.JOIN + }, + ) + (e4, c4) = yield self.inject_state_event( + self.room, self.u_bob, EventTypes.Member, self.u_bob, { + "membership": Membership.JOIN + }, + ) + (e5, c5) = yield self.inject_state_event( + self.room, self.u_bob, EventTypes.Member, self.u_bob, { + "membership": Membership.LEAVE + }, + ) + + # check we get the full state as of the final event + state = yield self.store.get_state_for_events( + e5.event_id, None, filtered_types=None + ) + + self.assertDictEqual({ + (e1.type, e1.state_key): e1.event_id, + (e2.type, e2.state_key): e2.event_id, + (e3.type, e3.state_key): e3.event_id, + # e4 is overwritten by e5 + (e5.type, e5.state_key): e5.event_id, + }, state) + + # check we can filter to the m.room.name event (with a '' state key) + state = yield self.store.get_state_for_events( + e5.event_id, ((EventTypes.Name, '')), filtered_types=None + ) + + self.assertDictEqual({ + (e2.type, e2.state_key): e2.event_id, + }, state) + + # check we can filter to the m.room.name event (with a wildcard None state key) + state = yield self.store.get_state_for_events( + e5.event_id, ((EventTypes.Name, None)), filtered_types=None + ) + + self.assertDictEqual({ + (e2.type, e2.state_key): e2.event_id, + }, state) + + # check we can grab the m.room.member events (with a wildcard None state key) + state = yield self.store.get_state_for_events( + e5.event_id, ((EventTypes.Member, None)), filtered_types=None + ) + + self.assertDictEqual({ + (e3.type, e3.state_key): e3.event_id, + (e5.type, e5.state_key): e5.event_id, + }, state) + + # check we can use filter_types to grab a specific room member + # without filtering out the other event types + state = yield self.store.get_state_for_events( + e5.event_id, ((EventTypes.Member, self.u_alice)), + filtered_types=[EventTypes.Member], + ) + + self.assertDictEqual({ + (e1.type, e1.state_key): e3.event_id, + (e2.type, e2.state_key): e3.event_id, + (e3.type, e3.state_key): e5.event_id, + }, state) From 650daf56285c515d9c0875a9a3894a033337b0c9 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Thu, 19 Jul 2018 20:49:44 +0100 Subject: [PATCH 36/48] make test work --- tests/storage/test_state.py | 83 ++++++++++++++++++++----------------- 1 file changed, 46 insertions(+), 37 deletions(-) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index acf36e077f..8924ba9f7f 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. +import logging + from twisted.internet import defer from synapse.api.constants import EventTypes, Membership @@ -21,6 +23,8 @@ from synapse.types import RoomID, UserID import tests.unittest import tests.utils +logger = logging.getLogger(__name__) + class StateStoreTestCase(tests.unittest.TestCase): def __init__(self, *args, **kwargs): @@ -38,9 +42,6 @@ class StateStoreTestCase(tests.unittest.TestCase): self.u_alice = UserID.from_string("@alice:test") self.u_bob = UserID.from_string("@bob:test") - # User elsewhere on another host - self.u_charlie = UserID.from_string("@charlie:elsewhere") - self.room = RoomID.from_string("!abc123:test") yield self.store.store_room( @@ -67,85 +68,93 @@ class StateStoreTestCase(tests.unittest.TestCase): defer.returnValue(event) + def assertStateMapEqual(self, s1, s2): + for t in s1: + # just compare event IDs for simplicity + self.assertEqual(s1[t].event_id, s2[t].event_id) + self.assertEqual(len(s1), len(s2)) + @defer.inlineCallbacks - def test_get_state_for_events(self): + def test_get_state_for_event(self): # this defaults to a linear DAG as each new injection defaults to whatever # forward extremities are currently in the DB for this room. - (e1, c1) = yield self.inject_state_event( + e1 = yield self.inject_state_event( self.room, self.u_alice, EventTypes.Create, '', {}, ) - (e2, c2) = yield self.inject_state_event( + e2 = yield self.inject_state_event( self.room, self.u_alice, EventTypes.Name, '', { "name": "test room" }, ) - (e3, c3) = yield self.inject_state_event( - self.room, self.u_alice, EventTypes.Member, self.u_alice, { + e3 = yield self.inject_state_event( + self.room, self.u_alice, EventTypes.Member, self.u_alice.to_string(), { "membership": Membership.JOIN }, ) - (e4, c4) = yield self.inject_state_event( - self.room, self.u_bob, EventTypes.Member, self.u_bob, { + e4 = yield self.inject_state_event( + self.room, self.u_bob, EventTypes.Member, self.u_bob.to_string(), { "membership": Membership.JOIN }, ) - (e5, c5) = yield self.inject_state_event( - self.room, self.u_bob, EventTypes.Member, self.u_bob, { + e5 = yield self.inject_state_event( + self.room, self.u_bob, EventTypes.Member, self.u_bob.to_string(), { "membership": Membership.LEAVE }, ) # check we get the full state as of the final event - state = yield self.store.get_state_for_events( + state = yield self.store.get_state_for_event( e5.event_id, None, filtered_types=None ) - self.assertDictEqual({ - (e1.type, e1.state_key): e1.event_id, - (e2.type, e2.state_key): e2.event_id, - (e3.type, e3.state_key): e3.event_id, + self.assertIsNotNone(e4) + + self.assertStateMapEqual({ + (e1.type, e1.state_key): e1, + (e2.type, e2.state_key): e2, + (e3.type, e3.state_key): e3, # e4 is overwritten by e5 - (e5.type, e5.state_key): e5.event_id, + (e5.type, e5.state_key): e5, }, state) # check we can filter to the m.room.name event (with a '' state key) - state = yield self.store.get_state_for_events( - e5.event_id, ((EventTypes.Name, '')), filtered_types=None + state = yield self.store.get_state_for_event( + e5.event_id, [(EventTypes.Name, '')], filtered_types=None ) - self.assertDictEqual({ - (e2.type, e2.state_key): e2.event_id, + self.assertStateMapEqual({ + (e2.type, e2.state_key): e2, }, state) # check we can filter to the m.room.name event (with a wildcard None state key) - state = yield self.store.get_state_for_events( - e5.event_id, ((EventTypes.Name, None)), filtered_types=None + state = yield self.store.get_state_for_event( + e5.event_id, [(EventTypes.Name, None)], filtered_types=None ) - self.assertDictEqual({ - (e2.type, e2.state_key): e2.event_id, + self.assertStateMapEqual({ + (e2.type, e2.state_key): e2, }, state) # check we can grab the m.room.member events (with a wildcard None state key) - state = yield self.store.get_state_for_events( - e5.event_id, ((EventTypes.Member, None)), filtered_types=None + state = yield self.store.get_state_for_event( + e5.event_id, [(EventTypes.Member, None)], filtered_types=None ) - self.assertDictEqual({ - (e3.type, e3.state_key): e3.event_id, - (e5.type, e5.state_key): e5.event_id, + self.assertStateMapEqual({ + (e3.type, e3.state_key): e3, + (e5.type, e5.state_key): e5, }, state) # check we can use filter_types to grab a specific room member # without filtering out the other event types - state = yield self.store.get_state_for_events( - e5.event_id, ((EventTypes.Member, self.u_alice)), + state = yield self.store.get_state_for_event( + e5.event_id, [(EventTypes.Member, self.u_alice.to_string())], filtered_types=[EventTypes.Member], ) - self.assertDictEqual({ - (e1.type, e1.state_key): e3.event_id, - (e2.type, e2.state_key): e3.event_id, - (e3.type, e3.state_key): e5.event_id, + self.assertStateMapEqual({ + (e1.type, e1.state_key): e1, + (e2.type, e2.state_key): e2, + (e3.type, e3.state_key): e3, }, state) From 254fb430d1662c93c56c2abbd6984e07fb04c36b Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 23 Jul 2018 19:21:20 +0100 Subject: [PATCH 37/48] incorporate review --- synapse/handlers/sync.py | 67 ++++++++++++++++------------------------ synapse/storage/state.py | 20 +++++------- 2 files changed, 35 insertions(+), 52 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index b597f94cf6..5689ad2f58 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -543,17 +543,6 @@ class SyncHandler(object): state_ids = current_state_ids - # track the membership state events as of the beginning of this - # timeline sequence, so they can be filtered out of the state - # if we are lazy loading members. - if lazy_load_members: - member_state_ids = { - t: state_ids[t] - for t in state_ids if t[0] == EventTypes.Member - } - else: - member_state_ids = {} - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() @@ -562,9 +551,9 @@ class SyncHandler(object): state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_ids, - timeline_start_members=member_state_ids, previous={}, current=current_state_ids, + lazy_load_members=lazy_load_members, ) elif batch.limited: state_at_previous_sync = yield self.get_state_at( @@ -582,37 +571,27 @@ class SyncHandler(object): filtered_types=filtered_types, ) - # track the membership state events as of the beginning of this - # timeline sequence, so they can be filtered out of the state - # if we are lazy loading members. - if lazy_load_members: - # TODO: optionally filter out redundant membership events at this - # point, to stop repeatedly sending members in every /sync as if - # the client isn't tracking them. - # When implement, this should filter using event_ids (not mxids). - # In practice, limited syncs are - # relatively rare so it's not a total disaster to send redundant - # members down at this point. Redundant members are ones which - # repeatedly get sent down /sync because we don't know if the client - # is caching them or not. - member_state_ids = { - t: state_at_timeline_start[t] - for t in state_at_timeline_start if t[0] == EventTypes.Member - } - else: - member_state_ids = {} - timeline_state = { (event.type, event.state_key): event.event_id for event in batch.events if event.is_state() } + # TODO: optionally filter out redundant membership events at this + # point, to stop repeatedly sending members in every /sync as if + # the client isn't tracking them. + # When implemented, this should filter using event_ids (not mxids). + # In practice, limited syncs are + # relatively rare so it's not a total disaster to send redundant + # members down at this point. Redundant members are ones which + # repeatedly get sent down /sync because we don't know if the client + # is caching them or not. + state_ids = _calculate_state( timeline_contains=timeline_state, timeline_start=state_at_timeline_start, - timeline_start_members=member_state_ids, previous=state_at_previous_sync, current=current_state_ids, + lazy_load_members=lazy_load_members, ) else: state_ids = {} @@ -1536,16 +1515,14 @@ def _action_has_highlight(actions): return False -def _calculate_state(timeline_contains, timeline_start, timeline_start_members, - previous, current): +def _calculate_state( + timeline_contains, timeline_start, previous, current, lazy_load_members, +): """Works out what state to include in a sync response. Args: timeline_contains (dict): state in the timeline timeline_start (dict): state at the start of the timeline - timeline_start_members (dict): state at the start of the timeline - for room members who participate in this chunk of timeline. - Should always be a subset of timeline_start. previous (dict): state at the end of the previous sync (or empty dict if this is an initial sync) current (dict): state at the end of the timeline @@ -1565,11 +1542,21 @@ def _calculate_state(timeline_contains, timeline_start, timeline_start_members, c_ids = set(e for e in current.values()) ts_ids = set(e for e in timeline_start.values()) - tsm_ids = set(e for e in timeline_start_members.values()) p_ids = set(e for e in previous.values()) tc_ids = set(e for e in timeline_contains.values()) - state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | tsm_ids + # track the membership events in the state as of the start of the timeline + # so we can add them back in to the state if we're lazyloading. We don't + # add them into state if they're already contained in the timeline. + if lazy_load_members: + ll_ids = set( + e for t, e in timeline_start.iteritems() + if t[0] == EventTypes.Member and e not in tc_ids + ) + else: + ll_ids = set() + + state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | ll_ids return { event_id_to_key[e]: e for e in state_ids diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f09be7172d..40ca8bd2a2 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -191,10 +191,10 @@ class StateGroupWorkerStore(SQLBaseStore): Args: groups(list[int]): list of state group IDs to query - types(list[str|None, str|None])|None: List of 2-tuples of the form + types (Iterable[str, str|None]|None): list of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the `type`. If None, all types are returned. - filtered_types(list[str]|None): Only apply filtering via `types` to this + filtered_types(Iterable[str]|None): Only apply filtering via `types` to this list of event types. Other types of events are returned unfiltered. If None, `types` filtering is applied to all events. @@ -207,19 +207,17 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, filtered_types + self._get_state_groups_from_groups_txn, chunk, types, filtered_types, ) results.update(res) defer.returnValue(results) def _get_state_groups_from_groups_txn( - self, txn, groups, types=None, filtered_types=None + self, txn, groups, types=None, filtered_types=None, ): results = {group: {} for group in groups} - include_other_types = False if filtered_types is None else True - if types is not None: types = list(set(types)) # deduplicate types list @@ -269,7 +267,7 @@ class StateGroupWorkerStore(SQLBaseStore): for etype, state_key in types ] - if include_other_types: + if filtered_types is not None: # XXX: check whether this slows postgres down like a list of # ORs does too? unique_types = set(filtered_types) @@ -308,7 +306,7 @@ class StateGroupWorkerStore(SQLBaseStore): where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) - if include_other_types: + if filtered_types is not None: unique_types = set(filtered_types) where_clauses.append( "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" @@ -538,8 +536,6 @@ class StateGroupWorkerStore(SQLBaseStore): # tracks which of the requested types are missing from our cache missing_types = set() - include_other_types = False if filtered_types is None else True - for typ, state_key in types: key = (typ, state_key) @@ -562,7 +558,7 @@ class StateGroupWorkerStore(SQLBaseStore): def include(typ, state_key): valid_state_keys = type_to_key.get(typ, sentinel) if valid_state_keys is sentinel: - return include_other_types and typ not in filtered_types + return filtered_types is not None and typ not in filtered_types if valid_state_keys is None: return True if state_key in valid_state_keys: @@ -598,7 +594,7 @@ class StateGroupWorkerStore(SQLBaseStore): Args: groups (iterable[int]): list of state groups for which we want to get the state. - types (None|iterable[(None, None|str)]): + types (None|iterable[(str, None|str)]): indicates the state type/keys required. If None, the whole state is fetched and returned. From 004a83b43a5c75ec6ca3378c8ef5a4c616e33e8c Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 23 Jul 2018 22:32:35 +0100 Subject: [PATCH 38/48] changelog --- changelog.d/2970.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/2970.feature diff --git a/changelog.d/2970.feature b/changelog.d/2970.feature new file mode 100644 index 0000000000..5eb928563f --- /dev/null +++ b/changelog.d/2970.feature @@ -0,0 +1 @@ +add support for the lazy_loaded_members filter as per MSC1227 From efcdacad7d1b7f52f879179701c7e0d9b763511f Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 23 Jul 2018 22:41:05 +0100 Subject: [PATCH 39/48] handle case where types is [] on postgres correctly --- synapse/storage/state.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 40ca8bd2a2..f99d3871e4 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -255,7 +255,7 @@ class StateGroupWorkerStore(SQLBaseStore): # Turns out that postgres doesn't like doing a list of OR's and # is about 1000x slower, so we just issue a query for each specific # type seperately. - if types: + if types is not None: clause_to_args = [ ( "AND type = ? AND state_key = ?", From cd241d6bda01a761fbe1ca29727dacd918fb8975 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 12:39:40 +0100 Subject: [PATCH 40/48] incorporate more review --- synapse/handlers/sync.py | 12 +++++++++--- synapse/storage/state.py | 36 +++++++++--------------------------- tests/storage/test_state.py | 9 +++++++++ 3 files changed, 27 insertions(+), 30 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 5689ad2f58..e5a2329d73 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1526,6 +1526,9 @@ def _calculate_state( previous (dict): state at the end of the previous sync (or empty dict if this is an initial sync) current (dict): state at the end of the timeline + lazy_load_members (bool): whether to return members from timeline_start + or not. assumes that timeline_start has already been filtered to + include only the members the client needs to know about. Returns: dict @@ -1545,9 +1548,12 @@ def _calculate_state( p_ids = set(e for e in previous.values()) tc_ids = set(e for e in timeline_contains.values()) - # track the membership events in the state as of the start of the timeline - # so we can add them back in to the state if we're lazyloading. We don't - # add them into state if they're already contained in the timeline. + # If we are lazyloading room members, we explicitly add the membership events + # for the senders in the timeline into the state block returned by /sync, + # as we may not have sent them to the client before. We find these membership + # events by filtering them out of timeline_start, which has already been filtered + # to only include membership events for the senders in the timeline. + if lazy_load_members: ll_ids = set( e for t, e in timeline_start.iteritems() diff --git a/synapse/storage/state.py b/synapse/storage/state.py index f99d3871e4..1413a6f910 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -185,7 +185,7 @@ class StateGroupWorkerStore(SQLBaseStore): }) @defer.inlineCallbacks - def _get_state_groups_from_groups(self, groups, types, filtered_types=None): + def _get_state_groups_from_groups(self, groups, types): """Returns the state groups for a given set of groups, filtering on types of state events. @@ -194,9 +194,6 @@ class StateGroupWorkerStore(SQLBaseStore): types (Iterable[str, str|None]|None): list of 2-tuples of the form (`type`, `state_key`), where a `state_key` of `None` matches all state_keys for the `type`. If None, all types are returned. - filtered_types(Iterable[str]|None): Only apply filtering via `types` to this - list of event types. Other types of events are returned unfiltered. - If None, `types` filtering is applied to all events. Returns: dictionary state_group -> (dict of (type, state_key) -> event id) @@ -207,14 +204,14 @@ class StateGroupWorkerStore(SQLBaseStore): for chunk in chunks: res = yield self.runInteraction( "_get_state_groups_from_groups", - self._get_state_groups_from_groups_txn, chunk, types, filtered_types, + self._get_state_groups_from_groups_txn, chunk, types, ) results.update(res) defer.returnValue(results) def _get_state_groups_from_groups_txn( - self, txn, groups, types=None, filtered_types=None, + self, txn, groups, types=None, ): results = {group: {} for group in groups} @@ -266,17 +263,6 @@ class StateGroupWorkerStore(SQLBaseStore): ) for etype, state_key in types ] - - if filtered_types is not None: - # XXX: check whether this slows postgres down like a list of - # ORs does too? - unique_types = set(filtered_types) - clause_to_args.append( - ( - "AND type <> ? " * len(unique_types), - list(unique_types) - ) - ) else: # If types is None we fetch all the state, and so just use an # empty where clause with no extra args. @@ -306,13 +292,6 @@ class StateGroupWorkerStore(SQLBaseStore): where_clauses.append("(type = ? AND state_key = ?)") where_args.extend([typ[0], typ[1]]) - if filtered_types is not None: - unique_types = set(filtered_types) - where_clauses.append( - "(" + " AND ".join(["type <> ?"] * len(unique_types)) + ")" - ) - where_args.extend(list(unique_types)) - where_clause = "AND (%s)" % (" OR ".join(where_clauses)) else: where_clause = "" @@ -643,13 +622,13 @@ class StateGroupWorkerStore(SQLBaseStore): # cache. Hence, if we are doing a wildcard lookup, populate the # cache fully so that we can do an efficient lookup next time. - if types and any(k is None for (t, k) in types): + if filtered_types or (types and any(k is None for (t, k) in types)): types_to_fetch = None else: types_to_fetch = types group_to_state_dict = yield self._get_state_groups_from_groups( - missing_groups, types_to_fetch, filtered_types + missing_groups, types_to_fetch ) for group, group_state_dict in iteritems(group_to_state_dict): @@ -659,7 +638,10 @@ class StateGroupWorkerStore(SQLBaseStore): if types: for k, v in iteritems(group_state_dict): (typ, _) = k - if k in types or (typ, None) in types: + if ( + (k in types or (typ, None) in types) or + (filtered_types and typ not in filtered_types) + ): state_dict[k] = v else: state_dict.update(group_state_dict) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index 8924ba9f7f..b2f314e9db 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -158,3 +158,12 @@ class StateStoreTestCase(tests.unittest.TestCase): (e2.type, e2.state_key): e2, (e3.type, e3.state_key): e3, }, state) + + state = yield self.store.get_state_for_event( + e5.event_id, [], filtered_types=[EventTypes.Member], + ) + + self.assertStateMapEqual({ + (e1.type, e1.state_key): e1, + (e2.type, e2.state_key): e2, + }, state) From eb1d911ab743e85154f7c4b2db8a954d152020dc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 13:40:49 +0100 Subject: [PATCH 41/48] rather than adding ll_ids, remove them from p_ids --- synapse/handlers/sync.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index e5a2329d73..1422843af8 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1553,16 +1553,17 @@ def _calculate_state( # as we may not have sent them to the client before. We find these membership # events by filtering them out of timeline_start, which has already been filtered # to only include membership events for the senders in the timeline. + # In practice, we can do this by removing them from the p_ids list. + # see https://github.com/matrix-org/synapse/pull/2970 + # /files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809 if lazy_load_members: - ll_ids = set( + p_ids.difference_update( e for t, e in timeline_start.iteritems() - if t[0] == EventTypes.Member and e not in tc_ids + if t[0] == EventTypes.Member ) - else: - ll_ids = set() - state_ids = (((c_ids | ts_ids) - p_ids) - tc_ids) | ll_ids + state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids return { event_id_to_key[e]: e for e in state_ids From e22700c3dd929f9f0953b3d2b37e503eece82b38 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 13:59:07 +0100 Subject: [PATCH 42/48] consider non-filter_type types as wildcards, thus missing from the state-group-cache --- synapse/storage/state.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 1413a6f910..86f2c2e6b1 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -518,7 +518,10 @@ class StateGroupWorkerStore(SQLBaseStore): for typ, state_key in types: key = (typ, state_key) - if state_key is None: + if ( + state_key is None or + filtered_types is not None and typ not in filtered_types + ): type_to_key[typ] = None # we mark the type as missing from the cache because # when the cache was populated it might have been done with a From 1a01a5b964d3ea373355684a91b9f7fd95726fbc Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 14:03:15 +0100 Subject: [PATCH 43/48] clarify comment on p_ids --- synapse/handlers/sync.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/handlers/sync.py b/synapse/handlers/sync.py index 1422843af8..4ced3144c8 100644 --- a/synapse/handlers/sync.py +++ b/synapse/handlers/sync.py @@ -1553,7 +1553,8 @@ def _calculate_state( # as we may not have sent them to the client before. We find these membership # events by filtering them out of timeline_start, which has already been filtered # to only include membership events for the senders in the timeline. - # In practice, we can do this by removing them from the p_ids list. + # In practice, we can do this by removing them from the p_ids list, + # which is the list of relevant state we know we have already sent to the client. # see https://github.com/matrix-org/synapse/pull/2970 # /files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809 From cb5c37a57c387a74f6079008870cf024e674dfe5 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Tue, 24 Jul 2018 20:34:45 +0100 Subject: [PATCH 44/48] handle the edge case for _get_some_state_from_cache where types is [] --- synapse/storage/state.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 86f2c2e6b1..989977c644 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -547,7 +547,13 @@ class StateGroupWorkerStore(SQLBaseStore): return True return False - got_all = is_all or not missing_types + if types == [] and filtered_types is not None: + # special wildcard case for empty type-list but an explicit filtered_types + # which means that we'll try to return all types which aren't in the + # filtered_types list. missing_types will always be empty, so we ignore it. + got_all = is_all + else: + got_all = is_all or not missing_types return { k: v for k, v in iteritems(state_dict_ids) From 7d9fb88617f475ac7e064c5cccc8d78dbd78d2a3 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 25 Jul 2018 16:15:33 +0100 Subject: [PATCH 45/48] incorporate more review. --- synapse/storage/state.py | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 989977c644..e38427bf9d 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -493,12 +493,6 @@ class StateGroupWorkerStore(SQLBaseStore): def _get_some_state_from_cache(self, group, types, filtered_types=None): """Checks if group is in cache. See `_get_state_for_groups` - Returns 3-tuple (`state_dict`, `missing_types`, `got_all`). - `missing_types` is the list of types that aren't in the cache for that - group. `got_all` is a bool indicating if we successfully retrieved all - requests state from the cache, if False we need to query the DB for the - missing state. - Args: group(int): The state group to lookup types(list[str, str|None]): List of 2-tuples of the form @@ -507,6 +501,11 @@ class StateGroupWorkerStore(SQLBaseStore): filtered_types(list[str]|None): Only apply filtering via `types` to this list of event types. Other types of events are returned unfiltered. If None, `types` filtering is applied to all events. + + Returns 2-tuple (`state_dict`, `got_all`). + `got_all` is a bool indicating if we successfully retrieved all + requests state from the cache, if False we need to query the DB for the + missing state. """ is_all, known_absent, state_dict_ids = self._state_group_cache.get(group) @@ -520,7 +519,7 @@ class StateGroupWorkerStore(SQLBaseStore): if ( state_key is None or - filtered_types is not None and typ not in filtered_types + (filtered_types is not None and typ not in filtered_types) ): type_to_key[typ] = None # we mark the type as missing from the cache because @@ -547,18 +546,17 @@ class StateGroupWorkerStore(SQLBaseStore): return True return False - if types == [] and filtered_types is not None: - # special wildcard case for empty type-list but an explicit filtered_types - # which means that we'll try to return all types which aren't in the - # filtered_types list. missing_types will always be empty, so we ignore it. - got_all = is_all - else: - got_all = is_all or not missing_types + got_all = is_all + if not got_all: + # the cache is incomplete. We may still have got all the results we need, if + # we don't have any wildcards in the match list. + if not missing_types and filtered_types is None: + got_all = True return { k: v for k, v in iteritems(state_dict_ids) if include(k[0], k[1]) - }, missing_types, got_all + }, got_all def _get_all_state_from_cache(self, group): """Checks if group is in cache. See `_get_state_for_groups` @@ -603,7 +601,7 @@ class StateGroupWorkerStore(SQLBaseStore): missing_groups = [] if types is not None: for group in set(groups): - state_dict_ids, _, got_all = self._get_some_state_from_cache( + state_dict_ids, got_all = self._get_some_state_from_cache( group, types, filtered_types ) results[group] = state_dict_ids From 0a7ee0ab8b0794c8633177edca8f839d34c6a42a Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 25 Jul 2018 16:10:34 +0100 Subject: [PATCH 46/48] add tests for _get_some_state_from_cache --- tests/storage/test_state.py | 150 ++++++++++++++++++++++++++++++++++++ 1 file changed, 150 insertions(+) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index b2f314e9db..b7797c45e8 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -159,6 +159,8 @@ class StateStoreTestCase(tests.unittest.TestCase): (e3.type, e3.state_key): e3, }, state) + # check that types=[], filtered_types=[EventTypes.Member] + # doesn't return all members state = yield self.store.get_state_for_event( e5.event_id, [], filtered_types=[EventTypes.Member], ) @@ -167,3 +169,151 @@ class StateStoreTestCase(tests.unittest.TestCase): (e1.type, e1.state_key): e1, (e2.type, e2.state_key): e2, }, state) + + ################################## + # _get_some_state_from_cache tests + ################################## + + room_id = self.room.to_string() + group_ids = yield self.store.get_state_groups_ids(room_id, [e5.event_id]) + group = group_ids.keys()[0] + + # test that _get_some_state_from_cache correctly filters out members with types=[] + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, True) + self.assertDictEqual({ + (e1.type, e1.state_key): e1.event_id, + (e2.type, e2.state_key): e2.event_id, + }, state_dict) + + # test that _get_some_state_from_cache correctly filters out members with types=wildcard + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [(EventTypes.Member, None)], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, True) + self.assertDictEqual({ + (e1.type, e1.state_key): e1.event_id, + (e2.type, e2.state_key): e2.event_id, + (e3.type, e3.state_key): e3.event_id, + # e4 is overwritten by e5 + (e5.type, e5.state_key): e5.event_id, + }, state_dict) + + # test that _get_some_state_from_cache correctly filters out members with types=specific + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [(EventTypes.Member, e5.state_key)], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, True) + self.assertDictEqual({ + (e1.type, e1.state_key): e1.event_id, + (e2.type, e2.state_key): e2.event_id, + (e5.type, e5.state_key): e5.event_id, + }, state_dict) + + # test that _get_some_state_from_cache correctly filters out members with types=specific + # and no filtered_types + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [(EventTypes.Member, e5.state_key)], filtered_types=None + ) + + self.assertEqual(is_all, True) + self.assertDictEqual({ + (e5.type, e5.state_key): e5.event_id, + }, state_dict) + + ####################################################### + # deliberately remove e2 (room name) from the _state_group_cache + + (is_all, known_absent, state_dict_ids) = self.store._state_group_cache.get(group) + + self.assertEqual(is_all, True) + self.assertEqual(known_absent, set()) + self.assertDictEqual(state_dict_ids, { + (e1.type, e1.state_key): e1.event_id, + (e2.type, e2.state_key): e2.event_id, + (e3.type, e3.state_key): e3.event_id, + # e4 is overwritten by e5 + (e5.type, e5.state_key): e5.event_id, + }) + + state_dict_ids.pop((e2.type, e2.state_key)) + self.store._state_group_cache.invalidate(group) + self.store._state_group_cache.update( + sequence=self.store._state_group_cache.sequence, + key=group, + value=state_dict_ids, + # list fetched keys so it knows it's partial + fetched_keys=( + (e1.type, e1.state_key), + (e3.type, e3.state_key), + (e5.type, e5.state_key), + ) + ) + + (is_all, known_absent, state_dict_ids) = self.store._state_group_cache.get(group) + + self.assertEqual(is_all, False) + self.assertEqual(known_absent, set([ + (e1.type, e1.state_key), + (e3.type, e3.state_key), + (e5.type, e5.state_key), + ])) + self.assertDictEqual(state_dict_ids, { + (e1.type, e1.state_key): e1.event_id, + (e3.type, e3.state_key): e3.event_id, + (e5.type, e5.state_key): e5.event_id, + }) + + ################################################### + # test that things work with a partial cache + + # test that _get_some_state_from_cache correctly filters out members with types=[] + room_id = self.room.to_string() + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, False) + self.assertDictEqual({ + (e1.type, e1.state_key): e1.event_id, + }, state_dict) + + # test that _get_some_state_from_cache correctly filters out members with types=wildcard + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [(EventTypes.Member, None)], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, False) + self.assertDictEqual({ + (e1.type, e1.state_key): e1.event_id, + (e3.type, e3.state_key): e3.event_id, + # e4 is overwritten by e5 + (e5.type, e5.state_key): e5.event_id, + }, state_dict) + + # test that _get_some_state_from_cache correctly filters out members with types=specific + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [(EventTypes.Member, e5.state_key)], filtered_types=[EventTypes.Member] + ) + + self.assertEqual(is_all, False) + self.assertDictEqual({ + (e1.type, e1.state_key): e1.event_id, + (e5.type, e5.state_key): e5.event_id, + }, state_dict) + + # test that _get_some_state_from_cache correctly filters out members with types=specific + # and no filtered_types + (state_dict, is_all) = yield self.store._get_some_state_from_cache( + group, [(EventTypes.Member, e5.state_key)], filtered_types=None + ) + + self.assertEqual(is_all, True) + self.assertDictEqual({ + (e5.type, e5.state_key): e5.event_id, + }, state_dict) From 0620d27f4d4a7f5c228c519b489efc8bc0c0a7d0 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 25 Jul 2018 17:21:17 +0100 Subject: [PATCH 47/48] flake8 --- tests/storage/test_state.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/tests/storage/test_state.py b/tests/storage/test_state.py index b7797c45e8..7a76d67b8c 100644 --- a/tests/storage/test_state.py +++ b/tests/storage/test_state.py @@ -170,15 +170,15 @@ class StateStoreTestCase(tests.unittest.TestCase): (e2.type, e2.state_key): e2, }, state) - ################################## - # _get_some_state_from_cache tests - ################################## + ####################################################### + # _get_some_state_from_cache tests against a full cache + ####################################################### room_id = self.room.to_string() group_ids = yield self.store.get_state_groups_ids(room_id, [e5.event_id]) group = group_ids.keys()[0] - # test that _get_some_state_from_cache correctly filters out members with types=[] + # test _get_some_state_from_cache correctly filters out members with types=[] (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [], filtered_types=[EventTypes.Member] ) @@ -189,7 +189,7 @@ class StateStoreTestCase(tests.unittest.TestCase): (e2.type, e2.state_key): e2.event_id, }, state_dict) - # test that _get_some_state_from_cache correctly filters out members with types=wildcard + # test _get_some_state_from_cache correctly filters in members with wildcard types (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [(EventTypes.Member, None)], filtered_types=[EventTypes.Member] ) @@ -203,7 +203,7 @@ class StateStoreTestCase(tests.unittest.TestCase): (e5.type, e5.state_key): e5.event_id, }, state_dict) - # test that _get_some_state_from_cache correctly filters out members with types=specific + # test _get_some_state_from_cache correctly filters in members with specific types (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [(EventTypes.Member, e5.state_key)], filtered_types=[EventTypes.Member] ) @@ -215,7 +215,7 @@ class StateStoreTestCase(tests.unittest.TestCase): (e5.type, e5.state_key): e5.event_id, }, state_dict) - # test that _get_some_state_from_cache correctly filters out members with types=specific + # test _get_some_state_from_cache correctly filters in members with specific types # and no filtered_types (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [(EventTypes.Member, e5.state_key)], filtered_types=None @@ -269,10 +269,10 @@ class StateStoreTestCase(tests.unittest.TestCase): (e5.type, e5.state_key): e5.event_id, }) - ################################################### + ############################################ # test that things work with a partial cache - # test that _get_some_state_from_cache correctly filters out members with types=[] + # test _get_some_state_from_cache correctly filters out members with types=[] room_id = self.room.to_string() (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [], filtered_types=[EventTypes.Member] @@ -283,7 +283,7 @@ class StateStoreTestCase(tests.unittest.TestCase): (e1.type, e1.state_key): e1.event_id, }, state_dict) - # test that _get_some_state_from_cache correctly filters out members with types=wildcard + # test _get_some_state_from_cache correctly filters in members wildcard types (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [(EventTypes.Member, None)], filtered_types=[EventTypes.Member] ) @@ -296,7 +296,7 @@ class StateStoreTestCase(tests.unittest.TestCase): (e5.type, e5.state_key): e5.event_id, }, state_dict) - # test that _get_some_state_from_cache correctly filters out members with types=specific + # test _get_some_state_from_cache correctly filters in members with specific types (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [(EventTypes.Member, e5.state_key)], filtered_types=[EventTypes.Member] ) @@ -307,7 +307,7 @@ class StateStoreTestCase(tests.unittest.TestCase): (e5.type, e5.state_key): e5.event_id, }, state_dict) - # test that _get_some_state_from_cache correctly filters out members with types=specific + # test _get_some_state_from_cache correctly filters in members with specific types # and no filtered_types (state_dict, is_all) = yield self.store._get_some_state_from_cache( group, [(EventTypes.Member, e5.state_key)], filtered_types=None From bc7944e6d2ea0076badd0eba414e1ba7020eb1e6 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Wed, 25 Jul 2018 23:36:31 +0100 Subject: [PATCH 48/48] switch missing_types to be a bool --- synapse/storage/state.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index e38427bf9d..b27b3ae144 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -511,8 +511,8 @@ class StateGroupWorkerStore(SQLBaseStore): type_to_key = {} - # tracks which of the requested types are missing from our cache - missing_types = set() + # tracks whether any of ourrequested types are missing from the cache + missing_types = False for typ, state_key in types: key = (typ, state_key) @@ -526,13 +526,13 @@ class StateGroupWorkerStore(SQLBaseStore): # when the cache was populated it might have been done with a # restricted set of state_keys, so the wildcard will not work # and the cache may be incomplete. - missing_types.add(key) + missing_types = True else: if type_to_key.get(typ, object()) is not None: type_to_key.setdefault(typ, set()).add(state_key) if key not in state_dict_ids and key not in known_absent: - missing_types.add(key) + missing_types = True sentinel = object()