Merge remote-tracking branch 'origin/develop' into rav/fix_event_filter_validation

2018-10-24 11:23:49 +01:00 · 2018-10-24 11:23:49 +01:00 · 3ad359e5be
parent 7e07d25ed6 7328039117
commit 3ad359e5be
21 changed files with 1344 additions and 81 deletions
--- a/changelog.d/3786.misc
+++ b/changelog.d/3786.misc
@ -0,0 +1 @@
 Add initial implementation of new state resolution algorithm
--- a/changelog.d/4082.misc
+++ b/changelog.d/4082.misc
@ -0,0 +1 @@
 Clean up some bits of code which were flagged by the linter
--- a/scripts-dev/tail-synapse.py
+++ b/scripts-dev/tail-synapse.py
@ -48,7 +48,7 @@ def main():
                row.name: row.position
                for row in replicate(server, {"streams": "-1"})["streams"].rows
            }
-        except requests.exceptions.ConnectionError as e:
+        except requests.exceptions.ConnectionError:
            time.sleep(0.1)
    while True:
--- a/scripts/synapse_port_db
+++ b/scripts/synapse_port_db
@ -501,7 +501,8 @@ class Porter(object):
            try:
                yield self.postgres_store.runInteraction("alter_table", alter_table)
-            except Exception as e:
+            except Exception:
                # On Error Resume Next
                pass
            yield self.postgres_store.runInteraction(
--- a/setup.cfg
+++ b/setup.cfg
@ -14,17 +14,16 @@ ignore =
    pylint.cfg
    tox.ini
 [pep8]
 max-line-length = 90
 #  W503 requires that binary operators be at the end, not start, of lines. Erik
 #  doesn't like it.  E203 is contrary to PEP8.  E731 is silly.
 ignore = W503,E203,E731
 [flake8]
 # note that flake8 inherits the "ignore" settings from "pep8" (because it uses
 # pep8 to do those checks), but not the "max-line-length" setting
 max-line-length = 90
-ignore=W503,E203,E731
+
 # see https://pycodestyle.readthedocs.io/en/latest/intro.html#error-codes
 # for error codes. The ones we ignore are:
 #  W503: line break before binary operator
 #  W504: line break after binary operator
 #  E203: whitespace before ':' (which is contrary to pep8?)
 #  E731: do not assign a lambda expression, use a def
 ignore=W503,W504,E203,E731
 [isort]
 line_length = 89
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@ -178,7 +178,7 @@ class ContentRepositoryConfig(Config):
    def default_config(self, **kwargs):
        media_store = self.default_path("media_store")
        uploads_path = self.default_path("uploads")
-        return """
+        return r"""
        # Directory where uploaded images and attachments are stored.
        media_store_path: "%(media_store)s"
--- a/synapse/crypto/keyclient.py
+++ b/synapse/crypto/keyclient.py
@ -55,7 +55,7 @@ def fetch_server_key(server_name, tls_client_options_factory, path=KEY_API_V1):
                raise IOError("Cannot get key for %r" % server_name)
        except (ConnectError, DomainError) as e:
            logger.warn("Error getting key for %r: %s", server_name, e)
-        except Exception as e:
+        except Exception:
            logger.exception("Error getting key for %r", server_name)
    raise IOError("Cannot get key for %r" % server_name)
--- a/synapse/event_auth.py
+++ b/synapse/event_auth.py
@ -690,7 +690,7 @@ def auth_types_for_event(event):
    auth_types = []
    auth_types.append((EventTypes.PowerLevels, "", ))
-    auth_types.append((EventTypes.Member, event.user_id, ))
+    auth_types.append((EventTypes.Member, event.sender, ))
    auth_types.append((EventTypes.Create, "", ))
    if event.type == EventTypes.Member:
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@ -800,7 +800,7 @@ class FederationHandlerRegistry(object):
            yield handler(origin, content)
        except SynapseError as e:
            logger.info("Failed to handle edu %r: %r", edu_type, e)
-        except Exception as e:
+        except Exception:
            logger.exception("Failed to handle edu %r", edu_type)
    def on_query(self, query_type, args):
--- a/synapse/handlers/federation.py
+++ b/synapse/handlers/federation.py
@ -53,7 +53,7 @@ from synapse.replication.http.federation import (
    ReplicationFederationSendEventsRestServlet,
 )
 from synapse.replication.http.membership import ReplicationUserJoinedLeftRoomRestServlet
-from synapse.state import resolve_events_with_factory
+from synapse.state import StateResolutionStore, resolve_events_with_store
 from synapse.types import UserID, get_domain_from_id
 from synapse.util import logcontext, unwrapFirstError
 from synapse.util.async_helpers import Linearizer
@ -384,24 +384,24 @@ class FederationHandler(BaseHandler):
                            for x in remote_state:
                                event_map[x.event_id] = x
                    # Resolve any conflicting state
                    @defer.inlineCallbacks
                    def fetch(ev_ids):
                        fetched = yield self.store.get_events(
                            ev_ids, get_prev_content=False, check_redacted=False,
                        )
                        # add any events we fetch here to the `event_map` so that we
                        # can use them to build the state event list below.
                        event_map.update(fetched)
                        defer.returnValue(fetched)
                    room_version = yield self.store.get_room_version(room_id)
-                    state_map = yield resolve_events_with_factory(
+                    state_map = yield resolve_events_with_store(
-                        room_version, state_maps, event_map, fetch,
+                        room_version, state_maps, event_map,
                        state_res_store=StateResolutionStore(self.store),
                    )
-                    # we need to give _process_received_pdu the actual state events
+                    # We need to give _process_received_pdu the actual state events
                    # rather than event ids, so generate that now.
                    # First though we need to fetch all the events that are in
                    # state_map, so we can build up the state below.
                    evs = yield self.store.get_events(
                        list(state_map.values()),
                        get_prev_content=False,
                        check_redacted=False,
                    )
                    event_map.update(evs)
                    state = [
                        event_map[e] for e in six.itervalues(state_map)
                    ]
--- a/synapse/rest/client/v2_alpha/auth.py
+++ b/synapse/rest/client/v2_alpha/auth.py
@ -99,7 +99,7 @@ class AuthRestServlet(RestServlet):
    cannot be handled in the normal flow (with requests to the same endpoint).
    Current use is for web fallback auth.
    """
-    PATTERNS = client_v2_patterns("/auth/(?P<stagetype>[\w\.]*)/fallback/web")
+    PATTERNS = client_v2_patterns(r"/auth/(?P<stagetype>[\w\.]*)/fallback/web")
    def __init__(self, hs):
        super(AuthRestServlet, self).__init__()
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@ -674,7 +674,7 @@ def summarize_paragraphs(text_nodes, min_size=200, max_size=500):
        # This splits the paragraph into words, but keeping the
        # (preceeding) whitespace intact so we can easily concat
        # words back together.
-        for match in re.finditer("\s*\S+", description):
+        for match in re.finditer(r"\s*\S+", description):
            word = match.group()
            # Keep adding words while the total length is less than
--- a/synapse/state/init.py
+++ b/synapse/state/init.py
@ -19,13 +19,14 @@ from collections import namedtuple
 from six import iteritems, itervalues
 import attr
 from frozendict import frozendict
 from twisted.internet import defer
 from synapse.api.constants import EventTypes, RoomVersions
 from synapse.events.snapshot import EventContext
-from synapse.state import v1
+from synapse.state import v1, v2
 from synapse.util.async_helpers import Linearizer
 from synapse.util.caches import get_cache_factor_for
 from synapse.util.caches.expiringcache import ExpiringCache
@ -372,15 +373,10 @@ class StateHandler(object):
        result = yield self._state_resolution_handler.resolve_state_groups(
            room_id, room_version, state_groups_ids, None,
-            self._state_map_factory,
+            state_res_store=StateResolutionStore(self.store),
        )
        defer.returnValue(result)
    def _state_map_factory(self, ev_ids):
        return self.store.get_events(
            ev_ids, get_prev_content=False, check_redacted=False,
        )
    @defer.inlineCallbacks
    def resolve_events(self, room_version, state_sets, event):
        logger.info(
@ -398,10 +394,10 @@ class StateHandler(object):
        }
        with Measure(self.clock, "state._resolve_events"):
-            new_state = yield resolve_events_with_factory(
+            new_state = yield resolve_events_with_store(
                room_version, state_set_ids,
                event_map=state_map,
-                state_map_factory=self._state_map_factory
+                state_res_store=StateResolutionStore(self.store),
            )
        new_state = {
@ -436,7 +432,7 @@ class StateResolutionHandler(object):
    @defer.inlineCallbacks
    @log_function
    def resolve_state_groups(
-        self, room_id, room_version, state_groups_ids, event_map, state_map_factory,
+        self, room_id, room_version, state_groups_ids, event_map, state_res_store,
    ):
        """Resolves conflicts between a set of state groups
@ -454,9 +450,11 @@ class StateResolutionHandler(object):
                a dict from event_id to event, for any events that we happen to
                have in flight (eg, those currently being persisted). This will be
                used as a starting point fof finding the state we need; any missing
-                events will be requested via state_map_factory.
+                events will be requested via state_res_store.
-                If None, all events will be fetched via state_map_factory.
+                If None, all events will be fetched via state_res_store.
            state_res_store (StateResolutionStore)
        Returns:
            Deferred[_StateCacheEntry]: resolved state
@ -480,10 +478,10 @@ class StateResolutionHandler(object):
            # start by assuming we won't have any conflicted state, and build up the new
            # state map by iterating through the state groups. If we discover a conflict,
-            # we give up and instead use `resolve_events_with_factory`.
+            # we give up and instead use `resolve_events_with_store`.
            #
            # XXX: is this actually worthwhile, or should we just let
-            # resolve_events_with_factory do it?
+            # resolve_events_with_store do it?
            new_state = {}
            conflicted_state = False
            for st in itervalues(state_groups_ids):
@ -498,11 +496,11 @@ class StateResolutionHandler(object):
            if conflicted_state:
                logger.info("Resolving conflicted state for %r", room_id)
                with Measure(self.clock, "state._resolve_events"):
-                    new_state = yield resolve_events_with_factory(
+                    new_state = yield resolve_events_with_store(
                        room_version,
                        list(itervalues(state_groups_ids)),
                        event_map=event_map,
-                        state_map_factory=state_map_factory,
+                        state_res_store=state_res_store,
                    )
            # if the new state matches any of the input state groups, we can
@ -583,7 +581,7 @@ def _make_state_cache_entry(
    )
-def resolve_events_with_factory(room_version, state_sets, event_map, state_map_factory):
+def resolve_events_with_store(room_version, state_sets, event_map, state_res_store):
    """
    Args:
        room_version(str): Version of the room
@ -599,17 +597,19 @@ def resolve_events_with_factory(room_version, state_sets, event_map, state_map_f
            If None, all events will be fetched via state_map_factory.
-        state_map_factory(func): will be called
+        state_res_store (StateResolutionStore)
            with a list of event_ids that are needed, and should return with
            a Deferred of dict of event_id to event.
    Returns
        Deferred[dict[(str, str), str]]:
            a map from (type, state_key) to event_id.
    """
-    if room_version in (RoomVersions.V1, RoomVersions.VDH_TEST,):
+    if room_version == RoomVersions.V1:
-        return v1.resolve_events_with_factory(
+        return v1.resolve_events_with_store(
-            state_sets, event_map, state_map_factory,
+            state_sets, event_map, state_res_store.get_events,
        )
    elif room_version == RoomVersions.VDH_TEST:
        return v2.resolve_events_with_store(
            state_sets, event_map, state_res_store,
        )
    else:
        # This should only happen if we added a version but forgot to add it to
@ -617,3 +617,54 @@ def resolve_events_with_factory(room_version, state_sets, event_map, state_map_f
        raise Exception(
            "No state resolution algorithm defined for version %r" % (room_version,)
        )
@attr.s
 class StateResolutionStore(object):
    """Interface that allows state resolution algorithms to access the database
    in well defined way.
    Args:
        store (DataStore)
    """
    store = attr.ib()
    def get_events(self, event_ids, allow_rejected=False):
        """Get events from the database
        Args:
            event_ids (list): The event_ids of the events to fetch
            allow_rejected (bool): If True return rejected events.
        Returns:
            Deferred[dict[str, FrozenEvent]]: Dict from event_id to event.
        """
        return self.store.get_events(
            event_ids,
            check_redacted=False,
            get_prev_content=False,
            allow_rejected=allow_rejected,
        )
    def get_auth_chain(self, event_ids):
        """Gets the full auth chain for a set of events (including rejected
        events).
        Includes the given event IDs in the result.
        Note that:
            1. All events must be state events.
            2. For v1 rooms this may not have the full auth chain in the
               presence of rejected events
        Args:
            event_ids (list): The event IDs of the events to fetch the auth
                chain for. Must be state events.
        Returns:
            Deferred[list[str]]: List of event IDs of the auth chain.
        """
        return self.store.get_auth_chain_ids(event_ids, include_given=True)
--- a/synapse/state/v1.py
+++ b/synapse/state/v1.py
@ -31,7 +31,7 @@ POWER_KEY = (EventTypes.PowerLevels, "")
@defer.inlineCallbacks
-def resolve_events_with_factory(state_sets, event_map, state_map_factory):
+def resolve_events_with_store(state_sets, event_map, state_map_factory):
    """
    Args:
        state_sets(list): List of dicts of (type, state_key) -> event_id,
--- a/synapse/state/v2.py
+++ b/synapse/state/v2.py
@ -0,0 +1,544 @@
 # -*- coding: utf-8 -*-
 # Copyright 2018 New Vector Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import heapq
 import itertools
 import logging
 from six import iteritems, itervalues
 from twisted.internet import defer
 from synapse import event_auth
 from synapse.api.constants import EventTypes
 from synapse.api.errors import AuthError
 logger = logging.getLogger(__name__)
@defer.inlineCallbacks
 def resolve_events_with_store(state_sets, event_map, state_res_store):
    """Resolves the state using the v2 state resolution algorithm
    Args:
        state_sets(list): List of dicts of (type, state_key) -> event_id,
            which are the different state groups to resolve.
        event_map(dict[str,FrozenEvent]|None):
            a dict from event_id to event, for any events that we happen to
            have in flight (eg, those currently being persisted). This will be
            used as a starting point fof finding the state we need; any missing
            events will be requested via state_res_store.
            If None, all events will be fetched via state_res_store.
        state_res_store (StateResolutionStore)
    Returns
        Deferred[dict[(str, str), str]]:
            a map from (type, state_key) to event_id.
    """
    logger.debug("Computing conflicted state")
    # First split up the un/conflicted state
    unconflicted_state, conflicted_state = _seperate(state_sets)
    if not conflicted_state:
        defer.returnValue(unconflicted_state)
    logger.debug("%d conflicted state entries", len(conflicted_state))
    logger.debug("Calculating auth chain difference")
    # Also fetch all auth events that appear in only some of the state sets'
    # auth chains.
    auth_diff = yield _get_auth_chain_difference(
        state_sets, event_map, state_res_store,
    )
    full_conflicted_set = set(itertools.chain(
        itertools.chain.from_iterable(itervalues(conflicted_state)),
        auth_diff,
    ))
    events = yield state_res_store.get_events([
        eid for eid in full_conflicted_set
        if eid not in event_map
    ], allow_rejected=True)
    event_map.update(events)
    full_conflicted_set = set(eid for eid in full_conflicted_set if eid in event_map)
    logger.debug("%d full_conflicted_set entries", len(full_conflicted_set))
    # Get and sort all the power events (kicks/bans/etc)
    power_events = (
        eid for eid in full_conflicted_set
        if _is_power_event(event_map[eid])
    )
    sorted_power_events = yield _reverse_topological_power_sort(
        power_events,
        event_map,
        state_res_store,
        full_conflicted_set,
    )
    logger.debug("sorted %d power events", len(sorted_power_events))
    # Now sequentially auth each one
    resolved_state = yield _iterative_auth_checks(
        sorted_power_events, unconflicted_state, event_map,
        state_res_store,
    )
    logger.debug("resolved power events")
    # OK, so we've now resolved the power events. Now sort the remaining
    # events using the mainline of the resolved power level.
    leftover_events = [
        ev_id
        for ev_id in full_conflicted_set
        if ev_id not in sorted_power_events
    ]
    logger.debug("sorting %d remaining events", len(leftover_events))
    pl = resolved_state.get((EventTypes.PowerLevels, ""), None)
    leftover_events = yield _mainline_sort(
        leftover_events, pl, event_map, state_res_store,
    )
    logger.debug("resolving remaining events")
    resolved_state = yield _iterative_auth_checks(
        leftover_events, resolved_state, event_map,
        state_res_store,
    )
    logger.debug("resolved")
    # We make sure that unconflicted state always still applies.
    resolved_state.update(unconflicted_state)
    logger.debug("done")
    defer.returnValue(resolved_state)
@defer.inlineCallbacks
 def _get_power_level_for_sender(event_id, event_map, state_res_store):
    """Return the power level of the sender of the given event according to
    their auth events.
    Args:
        event_id (str)
        event_map (dict[str,FrozenEvent])
        state_res_store (StateResolutionStore)
    Returns:
        Deferred[int]
    """
    event = yield _get_event(event_id, event_map, state_res_store)
    pl = None
    for aid, _ in event.auth_events:
        aev = yield _get_event(aid, event_map, state_res_store)
        if (aev.type, aev.state_key) == (EventTypes.PowerLevels, ""):
            pl = aev
            break
    if pl is None:
        # Couldn't find power level. Check if they're the creator of the room
        for aid, _ in event.auth_events:
            aev = yield _get_event(aid, event_map, state_res_store)
            if (aev.type, aev.state_key) == (EventTypes.Create, ""):
                if aev.content.get("creator") == event.sender:
                    defer.returnValue(100)
                break
        defer.returnValue(0)
    level = pl.content.get("users", {}).get(event.sender)
    if level is None:
        level = pl.content.get("users_default", 0)
    if level is None:
        defer.returnValue(0)
    else:
        defer.returnValue(int(level))
@defer.inlineCallbacks
 def _get_auth_chain_difference(state_sets, event_map, state_res_store):
    """Compare the auth chains of each state set and return the set of events
    that only appear in some but not all of the auth chains.
    Args:
        state_sets (list)
        event_map (dict[str,FrozenEvent])
        state_res_store (StateResolutionStore)
    Returns:
        Deferred[set[str]]: Set of event IDs
    """
    common = set(itervalues(state_sets[0])).intersection(
        *(itervalues(s) for s in state_sets[1:])
    )
    auth_sets = []
    for state_set in state_sets:
        auth_ids = set(
            eid
            for key, eid in iteritems(state_set)
            if (key[0] in (
                EventTypes.Member,
                EventTypes.ThirdPartyInvite,
            ) or key in (
                (EventTypes.PowerLevels, ''),
                (EventTypes.Create, ''),
                (EventTypes.JoinRules, ''),
            )) and eid not in common
        )
        auth_chain = yield state_res_store.get_auth_chain(auth_ids)
        auth_ids.update(auth_chain)
        auth_sets.append(auth_ids)
    intersection = set(auth_sets[0]).intersection(*auth_sets[1:])
    union = set().union(*auth_sets)
    defer.returnValue(union - intersection)
 def _seperate(state_sets):
    """Return the unconflicted and conflicted state. This is different than in
    the original algorithm, as this defines a key to be conflicted if one of
    the state sets doesn't have that key.
    Args:
        state_sets (list)
    Returns:
        tuple[dict, dict]: A tuple of unconflicted and conflicted state. The
        conflicted state dict is a map from type/state_key to set of event IDs
    """
    unconflicted_state = {}
    conflicted_state = {}
    for key in set(itertools.chain.from_iterable(state_sets)):
        event_ids = set(state_set.get(key) for state_set in state_sets)
        if len(event_ids) == 1:
            unconflicted_state[key] = event_ids.pop()
        else:
            event_ids.discard(None)
            conflicted_state[key] = event_ids
    return unconflicted_state, conflicted_state
 def _is_power_event(event):
    """Return whether or not the event is a "power event", as defined by the
    v2 state resolution algorithm
    Args:
        event (FrozenEvent)
    Returns:
        boolean
    """
    if (event.type, event.state_key) in (
        (EventTypes.PowerLevels, ""),
        (EventTypes.JoinRules, ""),
        (EventTypes.Create, ""),
    ):
        return True
    if event.type == EventTypes.Member:
        if event.membership in ('leave', 'ban'):
            return event.sender != event.state_key
    return False
@defer.inlineCallbacks
 def _add_event_and_auth_chain_to_graph(graph, event_id, event_map,
                                       state_res_store, auth_diff):
    """Helper function for _reverse_topological_power_sort that add the event
    and its auth chain (that is in the auth diff) to the graph
    Args:
        graph (dict[str, set[str]]): A map from event ID to the events auth
            event IDs
        event_id (str): Event to add to the graph
        event_map (dict[str,FrozenEvent])
        state_res_store (StateResolutionStore)
        auth_diff (set[str]): Set of event IDs that are in the auth difference.
    """
    state = [event_id]
    while state:
        eid = state.pop()
        graph.setdefault(eid, set())
        event = yield _get_event(eid, event_map, state_res_store)
        for aid, _ in event.auth_events:
            if aid in auth_diff:
                if aid not in graph:
                    state.append(aid)
                graph.setdefault(eid, set()).add(aid)
@defer.inlineCallbacks
 def _reverse_topological_power_sort(event_ids, event_map, state_res_store, auth_diff):
    """Returns a list of the event_ids sorted by reverse topological ordering,
    and then by power level and origin_server_ts
    Args:
        event_ids (list[str]): The events to sort
        event_map (dict[str,FrozenEvent])
        state_res_store (StateResolutionStore)
        auth_diff (set[str]): Set of event IDs that are in the auth difference.
    Returns:
        Deferred[list[str]]: The sorted list
    """
    graph = {}
    for event_id in event_ids:
        yield _add_event_and_auth_chain_to_graph(
            graph, event_id, event_map, state_res_store, auth_diff,
        )
    event_to_pl = {}
    for event_id in graph:
        pl = yield _get_power_level_for_sender(event_id, event_map, state_res_store)
        event_to_pl[event_id] = pl
    def _get_power_order(event_id):
        ev = event_map[event_id]
        pl = event_to_pl[event_id]
        return -pl, ev.origin_server_ts, event_id
    # Note: graph is modified during the sort
    it = lexicographical_topological_sort(
        graph,
        key=_get_power_order,
    )
    sorted_events = list(it)
    defer.returnValue(sorted_events)
@defer.inlineCallbacks
 def _iterative_auth_checks(event_ids, base_state, event_map, state_res_store):
    """Sequentially apply auth checks to each event in given list, updating the
    state as it goes along.
    Args:
        event_ids (list[str]): Ordered list of events to apply auth checks to
        base_state (dict[tuple[str, str], str]): The set of state to start with
        event_map (dict[str,FrozenEvent])
        state_res_store (StateResolutionStore)
    Returns:
        Deferred[dict[tuple[str, str], str]]: Returns the final updated state
    """
    resolved_state = base_state.copy()
    for event_id in event_ids:
        event = event_map[event_id]
        auth_events = {}
        for aid, _ in event.auth_events:
            ev = yield _get_event(aid, event_map, state_res_store)
            if ev.rejected_reason is None:
                auth_events[(ev.type, ev.state_key)] = ev
        for key in event_auth.auth_types_for_event(event):
            if key in resolved_state:
                ev_id = resolved_state[key]
                ev = yield _get_event(ev_id, event_map, state_res_store)
                if ev.rejected_reason is None:
                    auth_events[key] = event_map[ev_id]
        try:
            event_auth.check(
                event, auth_events,
                do_sig_check=False,
                do_size_check=False
            )
            resolved_state[(event.type, event.state_key)] = event_id
        except AuthError:
            pass
    defer.returnValue(resolved_state)
@defer.inlineCallbacks
 def _mainline_sort(event_ids, resolved_power_event_id, event_map,
                   state_res_store):
    """Returns a sorted list of event_ids sorted by mainline ordering based on
    the given event resolved_power_event_id
    Args:
        event_ids (list[str]): Events to sort
        resolved_power_event_id (str): The final resolved power level event ID
        event_map (dict[str,FrozenEvent])
        state_res_store (StateResolutionStore)
    Returns:
        Deferred[list[str]]: The sorted list
    """
    mainline = []
    pl = resolved_power_event_id
    while pl:
        mainline.append(pl)
        pl_ev = yield _get_event(pl, event_map, state_res_store)
        auth_events = pl_ev.auth_events
        pl = None
        for aid, _ in auth_events:
            ev = yield _get_event(aid, event_map, state_res_store)
            if (ev.type, ev.state_key) == (EventTypes.PowerLevels, ""):
                pl = aid
                break
    mainline_map = {ev_id: i + 1 for i, ev_id in enumerate(reversed(mainline))}
    event_ids = list(event_ids)
    order_map = {}
    for ev_id in event_ids:
        depth = yield _get_mainline_depth_for_event(
            event_map[ev_id], mainline_map,
            event_map, state_res_store,
        )
        order_map[ev_id] = (depth, event_map[ev_id].origin_server_ts, ev_id)
    event_ids.sort(key=lambda ev_id: order_map[ev_id])
    defer.returnValue(event_ids)
@defer.inlineCallbacks
 def _get_mainline_depth_for_event(event, mainline_map, event_map, state_res_store):
    """Get the mainline depths for the given event based on the mainline map
    Args:
        event (FrozenEvent)
        mainline_map (dict[str, int]): Map from event_id to mainline depth for
            events in the mainline.
        event_map (dict[str,FrozenEvent])
        state_res_store (StateResolutionStore)
    Returns:
        Deferred[int]
    """
    # We do an iterative search, replacing `event with the power level in its
    # auth events (if any)
    while event:
        depth = mainline_map.get(event.event_id)
        if depth is not None:
            defer.returnValue(depth)
        auth_events = event.auth_events
        event = None
        for aid, _ in auth_events:
            aev = yield _get_event(aid, event_map, state_res_store)
            if (aev.type, aev.state_key) == (EventTypes.PowerLevels, ""):
                event = aev
                break
    # Didn't find a power level auth event, so we just return 0
    defer.returnValue(0)
@defer.inlineCallbacks
 def _get_event(event_id, event_map, state_res_store):
    """Helper function to look up event in event_map, falling back to looking
    it up in the store
    Args:
        event_id (str)
        event_map (dict[str,FrozenEvent])
        state_res_store (StateResolutionStore)
    Returns:
        Deferred[FrozenEvent]
    """
    if event_id not in event_map:
        events = yield state_res_store.get_events([event_id], allow_rejected=True)
        event_map.update(events)
    defer.returnValue(event_map[event_id])
 def lexicographical_topological_sort(graph, key):
    """Performs a lexicographic reverse topological sort on the graph.
    This returns a reverse topological sort (i.e. if node A references B then B
    appears before A in the sort), with ties broken lexicographically based on
    return value of the `key` function.
    NOTE: `graph` is modified during the sort.
    Args:
        graph (dict[str, set[str]]): A representation of the graph where each
            node is a key in the dict and its value are the nodes edges.
        key (func): A function that takes a node and returns a value that is
            comparable and used to order nodes
    Yields:
        str: The next node in the topological sort
    """
    # Note, this is basically Kahn's algorithm except we look at nodes with no
    # outgoing edges, c.f.
    # https://en.wikipedia.org/wiki/Topological_sorting#Kahn's_algorithm
    outdegree_map = graph
    reverse_graph = {}
    # Lists of nodes with zero out degree. Is actually a tuple of
    # `(key(node), node)` so that sorting does the right thing
    zero_outdegree = []
    for node, edges in iteritems(graph):
        if len(edges) == 0:
            zero_outdegree.append((key(node), node))
        reverse_graph.setdefault(node, set())
        for edge in edges:
            reverse_graph.setdefault(edge, set()).add(node)
    # heapq is a built in implementation of a sorted queue.
    heapq.heapify(zero_outdegree)
    while zero_outdegree:
        _, node = heapq.heappop(zero_outdegree)
        for parent in reverse_graph[node]:
            out = outdegree_map[parent]
            out.discard(node)
            if len(out) == 0:
                heapq.heappush(zero_outdegree, (key(parent), parent))
        yield node
--- a/synapse/storage/events.py
+++ b/synapse/storage/events.py
@ -34,6 +34,7 @@ from synapse.api.errors import SynapseError
 from synapse.events import EventBase  # noqa: F401
 from synapse.events.snapshot import EventContext  # noqa: F401
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.state import StateResolutionStore
 from synapse.storage.background_updates import BackgroundUpdateStore
 from synapse.storage.event_federation import EventFederationStore
 from synapse.storage.events_worker import EventsWorkerStore
@ -731,11 +732,6 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
        # Ok, we need to defer to the state handler to resolve our state sets.
        def get_events(ev_ids):
            return self.get_events(
                ev_ids, get_prev_content=False, check_redacted=False,
            )
        state_groups = {
            sg: state_groups_map[sg] for sg in new_state_groups
        }
@ -745,7 +741,8 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
        logger.debug("calling resolve_state_groups from preserve_events")
        res = yield self._state_resolution_handler.resolve_state_groups(
-            room_id, room_version, state_groups, events_map, get_events
+            room_id, room_version, state_groups, events_map,
            state_res_store=StateResolutionStore(self)
        )
        defer.returnValue((res.state, None))
@ -854,6 +851,27 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
        # Insert into event_to_state_groups.
        self._store_event_state_mappings_txn(txn, events_and_contexts)
        # We want to store event_auth mappings for rejected events, as they're
        # used in state res v2.
        # This is only necessary if the rejected event appears in an accepted
        # event's auth chain, but its easier for now just to store them (and
        # it doesn't take much storage compared to storing the entire event
        # anyway).
        self._simple_insert_many_txn(
            txn,
            table="event_auth",
            values=[
                {
                    "event_id": event.event_id,
                    "room_id": event.room_id,
                    "auth_id": auth_id,
                }
                for event, _ in events_and_contexts
                for auth_id, _ in event.auth_events
                if event.is_state()
            ],
        )
        # _store_rejected_events_txn filters out any events which were
        # rejected, and returns the filtered list.
        events_and_contexts = self._store_rejected_events_txn(
@ -1329,21 +1347,6 @@ class EventsStore(EventFederationStore, EventsWorkerStore, BackgroundUpdateStore
                    txn, event.room_id, event.redacts
                )
        self._simple_insert_many_txn(
            txn,
            table="event_auth",
            values=[
                {
                    "event_id": event.event_id,
                    "room_id": event.room_id,
                    "auth_id": auth_id,
                }
                for event, _ in events_and_contexts
                for auth_id, _ in event.auth_events
                if event.is_state()
            ],
        )
        # Update the event_forward_extremities, event_backward_extremities and
        # event_edges tables.
        self._handle_mult_prev_events(
--- a/synapse/storage/registration.py
+++ b/synapse/storage/registration.py
@ -567,7 +567,7 @@ class RegistrationStore(RegistrationWorkerStore,
        def _find_next_generated_user_id(txn):
            txn.execute("SELECT name FROM users")
-            regex = re.compile("^@(\d+):")
+            regex = re.compile(r"^@(\d+):")
            found = set()
--- a/tests/config/test_generate.py
+++ b/tests/config/test_generate.py
@ -67,6 +67,6 @@ class ConfigGenerationTestCase(unittest.TestCase):
        with open(log_config_file) as f:
            config = f.read()
            # find the 'filename' line
-            matches = re.findall("^\s*filename:\s*(.*)$", config, re.M)
+            matches = re.findall(r"^\s*filename:\s*(.*)$", config, re.M)
            self.assertEqual(1, len(matches))
            self.assertEqual(matches[0], expected)
--- a/tests/events/test_utils.py
+++ b/tests/events/test_utils.py
@ -156,7 +156,7 @@ class SerializeEventTestCase(unittest.TestCase):
                    room_id="!foo:bar",
                    content={"key.with.dots": {}},
                ),
-                ["content.key\.with\.dots"],
+                [r"content.key\.with\.dots"],
            ),
            {"content": {"key.with.dots": {}}},
        )
@ -172,7 +172,7 @@ class SerializeEventTestCase(unittest.TestCase):
                        "nested.dot.key": {"leaf.key": 42, "not_me_either": 1},
                    },
                ),
-                ["content.nested\.dot\.key.leaf\.key"],
+                [r"content.nested\.dot\.key.leaf\.key"],
            ),
            {"content": {"nested.dot.key": {"leaf.key": 42}}},
        )
--- a/tests/state/init.py
+++ b/tests/state/init.py
--- a/tests/state/test_v2.py
+++ b/tests/state/test_v2.py
@ -0,0 +1,663 @@
 # -*- coding: utf-8 -*-
 # Copyright 2018 New Vector Ltd
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
 # You may obtain a copy of the License at
 #
 #     http://www.apache.org/licenses/LICENSE-2.0
 #
 # Unless required by applicable law or agreed to in writing, software
 # distributed under the License is distributed on an "AS IS" BASIS,
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import itertools
 from six.moves import zip
 import attr
 from synapse.api.constants import EventTypes, JoinRules, Membership
 from synapse.event_auth import auth_types_for_event
 from synapse.events import FrozenEvent
 from synapse.state.v2 import lexicographical_topological_sort, resolve_events_with_store
 from synapse.types import EventID
 from tests import unittest
 ALICE = "@alice:example.com"
 BOB = "@bob:example.com"
 CHARLIE = "@charlie:example.com"
 EVELYN = "@evelyn:example.com"
 ZARA = "@zara:example.com"
 ROOM_ID = "!test:example.com"
 MEMBERSHIP_CONTENT_JOIN = {"membership": Membership.JOIN}
 MEMBERSHIP_CONTENT_BAN = {"membership": Membership.BAN}
 ORIGIN_SERVER_TS = 0
 class FakeEvent(object):
    """A fake event we use as a convenience.
    NOTE: Again as a convenience we use "node_ids" rather than event_ids to
    refer to events. The event_id has node_id as localpart and example.com
    as domain.
    """
    def __init__(self, id, sender, type, state_key, content):
        self.node_id = id
        self.event_id = EventID(id, "example.com").to_string()
        self.sender = sender
        self.type = type
        self.state_key = state_key
        self.content = content
    def to_event(self, auth_events, prev_events):
        """Given the auth_events and prev_events, convert to a Frozen Event
        Args:
            auth_events (list[str]): list of event_ids
            prev_events (list[str]): list of event_ids
        Returns:
            FrozenEvent
        """
        global ORIGIN_SERVER_TS
        ts = ORIGIN_SERVER_TS
        ORIGIN_SERVER_TS = ORIGIN_SERVER_TS + 1
        event_dict = {
            "auth_events": [(a, {}) for a in auth_events],
            "prev_events": [(p, {}) for p in prev_events],
            "event_id": self.node_id,
            "sender": self.sender,
            "type": self.type,
            "content": self.content,
            "origin_server_ts": ts,
            "room_id": ROOM_ID,
        }
        if self.state_key is not None:
            event_dict["state_key"] = self.state_key
        return FrozenEvent(event_dict)
 # All graphs start with this set of events
 INITIAL_EVENTS = [
    FakeEvent(
        id="CREATE",
        sender=ALICE,
        type=EventTypes.Create,
        state_key="",
        content={"creator": ALICE},
    ),
    FakeEvent(
        id="IMA",
        sender=ALICE,
        type=EventTypes.Member,
        state_key=ALICE,
        content=MEMBERSHIP_CONTENT_JOIN,
    ),
    FakeEvent(
        id="IPOWER",
        sender=ALICE,
        type=EventTypes.PowerLevels,
        state_key="",
        content={"users": {ALICE: 100}},
    ),
    FakeEvent(
        id="IJR",
        sender=ALICE,
        type=EventTypes.JoinRules,
        state_key="",
        content={"join_rule": JoinRules.PUBLIC},
    ),
    FakeEvent(
        id="IMB",
        sender=BOB,
        type=EventTypes.Member,
        state_key=BOB,
        content=MEMBERSHIP_CONTENT_JOIN,
    ),
    FakeEvent(
        id="IMC",
        sender=CHARLIE,
        type=EventTypes.Member,
        state_key=CHARLIE,
        content=MEMBERSHIP_CONTENT_JOIN,
    ),
    FakeEvent(
        id="IMZ",
        sender=ZARA,
        type=EventTypes.Member,
        state_key=ZARA,
        content=MEMBERSHIP_CONTENT_JOIN,
    ),
    FakeEvent(
        id="START",
        sender=ZARA,
        type=EventTypes.Message,
        state_key=None,
        content={},
    ),
    FakeEvent(
        id="END",
        sender=ZARA,
        type=EventTypes.Message,
        state_key=None,
        content={},
    ),
 ]
 INITIAL_EDGES = [
    "START", "IMZ", "IMC", "IMB", "IJR", "IPOWER", "IMA", "CREATE",
 ]
 class StateTestCase(unittest.TestCase):
    def test_ban_vs_pl(self):
        events = [
            FakeEvent(
                id="PA",
                sender=ALICE,
                type=EventTypes.PowerLevels,
                state_key="",
                content={
                    "users": {
                        ALICE: 100,
                        BOB: 50,
                    }
                },
            ),
            FakeEvent(
                id="MA",
                sender=ALICE,
                type=EventTypes.Member,
                state_key=ALICE,
                content={"membership": Membership.JOIN},
            ),
            FakeEvent(
                id="MB",
                sender=ALICE,
                type=EventTypes.Member,
                state_key=BOB,
                content={"membership": Membership.BAN},
            ),
            FakeEvent(
                id="PB",
                sender=BOB,
                type=EventTypes.PowerLevels,
                state_key='',
                content={
                    "users": {
                        ALICE: 100,
                        BOB: 50,
                    },
                },
            ),
        ]
        edges = [
            ["END", "MB", "MA", "PA", "START"],
            ["END", "PB", "PA"],
        ]
        expected_state_ids = ["PA", "MA", "MB"]
        self.do_check(events, edges, expected_state_ids)
    def test_join_rule_evasion(self):
        events = [
            FakeEvent(
                id="JR",
                sender=ALICE,
                type=EventTypes.JoinRules,
                state_key="",
                content={"join_rules": JoinRules.PRIVATE},
            ),
            FakeEvent(
                id="ME",
                sender=EVELYN,
                type=EventTypes.Member,
                state_key=EVELYN,
                content={"membership": Membership.JOIN},
            ),
        ]
        edges = [
            ["END", "JR", "START"],
            ["END", "ME", "START"],
        ]
        expected_state_ids = ["JR"]
        self.do_check(events, edges, expected_state_ids)
    def test_offtopic_pl(self):
        events = [
            FakeEvent(
                id="PA",
                sender=ALICE,
                type=EventTypes.PowerLevels,
                state_key="",
                content={
                    "users": {
                        ALICE: 100,
                        BOB: 50,
                    }
                },
            ),
            FakeEvent(
                id="PB",
                sender=BOB,
                type=EventTypes.PowerLevels,
                state_key='',
                content={
                    "users": {
                        ALICE: 100,
                        BOB: 50,
                        CHARLIE: 50,
                    },
                },
            ),
            FakeEvent(
                id="PC",
                sender=CHARLIE,
                type=EventTypes.PowerLevels,
                state_key='',
                content={
                    "users": {
                        ALICE: 100,
                        BOB: 50,
                        CHARLIE: 0,
                    },
                },
            ),
        ]
        edges = [
            ["END", "PC", "PB", "PA", "START"],
            ["END", "PA"],
        ]
        expected_state_ids = ["PC"]
        self.do_check(events, edges, expected_state_ids)
    def test_topic_basic(self):
        events = [
            FakeEvent(
                id="T1",
                sender=ALICE,
                type=EventTypes.Topic,
                state_key="",
                content={},
            ),
            FakeEvent(
                id="PA1",
                sender=ALICE,
                type=EventTypes.PowerLevels,
                state_key='',
                content={
                    "users": {
                        ALICE: 100,
                        BOB: 50,
                    },
                },
            ),
            FakeEvent(
                id="T2",
                sender=ALICE,
                type=EventTypes.Topic,
                state_key="",
                content={},
            ),
            FakeEvent(
                id="PA2",
                sender=ALICE,
                type=EventTypes.PowerLevels,
                state_key='',
                content={
                    "users": {
                        ALICE: 100,
                        BOB: 0,
                    },
                },
            ),
            FakeEvent(
                id="PB",
                sender=BOB,
                type=EventTypes.PowerLevels,
                state_key='',
                content={
                    "users": {
                        ALICE: 100,
                        BOB: 50,
                    },
                },
            ),
            FakeEvent(
                id="T3",
                sender=BOB,
                type=EventTypes.Topic,
                state_key="",
                content={},
            ),
        ]
        edges = [
            ["END", "PA2", "T2", "PA1", "T1", "START"],
            ["END", "T3", "PB", "PA1"],
        ]
        expected_state_ids = ["PA2", "T2"]
        self.do_check(events, edges, expected_state_ids)
    def test_topic_reset(self):
        events = [
            FakeEvent(
                id="T1",
                sender=ALICE,
                type=EventTypes.Topic,
                state_key="",
                content={},
            ),
            FakeEvent(
                id="PA",
                sender=ALICE,
                type=EventTypes.PowerLevels,
                state_key='',
                content={
                    "users": {
                        ALICE: 100,
                        BOB: 50,
                    },
                },
            ),
            FakeEvent(
                id="T2",
                sender=BOB,
                type=EventTypes.Topic,
                state_key="",
                content={},
            ),
            FakeEvent(
                id="MB",
                sender=ALICE,
                type=EventTypes.Member,
                state_key=BOB,
                content={"membership": Membership.BAN},
            ),
        ]
        edges = [
            ["END", "MB", "T2", "PA", "T1", "START"],
            ["END", "T1"],
        ]
        expected_state_ids = ["T1", "MB", "PA"]
        self.do_check(events, edges, expected_state_ids)
    def test_topic(self):
        events = [
            FakeEvent(
                id="T1",
                sender=ALICE,
                type=EventTypes.Topic,
                state_key="",
                content={},
            ),
            FakeEvent(
                id="PA1",
                sender=ALICE,
                type=EventTypes.PowerLevels,
                state_key='',
                content={
                    "users": {
                        ALICE: 100,
                        BOB: 50,
                    },
                },
            ),
            FakeEvent(
                id="T2",
                sender=ALICE,
                type=EventTypes.Topic,
                state_key="",
                content={},
            ),
            FakeEvent(
                id="PA2",
                sender=ALICE,
                type=EventTypes.PowerLevels,
                state_key='',
                content={
                    "users": {
                        ALICE: 100,
                        BOB: 0,
                    },
                },
            ),
            FakeEvent(
                id="PB",
                sender=BOB,
                type=EventTypes.PowerLevels,
                state_key='',
                content={
                    "users": {
                        ALICE: 100,
                        BOB: 50,
                    },
                },
            ),
            FakeEvent(
                id="T3",
                sender=BOB,
                type=EventTypes.Topic,
                state_key="",
                content={},
            ),
            FakeEvent(
                id="MZ1",
                sender=ZARA,
                type=EventTypes.Message,
                state_key=None,
                content={},
            ),
            FakeEvent(
                id="T4",
                sender=ALICE,
                type=EventTypes.Topic,
                state_key="",
                content={},
            ),
        ]
        edges = [
            ["END", "T4", "MZ1", "PA2", "T2", "PA1", "T1", "START"],
            ["END", "MZ1", "T3", "PB", "PA1"],
        ]
        expected_state_ids = ["T4", "PA2"]
        self.do_check(events, edges, expected_state_ids)
    def do_check(self, events, edges, expected_state_ids):
        """Take a list of events and edges and calculate the state of the
        graph at END, and asserts it matches `expected_state_ids`
        Args:
            events (list[FakeEvent])
            edges (list[list[str]]): A list of chains of event edges, e.g.
                `[[A, B, C]]` are edges A->B and B->C.
            expected_state_ids (list[str]): The expected state at END, (excluding
                the keys that haven't changed since START).
        """
        # We want to sort the events into topological order for processing.
        graph = {}
        # node_id -> FakeEvent
        fake_event_map = {}
        for ev in itertools.chain(INITIAL_EVENTS, events):
            graph[ev.node_id] = set()
            fake_event_map[ev.node_id] = ev
        for a, b in pairwise(INITIAL_EDGES):
            graph[a].add(b)
        for edge_list in edges:
            for a, b in pairwise(edge_list):
                graph[a].add(b)
        # event_id -> FrozenEvent
        event_map = {}
        # node_id -> state
        state_at_event = {}
        # We copy the map as the sort consumes the graph
        graph_copy = {k: set(v) for k, v in graph.items()}
        for node_id in lexicographical_topological_sort(graph_copy, key=lambda e: e):
            fake_event = fake_event_map[node_id]
            event_id = fake_event.event_id
            prev_events = list(graph[node_id])
            if len(prev_events) == 0:
                state_before = {}
            elif len(prev_events) == 1:
                state_before = dict(state_at_event[prev_events[0]])
            else:
                state_d = resolve_events_with_store(
                    [state_at_event[n] for n in prev_events],
                    event_map=event_map,
                    state_res_store=TestStateResolutionStore(event_map),
                )
                self.assertTrue(state_d.called)
                state_before = state_d.result
            state_after = dict(state_before)
            if fake_event.state_key is not None:
                state_after[(fake_event.type, fake_event.state_key)] = event_id
            auth_types = set(auth_types_for_event(fake_event))
            auth_events = []
            for key in auth_types:
                if key in state_before:
                    auth_events.append(state_before[key])
            event = fake_event.to_event(auth_events, prev_events)
            state_at_event[node_id] = state_after
            event_map[event_id] = event
        expected_state = {}
        for node_id in expected_state_ids:
            # expected_state_ids are node IDs rather than event IDs,
            # so we have to convert
            event_id = EventID(node_id, "example.com").to_string()
            event = event_map[event_id]
            key = (event.type, event.state_key)
            expected_state[key] = event_id
        start_state = state_at_event["START"]
        end_state = {
            key: value
            for key, value in state_at_event["END"].items()
            if key in expected_state or start_state.get(key) != value
        }
        self.assertEqual(expected_state, end_state)
 class LexicographicalTestCase(unittest.TestCase):
    def test_simple(self):
        graph = {
            "l": {"o"},
            "m": {"n", "o"},
            "n": {"o"},
            "o": set(),
            "p": {"o"},
        }
        res = list(lexicographical_topological_sort(graph, key=lambda x: x))
        self.assertEqual(["o", "l", "n", "m", "p"], res)
 def pairwise(iterable):
    "s -> (s0,s1), (s1,s2), (s2, s3), ..."
    a, b = itertools.tee(iterable)
    next(b, None)
    return zip(a, b)
@attr.s
 class TestStateResolutionStore(object):
    event_map = attr.ib()
    def get_events(self, event_ids, allow_rejected=False):
        """Get events from the database
        Args:
            event_ids (list): The event_ids of the events to fetch
            allow_rejected (bool): If True return rejected events.
        Returns:
            Deferred[dict[str, FrozenEvent]]: Dict from event_id to event.
        """
        return {
            eid: self.event_map[eid]
            for eid in event_ids
            if eid in self.event_map
        }
    def get_auth_chain(self, event_ids):
        """Gets the full auth chain for a set of events (including rejected
        events).
        Includes the given event IDs in the result.
        Note that:
            1. All events must be state events.
            2. For v1 rooms this may not have the full auth chain in the
               presence of rejected events
        Args:
            event_ids (list): The event IDs of the events to fetch the auth
                chain for. Must be state events.
        Returns:
            Deferred[list[str]]: List of event IDs of the auth chain.
        """
        # Simple DFS for auth chain
        result = set()
        stack = list(event_ids)
        while stack:
            event_id = stack.pop()
            if event_id in result:
                continue
            result.add(event_id)
            event = self.event_map[event_id]
            for aid, _ in event.auth_events:
                stack.append(aid)
        return list(result)
		`@ -0,0 +1 @@`
							`Add initial implementation of new state resolution algorithm`
		`@ -0,0 +1 @@`
							`Clean up some bits of code which were flagged by the linter`