Add some miscellaneous comments around sync (#13474)

Add some miscellaneous comments to document sync, especially around `compute_state_delta`. Signed-off-by: Sean Quah <seanq@matrix.org> Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
2022-08-10 12:58:20 +01:00 · 2022-08-10 12:58:20 +01:00 · 51c01d450a
parent 1b09b0832e
commit 51c01d450a
3 changed files with 81 additions and 40 deletions
--- a/changelog.d/13474.misc
+++ b/changelog.d/13474.misc
@ -0,0 +1 @@
+Add some miscellaneous comments to document sync, especially around `compute_state_delta`.
--- a/synapse/handlers/sync.py
+++ b/synapse/handlers/sync.py
@ -13,7 +13,17 @@
 # limitations under the License.
 import itertools
 import logging
-from typing import TYPE_CHECKING, Any, Dict, FrozenSet, List, Optional, Set, Tuple
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Dict,
+    FrozenSet,
+    List,
+    Optional,
+    Sequence,
+    Set,
+    Tuple,
+)

 import attr
 from prometheus_client import Counter
@ -89,7 +99,7 @@ class SyncConfig:
@attr.s(slots=True, frozen=True, auto_attribs=True)
 class TimelineBatch:
    prev_batch: StreamToken
-    events: List[EventBase]
+    events: Sequence[EventBase]
    limited: bool
    # A mapping of event ID to the bundled aggregations for the above events.
    # This is only calculated if limited is true.
@ -852,16 +862,26 @@ class SyncHandler:
        now_token: StreamToken,
        full_state: bool,
    ) -> MutableStateMap[EventBase]:
-        """Works out the difference in state between the start of the timeline
-        and the previous sync.
+        """Works out the difference in state between the end of the previous sync and
+        the start of the timeline.

        Args:
            room_id:
            batch: The timeline batch for the room that will be sent to the user.
            sync_config:
-            since_token: Token of the end of the previous batch. May be None.
+            since_token: Token of the end of the previous batch. May be `None`.
            now_token: Token of the end of the current batch.
            full_state: Whether to force returning the full state.
+                `lazy_load_members` still applies when `full_state` is `True`.
+
+        Returns:
+            The state to return in the sync response for the room.
+
+            Clients will overlay this onto the state at the end of the previous sync to
+            arrive at the state at the start of the timeline.
+
+            Clients will then overlay state events in the timeline to arrive at the
+            state at the end of the timeline, in preparation for the next sync.
        """
        # TODO(mjark) Check if the state events were received by the server
        # after the previous sync, since we need to include those state
@ -869,7 +889,8 @@ class SyncHandler:
        # TODO(mjark) Check for new redactions in the state events.

        with Measure(self.clock, "compute_state_delta"):
-
+            # The memberships needed for events in the timeline.
+            # Only calculated when `lazy_load_members` is on.
            members_to_fetch = None

            lazy_load_members = sync_config.filter_collection.lazy_load_members()
@ -897,38 +918,46 @@ class SyncHandler:
            else:
                state_filter = StateFilter.all()

+            # The contribution to the room state from state events in the timeline.
+            # Only contains the last event for any given state key.
            timeline_state = {
                (event.type, event.state_key): event.event_id
                for event in batch.events
                if event.is_state()
            }

+            # Now calculate the state to return in the sync response for the room.
+            # This is more or less the change in state between the end of the previous
+            # sync's timeline and the start of the current sync's timeline.
+            # See the docstring above for details.
+            state_ids: StateMap[str]
+
            if full_state:
                if batch:
-                    current_state_ids = (
+                    state_at_timeline_end = (
                        await self._state_storage_controller.get_state_ids_for_event(
                            batch.events[-1].event_id, state_filter=state_filter
                        )
                    )

-                    state_ids = (
+                    state_at_timeline_start = (
                        await self._state_storage_controller.get_state_ids_for_event(
                            batch.events[0].event_id, state_filter=state_filter
                        )
                    )

                else:
-                    current_state_ids = await self.get_state_at(
+                    state_at_timeline_end = await self.get_state_at(
                        room_id, stream_position=now_token, state_filter=state_filter
                    )

-                    state_ids = current_state_ids
+                    state_at_timeline_start = state_at_timeline_end

                state_ids = _calculate_state(
                    timeline_contains=timeline_state,
-                    timeline_start=state_ids,
-                    previous={},
-                    current=current_state_ids,
+                    timeline_start=state_at_timeline_start,
+                    timeline_end=state_at_timeline_end,
+                    previous_timeline_end={},
                    lazy_load_members=lazy_load_members,
                )
            elif batch.limited:
@ -968,24 +997,23 @@ class SyncHandler:
                )

                if batch:
-                    current_state_ids = (
+                    state_at_timeline_end = (
                        await self._state_storage_controller.get_state_ids_for_event(
                            batch.events[-1].event_id, state_filter=state_filter
                        )
                    )
                else:
-                    # Its not clear how we get here, but empirically we do
-                    # (#5407). Logging has been added elsewhere to try and
-                    # figure out where this state comes from.
-                    current_state_ids = await self.get_state_at(
+                    # We can get here if the user has ignored the senders of all
+                    # the recent events.
+                    state_at_timeline_end = await self.get_state_at(
                        room_id, stream_position=now_token, state_filter=state_filter
                    )

                state_ids = _calculate_state(
                    timeline_contains=timeline_state,
                    timeline_start=state_at_timeline_start,
-                    previous=state_at_previous_sync,
-                    current=current_state_ids,
+                    timeline_end=state_at_timeline_end,
+                    previous_timeline_end=state_at_previous_sync,
                    # we have to include LL members in case LL initial sync missed them
                    lazy_load_members=lazy_load_members,
                )
@ -1010,6 +1038,13 @@ class SyncHandler:
                            ),
                        )

+            # At this point, if `lazy_load_members` is enabled, `state_ids` includes
+            # the memberships of all event senders in the timeline. This is because we
+            # may not have sent the memberships in a previous sync.
+
+            # When `include_redundant_members` is on, we send all the lazy-loaded
+            # memberships of event senders. Otherwise we make an effort to limit the set
+            # of memberships we send to those that we have not already sent to this client.
            if lazy_load_members and not include_redundant_members:
                cache_key = (sync_config.user.to_string(), sync_config.device_id)
                cache = self.get_lazy_loaded_members_cache(cache_key)
@ -2216,8 +2251,8 @@ def _action_has_highlight(actions: List[JsonDict]) -> bool:
 def _calculate_state(
    timeline_contains: StateMap[str],
    timeline_start: StateMap[str],
-    previous: StateMap[str],
-    current: StateMap[str],
+    timeline_end: StateMap[str],
+    previous_timeline_end: StateMap[str],
    lazy_load_members: bool,
 ) -> StateMap[str]:
    """Works out what state to include in a sync response.
@ -2225,45 +2260,50 @@ def _calculate_state(
    Args:
        timeline_contains: state in the timeline
        timeline_start: state at the start of the timeline
-        previous: state at the end of the previous sync (or empty dict
+        timeline_end: state at the end of the timeline
+        previous_timeline_end: state at the end of the previous sync (or empty dict
            if this is an initial sync)
-        current: state at the end of the timeline
        lazy_load_members: whether to return members from timeline_start
            or not.  assumes that timeline_start has already been filtered to
            include only the members the client needs to know about.
    """
-    event_id_to_key = {
-        e: key
-        for key, e in itertools.chain(
+    event_id_to_state_key = {
+        event_id: state_key
+        for state_key, event_id in itertools.chain(
            timeline_contains.items(),
-            previous.items(),
            timeline_start.items(),
-            current.items(),
+            timeline_end.items(),
+            previous_timeline_end.items(),
        )
    }

-    c_ids = set(current.values())
-    ts_ids = set(timeline_start.values())
-    p_ids = set(previous.values())
-    tc_ids = set(timeline_contains.values())
+    timeline_end_ids = set(timeline_end.values())
+    timeline_start_ids = set(timeline_start.values())
+    previous_timeline_end_ids = set(previous_timeline_end.values())
+    timeline_contains_ids = set(timeline_contains.values())

    # If we are lazyloading room members, we explicitly add the membership events
    # for the senders in the timeline into the state block returned by /sync,
    # as we may not have sent them to the client before.  We find these membership
    # events by filtering them out of timeline_start, which has already been filtered
    # to only include membership events for the senders in the timeline.
-    # In practice, we can do this by removing them from the p_ids list,
-    # which is the list of relevant state we know we have already sent to the client.
+    # In practice, we can do this by removing them from the previous_timeline_end_ids
+    # list, which is the list of relevant state we know we have already sent to the
+    # client.
    # see https://github.com/matrix-org/synapse/pull/2970/files/efcdacad7d1b7f52f879179701c7e0d9b763511f#r204732809

    if lazy_load_members:
-        p_ids.difference_update(
+        previous_timeline_end_ids.difference_update(
            e for t, e in timeline_start.items() if t[0] == EventTypes.Member
        )

-    state_ids = ((c_ids | ts_ids) - p_ids) - tc_ids
+    state_ids = (
+        (timeline_end_ids | timeline_start_ids)
+        - previous_timeline_end_ids
+        - timeline_contains_ids
+    )

-    return {event_id_to_key[e]: e for e in state_ids}
+    return {event_id_to_state_key[e]: e for e in state_ids}


@attr.s(slots=True, auto_attribs=True)
--- a/synapse/visibility.py
+++ b/synapse/visibility.py
@ -73,8 +73,8 @@ async def filter_events_for_client(
          * the user is not currently a member of the room, and:
          * the user has not been a member of the room since the given
            events
-        always_include_ids: set of event ids to specifically
-            include (unless sender is ignored)
+        always_include_ids: set of event ids to specifically include, if present
+            in events (unless sender is ignored)
        filter_send_to_client: Whether we're checking an event that's going to be
            sent to a client. This might not always be the case since this function can
            also be called to check whether a user can see the state at a given point.
				`@ -0,0 +1 @@`
				Add some miscellaneous comments to document sync, especially around `compute_state_delta`.