Move some replication processing out of generic_worker (#9796)

Co-authored-by: Richard van der Hoff <1389908+richvdh@users.noreply.github.com>
2021-04-14 17:06:06 +01:00 · 2021-04-14 17:06:06 +01:00 · 00a6db9676
parent c9a2b5d402
commit 00a6db9676
6 changed files with 486 additions and 483 deletions
--- a/changelog.d/9796.misc
+++ b/changelog.d/9796.misc
@ -0,0 +1 @@
 Move some replication processing out of `generic_worker`.
--- a/synapse/app/generic_worker.py
+++ b/synapse/app/generic_worker.py
@ -13,12 +13,9 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
 import contextlib
 import logging
 import sys
-from typing import Dict, Iterable, Optional, Set
+from typing import Dict, Iterable, Optional
 from typing_extensions import ContextManager
 from twisted.internet import address
 from twisted.web.resource import IResource
@ -40,24 +37,13 @@ from synapse.config._base import ConfigError
 from synapse.config.homeserver import HomeServerConfig
 from synapse.config.logger import setup_logging
 from synapse.config.server import ListenerConfig
 from synapse.federation import send_queue
 from synapse.federation.transport.server import TransportLayerServer
 from synapse.handlers.presence import (
    BasePresenceHandler,
    PresenceState,
    get_interested_parties,
 )
 from synapse.http.server import JsonResource, OptionsResource
 from synapse.http.servlet import RestServlet, parse_json_object_from_request
 from synapse.http.site import SynapseSite
 from synapse.logging.context import LoggingContext
 from synapse.metrics import METRICS_PREFIX, MetricsResource, RegistryProxy
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.replication.http import REPLICATION_PREFIX, ReplicationRestResource
 from synapse.replication.http.presence import (
    ReplicationBumpPresenceActiveTime,
    ReplicationPresenceSetState,
 )
 from synapse.replication.slave.storage._base import BaseSlavedStore
 from synapse.replication.slave.storage.account_data import SlavedAccountDataStore
 from synapse.replication.slave.storage.appservice import SlavedApplicationServiceStore
@ -77,19 +63,6 @@ from synapse.replication.slave.storage.receipts import SlavedReceiptsStore
 from synapse.replication.slave.storage.registration import SlavedRegistrationStore
 from synapse.replication.slave.storage.room import RoomStore
 from synapse.replication.slave.storage.transactions import SlavedTransactionStore
 from synapse.replication.tcp.client import ReplicationDataHandler
 from synapse.replication.tcp.commands import ClearUserSyncsCommand
 from synapse.replication.tcp.streams import (
    AccountDataStream,
    DeviceListsStream,
    GroupServerStream,
    PresenceStream,
    PushersStream,
    PushRulesStream,
    ReceiptsStream,
    TagAccountDataStream,
    ToDeviceStream,
 )
 from synapse.rest.admin import register_servlets_for_media_repo
 from synapse.rest.client.v1 import events, login, room
 from synapse.rest.client.v1.initial_sync import InitialSyncRestServlet
@ -128,7 +101,7 @@ from synapse.rest.client.versions import VersionsRestServlet
 from synapse.rest.health import HealthResource
 from synapse.rest.key.v2 import KeyApiV2Resource
 from synapse.rest.synapse.client import build_synapse_client_resource_tree
-from synapse.server import HomeServer, cache_in_self
+from synapse.server import HomeServer
 from synapse.storage.databases.main.censor_events import CensorEventsStore
 from synapse.storage.databases.main.client_ips import ClientIpWorkerStore
 from synapse.storage.databases.main.e2e_room_keys import EndToEndRoomKeyStore
@ -137,14 +110,11 @@ from synapse.storage.databases.main.metrics import ServerMetricsStore
 from synapse.storage.databases.main.monthly_active_users import (
    MonthlyActiveUsersWorkerStore,
 )
 from synapse.storage.databases.main.presence import UserPresenceState
 from synapse.storage.databases.main.search import SearchWorkerStore
 from synapse.storage.databases.main.stats import StatsStore
 from synapse.storage.databases.main.transactions import TransactionWorkerStore
 from synapse.storage.databases.main.ui_auth import UIAuthWorkerStore
 from synapse.storage.databases.main.user_directory import UserDirectoryStore
 from synapse.types import ReadReceipt
 from synapse.util.async_helpers import Linearizer
 from synapse.util.httpresourcetree import create_resource_tree
 from synapse.util.versionstring import get_version_string
@ -264,214 +234,6 @@ class KeyUploadServlet(RestServlet):
            return 200, {"one_time_key_counts": result}
 class _NullContextManager(ContextManager[None]):
     """A context manager which does nothing."""
     # No `__enter__` is defined here; it has to come from the `ContextManager`
     # base class.
     def __exit__(self, exc_type, exc_val, exc_tb):
         # Nothing to clean up. The implicit `None` return is falsy, so an
         # exception raised inside the `with` body is not suppressed.
         pass
 # 10 seconds, in milliseconds. Used both as the period of the
 # `send_stop_syncing` looping call and as the minimum time a user must have
 # been idle before their "stopped syncing" notification is sent.
 UPDATE_SYNCING_USERS_MS = 10 * 1000
 class GenericWorkerPresence(BasePresenceHandler):
     """Presence handler for worker processes.
     Counts in-flight syncs per user on this process, reports sync start/stop
     over the TCP replication connection, and proxies `set_state` and
     `bump_presence_active_time` through replication HTTP clients.
     """
     def __init__(self, hs):
         """Set up sync tracking, the replication clients and the shutdown hook.
         Args:
             hs: the homeserver object; configuration, notifier, reactor and
                 replication accessors are all read from it.
         """
         super().__init__(hs)
         self.hs = hs
         self.is_mine_id = hs.is_mine_id
         self.presence_router = hs.get_presence_router()
         self._presence_enabled = hs.config.use_presence
         # The number of ongoing syncs on this process, by user id.
         # Empty if _presence_enabled is false.
         self._user_to_num_current_syncs = {}  # type: Dict[str, int]
         self.notifier = hs.get_notifier()
         self.instance_id = hs.get_instance_id()
         # user_id -> last_sync_ms. Lists the users that have stopped syncing
         # but we haven't notified the master of that yet
         self.users_going_offline = {}
         self._bump_active_client = ReplicationBumpPresenceActiveTime.make_client(hs)
         self._set_state_client = ReplicationPresenceSetState.make_client(hs)
         # Periodically flush users who stopped syncing long enough ago.
         # NOTE(review): `self.clock` is not assigned in this class; presumably
         # it is set by `BasePresenceHandler.__init__` - confirm.
         self._send_stop_syncing_loop = self.clock.looping_call(
             self.send_stop_syncing, UPDATE_SYNCING_USERS_MS
         )
         self._busy_presence_enabled = hs.config.experimental.msc3026_enabled
         # Before the reactor shuts down, run `_on_shutdown` as a background
         # process named "generic_presence.on_shutdown".
         hs.get_reactor().addSystemEventTrigger(
             "before",
             "shutdown",
             run_as_background_process,
             "generic_presence.on_shutdown",
             self._on_shutdown,
         )
     def _on_shutdown(self):
         """Send a `ClearUserSyncsCommand` for this instance if presence is enabled."""
         if self._presence_enabled:
             self.hs.get_tcp_replication().send_command(
                 ClearUserSyncsCommand(self.instance_id)
             )
     def send_user_sync(self, user_id, is_syncing, last_sync_ms):
         """Report a user's sync state over TCP replication.
         Does nothing when presence is disabled.
         Args:
             user_id (str)
             is_syncing (bool): whether the user is now syncing
             last_sync_ms: timestamp (ms) forwarded unchanged to replication
         """
         if self._presence_enabled:
             self.hs.get_tcp_replication().send_user_sync(
                 self.instance_id, user_id, is_syncing, last_sync_ms
             )
     def mark_as_coming_online(self, user_id):
         """A user has started syncing. Send a UserSync to the master, unless they
         had recently stopped syncing.
         Args:
             user_id (str)
         """
         going_offline = self.users_going_offline.pop(user_id, None)
         if not going_offline:
             # There was no pending "going offline" entry, so the master must
             # be told the user is syncing. When an entry did exist it is
             # safe to skip, because we hadn't yet told the master they were
             # offline.
             self.send_user_sync(user_id, True, self.clock.time_msec())
     def mark_as_going_offline(self, user_id):
         """A user has stopped syncing. We wait before notifying the master as
         it's likely they'll come back soon. This allows us to avoid sending
         a stopped syncing immediately followed by a started syncing notification
         to the master.
         Args:
             user_id (str)
         """
         self.users_going_offline[user_id] = self.clock.time_msec()
     def send_stop_syncing(self):
         """Check if there are any users who stopped syncing a while ago
         and haven't come back yet. If there are, poke the master about them.
         """
         now = self.clock.time_msec()
         # Iterate over a copy, since entries are popped inside the loop.
         for user_id, last_sync_ms in list(self.users_going_offline.items()):
             if now - last_sync_ms > UPDATE_SYNCING_USERS_MS:
                 self.users_going_offline.pop(user_id, None)
                 self.send_user_sync(user_id, False, last_sync_ms)
     async def user_syncing(
         self, user_id: str, affect_presence: bool
     ) -> ContextManager[None]:
         """Record that a user is syncing.
         Called by the sync and events servlets to record that a user has connected to
         this worker and is waiting for some events.
         Returns:
             A context manager; leaving it decrements the user's in-flight
             sync count. A do-nothing manager is returned when
             `affect_presence` is false or presence is disabled.
         """
         if not affect_presence or not self._presence_enabled:
             return _NullContextManager()
         curr_sync = self._user_to_num_current_syncs.get(user_id, 0)
         self._user_to_num_current_syncs[user_id] = curr_sync + 1
         # If we went from no in flight sync to some, notify replication
         if self._user_to_num_current_syncs[user_id] == 1:
             self.mark_as_coming_online(user_id)
         def _end():
             # We check that the user_id is in user_to_num_current_syncs because
             # user_to_num_current_syncs may have been cleared if we are
             # shutting down.
             if user_id in self._user_to_num_current_syncs:
                 self._user_to_num_current_syncs[user_id] -= 1
                 # If we went from one in flight sync to none, notify replication
                 if self._user_to_num_current_syncs[user_id] == 0:
                     self.mark_as_going_offline(user_id)
         @contextlib.contextmanager
         def _user_syncing():
             # `finally` ensures the count is decremented even if the body of
             # the `with` block raises.
             try:
                 yield
             finally:
                 _end()
         return _user_syncing()
     async def notify_from_replication(self, states, stream_id):
         """Wake the notifier for the rooms and users interested in `states`.
         Args:
             states: presence states passed to `get_interested_parties`
             stream_id: stream position passed to the notifier
         """
         parties = await get_interested_parties(self.store, self.presence_router, states)
         room_ids_to_states, users_to_states = parties
         self.notifier.on_new_event(
             "presence_key",
             stream_id,
             rooms=room_ids_to_states.keys(),
             users=users_to_states.keys(),
         )
     async def process_replication_rows(self, token, rows):
         """Record presence rows received over replication and notify listeners.
         Each row is converted to a `UserPresenceState`, stored in
         `self.user_to_current_state`, and then `notify_from_replication` is
         called with `token` as the stream id.
         """
         states = [
             UserPresenceState(
                 row.user_id,
                 row.state,
                 row.last_active_ts,
                 row.last_federation_update_ts,
                 row.last_user_sync_ts,
                 row.status_msg,
                 row.currently_active,
             )
             for row in rows
         ]
         for state in states:
             self.user_to_current_state[state.user_id] = state
         stream_id = token
         await self.notify_from_replication(states, stream_id)
     def get_currently_syncing_users_for_replication(self) -> Iterable[str]:
         """Return the ids of users with at least one in-flight sync on this process."""
         return [
             user_id
             for user_id, count in self._user_to_num_current_syncs.items()
             if count > 0
         ]
     async def set_state(self, target_user, state, ignore_status_msg=False):
         """Set the presence state of the user.
         Raises:
             SynapseError: 400 if `state["presence"]` is not a recognised
                 value, or is BUSY while busy presence is not enabled.
         """
         presence = state["presence"]
         valid_presence = (
             PresenceState.ONLINE,
             PresenceState.UNAVAILABLE,
             PresenceState.OFFLINE,
             PresenceState.BUSY,
         )
         # Validation runs before the presence-disabled check below, so an
         # invalid state is rejected even when presence is disabled.
         if presence not in valid_presence or (
             presence == PresenceState.BUSY and not self._busy_presence_enabled
         ):
             raise SynapseError(400, "Invalid presence state")
         user_id = target_user.to_string()
         # If presence is disabled, no-op
         if not self.hs.config.use_presence:
             return
         # Proxy request to master
         await self._set_state_client(
             user_id=user_id, state=state, ignore_status_msg=ignore_status_msg
         )
     async def bump_presence_active_time(self, user):
         """We've seen the user do something that indicates they're interacting
         with the app.
         """
         # If presence is disabled, no-op
         if not self.hs.config.use_presence:
             return
         # Proxy request to master
         user_id = user.to_string()
         await self._bump_active_client(user_id=user_id)
 class GenericWorkerSlavedStore(
    # FIXME(#3714): We need to add UserDirectoryStore as we write directly
    # rather than going via the correct worker.
@ -657,234 +419,6 @@ class GenericWorkerServer(HomeServer):
        self.get_tcp_replication().start_replication(self)
     # NOTE(review): `cache_in_self` is imported from synapse.server and not
     # shown here; presumably it memoises the result on the instance - confirm.
     @cache_in_self
     def get_replication_data_handler(self):
         """Build the `GenericWorkerReplicationHandler` for this server."""
         return GenericWorkerReplicationHandler(self)
     # NOTE(review): `cache_in_self` is imported from synapse.server and not
     # shown here; presumably it memoises the result on the instance - confirm.
     @cache_in_self
     def get_presence_handler(self):
         """Build the `GenericWorkerPresence` handler for this server."""
         return GenericWorkerPresence(self)
 class GenericWorkerReplicationHandler(ReplicationDataHandler):
     """Replication data handler for generic workers.
     After the base class has handled incoming replication data, this pokes
     the notifier, pusher pool, presence handler and (when enabled) the
     federation sender, depending on which stream the rows came from.
     """
     def __init__(self, hs):
         """Collect the collaborators used when processing replication rows."""
         super().__init__(hs)
         self.store = hs.get_datastore()
         self.presence_handler = hs.get_presence_handler()  # type: GenericWorkerPresence
         self.notifier = hs.get_notifier()
         self.notify_pushers = hs.config.start_pushers
         self.pusher_pool = hs.get_pusherpool()
         # Only set when this process is configured to send federation.
         self.send_handler = None  # type: Optional[FederationSenderHandler]
         if hs.config.send_federation:
             self.send_handler = FederationSenderHandler(hs)
     async def on_rdata(self, stream_name, instance_name, token, rows):
         """Run the base-class handling, then the worker-specific notifications."""
         await super().on_rdata(stream_name, instance_name, token, rows)
         await self._process_and_notify(stream_name, instance_name, token, rows)
     async def _process_and_notify(self, stream_name, instance_name, token, rows):
         """Dispatch `rows` to the appropriate consumers based on `stream_name`.
         Any exception raised while processing is logged and swallowed, so it
         does not propagate to the caller.
         """
         try:
             # The federation sender (if any) sees every stream first.
             if self.send_handler:
                 await self.send_handler.process_replication_rows(
                     stream_name, token, rows
                 )
             if stream_name == PushRulesStream.NAME:
                 self.notifier.on_new_event(
                     "push_rules_key", token, users=[row.user_id for row in rows]
                 )
             elif stream_name in (AccountDataStream.NAME, TagAccountDataStream.NAME):
                 self.notifier.on_new_event(
                     "account_data_key", token, users=[row.user_id for row in rows]
                 )
             elif stream_name == ReceiptsStream.NAME:
                 self.notifier.on_new_event(
                     "receipt_key", token, rooms=[row.room_id for row in rows]
                 )
                 # `token` is passed for both of the first two arguments.
                 await self.pusher_pool.on_new_receipts(
                     token, token, {row.room_id for row in rows}
                 )
             elif stream_name == ToDeviceStream.NAME:
                 # Only entities starting with "@" are notified here.
                 entities = [row.entity for row in rows if row.entity.startswith("@")]
                 if entities:
                     self.notifier.on_new_event("to_device_key", token, users=entities)
             elif stream_name == DeviceListsStream.NAME:
                 # Notify every room that any of the affected "@" entities is in.
                 all_room_ids = set()  # type: Set[str]
                 for row in rows:
                     if row.entity.startswith("@"):
                         room_ids = await self.store.get_rooms_for_user(row.entity)
                         all_room_ids.update(room_ids)
                 self.notifier.on_new_event("device_list_key", token, rooms=all_room_ids)
             elif stream_name == PresenceStream.NAME:
                 await self.presence_handler.process_replication_rows(token, rows)
             elif stream_name == GroupServerStream.NAME:
                 self.notifier.on_new_event(
                     "groups_key", token, users=[row.user_id for row in rows]
                 )
             elif stream_name == PushersStream.NAME:
                 for row in rows:
                     if row.deleted:
                         self.stop_pusher(row.user_id, row.app_id, row.pushkey)
                     else:
                         await self.start_pusher(row.user_id, row.app_id, row.pushkey)
         except Exception:
             logger.exception("Error processing replication")
     async def on_position(self, stream_name: str, instance_name: str, token: int):
         """Handle a stream position update, then replay it as an empty RDATA."""
         await super().on_position(stream_name, instance_name, token)
         # Also call on_rdata to ensure that stream positions are properly reset.
         await self.on_rdata(stream_name, instance_name, token, [])
     def stop_pusher(self, user_id, app_id, pushkey):
         """Remove and stop the pusher identified by `app_id` and `pushkey`.
         Does nothing if pushers are not enabled on this process or no such
         pusher is registered for `user_id`.
         """
         if not self.notify_pushers:
             return
         key = "%s:%s" % (app_id, pushkey)
         pushers_for_user = self.pusher_pool.pushers.get(user_id, {})
         pusher = pushers_for_user.pop(key, None)
         if pusher is None:
             return
         logger.info("Stopping pusher %r / %r", user_id, key)
         pusher.on_stop()
     async def start_pusher(self, user_id, app_id, pushkey):
         """Start the pusher identified by `app_id` and `pushkey` for `user_id`.
         Returns:
             None if pushers are not enabled on this process, otherwise
             whatever `pusher_pool.start_pusher_by_id` returns.
         """
         if not self.notify_pushers:
             return
         key = "%s:%s" % (app_id, pushkey)
         logger.info("Starting pusher %r / %r", user_id, key)
         return await self.pusher_pool.start_pusher_by_id(app_id, pushkey, user_id)
     def on_remote_server_up(self, server: str):
         """Called when we get a new REMOTE_SERVER_UP command."""
         # Let's wake up the transaction queue for the server in case we have
         # pending stuff to send to it.
         if self.send_handler:
             self.send_handler.wake_destination(server)
 class FederationSenderHandler:
     """Processes the federation replication stream.
     This class is only instantiated on the worker responsible for sending outbound
     federation transactions. It receives rows from the replication stream and forwards
     the appropriate entries to the FederationSender class.
     """
     def __init__(self, hs: GenericWorkerServer):
         """Look up the datastore and federation sender from `hs`."""
         self.store = hs.get_datastore()
         self._is_mine_id = hs.is_mine_id
         self.federation_sender = hs.get_federation_sender()
         self._hs = hs
         # Stores the latest position in the federation stream we've gotten up
         # to. This is always set before we use it.
         self.federation_position = None
         self._fed_position_linearizer = Linearizer(name="_fed_position_linearizer")
     def wake_destination(self, server: str):
         """Forward a wake-up for `server` to the federation sender."""
         self.federation_sender.wake_destination(server)
     async def process_replication_rows(self, stream_name, token, rows):
         """Forward the rows of interest to the federation sender.
         Handles the "federation", receipts, device-lists and to-device
         streams; rows from any other stream are ignored.
         """
         # The federation stream contains things that we want to send out, e.g.
         # presence, typing, etc.
         if stream_name == "federation":
             send_queue.process_rows_for_federation(self.federation_sender, rows)
             await self.update_token(token)
         # ... and when new receipts happen
         elif stream_name == ReceiptsStream.NAME:
             await self._on_new_receipts(rows)
         # ... as well as device updates and messages
         elif stream_name == DeviceListsStream.NAME:
             # The entities are either user IDs (starting with '@') whose devices
             # have changed, or remote servers that we need to tell about
             # changes.
             hosts = {row.entity for row in rows if not row.entity.startswith("@")}
             for host in hosts:
                 self.federation_sender.send_device_messages(host)
         elif stream_name == ToDeviceStream.NAME:
             # The to_device stream includes stuff to be pushed to both local
             # clients and remote servers, so we ignore entities that start with
             # '@' (since they'll be local users rather than destinations).
             hosts = {row.entity for row in rows if not row.entity.startswith("@")}
             for host in hosts:
                 self.federation_sender.send_device_messages(host)
     async def _on_new_receipts(self, rows):
         """Send read receipts for our own users to the federation sender.
         Args:
             rows (Iterable[synapse.replication.tcp.streams.ReceiptsStream.ReceiptsStreamRow]):
                 new receipts to be processed
         """
         for receipt in rows:
             # we only want to send on receipts for our own users
             if not self._is_mine_id(receipt.user_id):
                 continue
             receipt_info = ReadReceipt(
                 receipt.room_id,
                 receipt.receipt_type,
                 receipt.user_id,
                 [receipt.event_id],
                 receipt.data,
             )
             await self.federation_sender.send_read_receipt(receipt_info)
     async def update_token(self, token):
         """Update the record of where we have processed to in the federation stream.
         Called after we have processed an update received over replication. Sends
         a FEDERATION_ACK back to the master, and stores the token that we have processed
         in `federation_stream_position` so that we can restart where we left off.
         """
         self.federation_position = token
         # We save and send the ACK to master asynchronously, so we don't block
         # processing on persistence. We don't need to do this operation for
         # every single RDATA we receive, we just need to do it periodically.
         if self._fed_position_linearizer.is_queued(None):
             # There is already a task queued up to save and send the token, so
             # no need to queue up another task.
             return
         run_as_background_process("_save_and_send_ack", self._save_and_send_ack)
     async def _save_and_send_ack(self):
         """Save the current federation position in the database and send an ACK
         to master with where we're up to.
         Any exception is logged and swallowed.
         """
         try:
             # We linearize here to ensure we don't have races updating the token
             #
             # XXX this appears to be redundant, since the ReplicationCommandHandler
             # has a linearizer which ensures that we only process one line of
             # replication data at a time. Should we remove it, or is it doing useful
             # service for robustness? Or could we replace it with an assertion that
             # we're not being re-entered?
             with (await self._fed_position_linearizer.queue(None)):
                 # We persist and ack the same position, so we take a copy of it
                 # here as otherwise it can get modified from underneath us.
                 current_position = self.federation_position
                 await self.store.update_federation_out_pos(
                     "federation", current_position
                 )
                 # We ACK this token over replication so that the master can drop
                 # its in memory queues
                 self._hs.get_tcp_replication().send_federation_ack(current_position)
         except Exception:
             logger.exception("Error updating federation stream position")
 def start(config_options):
    try:
--- a/synapse/handlers/presence.py
+++ b/synapse/handlers/presence.py
@ -22,6 +22,7 @@ The methods that define policy are:
    - should_notify
 """
 import abc
 import contextlib
 import logging
 from contextlib import contextmanager
 from typing import (
@ -48,6 +49,11 @@ from synapse.logging.context import run_in_background
 from synapse.logging.utils import log_function
 from synapse.metrics import LaterGauge
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.replication.http.presence import (
    ReplicationBumpPresenceActiveTime,
    ReplicationPresenceSetState,
 )
 from synapse.replication.tcp.commands import ClearUserSyncsCommand
 from synapse.state import StateHandler
 from synapse.storage.databases.main import DataStore
 from synapse.types import Collection, JsonDict, UserID, get_domain_from_id
@ -104,6 +110,10 @@ FEDERATION_PING_INTERVAL = 25 * 60 * 1000
 # are dead.
 EXTERNAL_PROCESS_EXPIRY = 5 * 60 * 1000
 # Delay (in milliseconds) before a worker tells the presence handler that a
 # user has stopped syncing. Also the period of the worker's
 # `send_stop_syncing` looping call.
 UPDATE_SYNCING_USERS_MS = 10 * 1000
 assert LAST_ACTIVE_GRANULARITY < IDLE_TIMER
@ -208,6 +218,242 @@ class BasePresenceHandler(abc.ABC):
        with the app.
        """
     async def update_external_syncs_row(
         self, process_id: str, user_id: str, is_syncing: bool, sync_time_msec: int
     ) -> None:
         """Update the syncing users for an external process as a delta.
         This is a no-op when presence is handled by a different worker; the
         base implementation here does nothing.
         Args:
             process_id (str): An identifier for the process the users are
                 syncing against. This allows synapse to process updates
                 as users start and stop syncing against a given process.
             user_id (str): The user who has started or stopped syncing
             is_syncing (bool): Whether or not the user is now syncing
             sync_time_msec (int): Time in ms when the user was last syncing
         """
         pass
     async def update_external_syncs_clear(self, process_id: str) -> None:
         """Marks all users that had been marked as syncing by a given process
         as offline.
         Used when the process has stopped/disappeared.
         This is a no-op when presence is handled by a different worker; the
         base implementation here does nothing.
         Args:
             process_id (str): An identifier for the process whose syncing
                 users should be cleared.
         """
         pass
     async def process_replication_rows(self, token, rows):
         """Process presence stream rows received over replication.
         The base implementation does nothing.
         Args:
             token: the stream position the rows were received at
             rows: the presence stream rows
         """
         pass
 class _NullContextManager(ContextManager[None]):
     """A context manager which does nothing."""
     # No `__enter__` is defined here; it has to come from the `ContextManager`
     # base class.
     def __exit__(self, exc_type, exc_val, exc_tb):
         # Nothing to clean up. The implicit `None` return is falsy, so an
         # exception raised inside the `with` body is not suppressed.
         pass
 class WorkerPresenceHandler(BasePresenceHandler):
     """Presence handler for worker processes.
     Counts in-flight syncs per user on this process, reports sync start/stop
     over the TCP replication connection, and proxies `set_state` and
     `bump_presence_active_time` through replication HTTP clients.
     """
     def __init__(self, hs):
         """Set up sync tracking, the replication clients and the shutdown hook.
         Args:
             hs: the homeserver object; configuration, notifier, reactor and
                 replication accessors are all read from it.
         """
         super().__init__(hs)
         self.hs = hs
         self.is_mine_id = hs.is_mine_id
         self.presence_router = hs.get_presence_router()
         self._presence_enabled = hs.config.use_presence
         # The number of ongoing syncs on this process, by user id.
         # Empty if _presence_enabled is false.
         self._user_to_num_current_syncs = {}  # type: Dict[str, int]
         self.notifier = hs.get_notifier()
         self.instance_id = hs.get_instance_id()
         # user_id -> last_sync_ms. Lists the users that have stopped syncing
         # but we haven't notified the master of that yet
         self.users_going_offline = {}
         self._bump_active_client = ReplicationBumpPresenceActiveTime.make_client(hs)
         self._set_state_client = ReplicationPresenceSetState.make_client(hs)
         # Periodically flush users who stopped syncing long enough ago.
         # NOTE(review): `self.clock` is not assigned in this class; presumably
         # it is set by `BasePresenceHandler.__init__` - confirm.
         self._send_stop_syncing_loop = self.clock.looping_call(
             self.send_stop_syncing, UPDATE_SYNCING_USERS_MS
         )
         self._busy_presence_enabled = hs.config.experimental.msc3026_enabled
         # Before the reactor shuts down, run `_on_shutdown` as a background
         # process named "generic_presence.on_shutdown".
         hs.get_reactor().addSystemEventTrigger(
             "before",
             "shutdown",
             run_as_background_process,
             "generic_presence.on_shutdown",
             self._on_shutdown,
         )
     def _on_shutdown(self):
         """Send a `ClearUserSyncsCommand` for this instance if presence is enabled."""
         if self._presence_enabled:
             self.hs.get_tcp_replication().send_command(
                 ClearUserSyncsCommand(self.instance_id)
             )
     def send_user_sync(self, user_id, is_syncing, last_sync_ms):
         """Report a user's sync state over TCP replication.
         Does nothing when presence is disabled.
         Args:
             user_id (str)
             is_syncing (bool): whether the user is now syncing
             last_sync_ms: timestamp (ms) forwarded unchanged to replication
         """
         if self._presence_enabled:
             self.hs.get_tcp_replication().send_user_sync(
                 self.instance_id, user_id, is_syncing, last_sync_ms
             )
     def mark_as_coming_online(self, user_id):
         """A user has started syncing. Send a UserSync to the master, unless they
         had recently stopped syncing.
         Args:
             user_id (str)
         """
         going_offline = self.users_going_offline.pop(user_id, None)
         if not going_offline:
             # There was no pending "going offline" entry, so the master must
             # be told the user is syncing. When an entry did exist it is
             # safe to skip, because we hadn't yet told the master they were
             # offline.
             self.send_user_sync(user_id, True, self.clock.time_msec())
     def mark_as_going_offline(self, user_id):
         """A user has stopped syncing. We wait before notifying the master as
         it's likely they'll come back soon. This allows us to avoid sending
         a stopped syncing immediately followed by a started syncing notification
         to the master.
         Args:
             user_id (str)
         """
         self.users_going_offline[user_id] = self.clock.time_msec()
     def send_stop_syncing(self):
         """Check if there are any users who stopped syncing a while ago
         and haven't come back yet. If there are, poke the master about them.
         """
         now = self.clock.time_msec()
         # Iterate over a copy, since entries are popped inside the loop.
         for user_id, last_sync_ms in list(self.users_going_offline.items()):
             if now - last_sync_ms > UPDATE_SYNCING_USERS_MS:
                 self.users_going_offline.pop(user_id, None)
                 self.send_user_sync(user_id, False, last_sync_ms)
     async def user_syncing(
         self, user_id: str, affect_presence: bool
     ) -> ContextManager[None]:
         """Record that a user is syncing.
         Called by the sync and events servlets to record that a user has connected to
         this worker and is waiting for some events.
         Returns:
             A context manager; leaving it decrements the user's in-flight
             sync count. A do-nothing manager is returned when
             `affect_presence` is false or presence is disabled.
         """
         if not affect_presence or not self._presence_enabled:
             return _NullContextManager()
         curr_sync = self._user_to_num_current_syncs.get(user_id, 0)
         self._user_to_num_current_syncs[user_id] = curr_sync + 1
         # If we went from no in flight sync to some, notify replication
         if self._user_to_num_current_syncs[user_id] == 1:
             self.mark_as_coming_online(user_id)
         def _end():
             # We check that the user_id is in user_to_num_current_syncs because
             # user_to_num_current_syncs may have been cleared if we are
             # shutting down.
             if user_id in self._user_to_num_current_syncs:
                 self._user_to_num_current_syncs[user_id] -= 1
                 # If we went from one in flight sync to none, notify replication
                 if self._user_to_num_current_syncs[user_id] == 0:
                     self.mark_as_going_offline(user_id)
         @contextlib.contextmanager
         def _user_syncing():
             # `finally` ensures the count is decremented even if the body of
             # the `with` block raises.
             try:
                 yield
             finally:
                 _end()
         return _user_syncing()
     async def notify_from_replication(self, states, stream_id):
         """Wake the notifier for the rooms and users interested in `states`.
         Args:
             states: presence states passed to `get_interested_parties`
             stream_id: stream position passed to the notifier
         """
         parties = await get_interested_parties(self.store, self.presence_router, states)
         room_ids_to_states, users_to_states = parties
         self.notifier.on_new_event(
             "presence_key",
             stream_id,
             rooms=room_ids_to_states.keys(),
             users=users_to_states.keys(),
         )
     async def process_replication_rows(self, token, rows):
         """Record presence rows received over replication and notify listeners.
         Each row is converted to a `UserPresenceState`, stored in
         `self.user_to_current_state`, and then `notify_from_replication` is
         called with `token` as the stream id.
         """
         states = [
             UserPresenceState(
                 row.user_id,
                 row.state,
                 row.last_active_ts,
                 row.last_federation_update_ts,
                 row.last_user_sync_ts,
                 row.status_msg,
                 row.currently_active,
             )
             for row in rows
         ]
         for state in states:
             self.user_to_current_state[state.user_id] = state
         stream_id = token
         await self.notify_from_replication(states, stream_id)
     def get_currently_syncing_users_for_replication(self) -> Iterable[str]:
         """Return the ids of users with at least one in-flight sync on this process."""
         return [
             user_id
             for user_id, count in self._user_to_num_current_syncs.items()
             if count > 0
         ]
     async def set_state(self, target_user, state, ignore_status_msg=False):
         """Set the presence state of the user.
         Raises:
             SynapseError: 400 if `state["presence"]` is not a recognised
                 value, or is BUSY while busy presence is not enabled.
         """
         presence = state["presence"]
         valid_presence = (
             PresenceState.ONLINE,
             PresenceState.UNAVAILABLE,
             PresenceState.OFFLINE,
             PresenceState.BUSY,
         )
         # Validation runs before the presence-disabled check below, so an
         # invalid state is rejected even when presence is disabled.
         if presence not in valid_presence or (
             presence == PresenceState.BUSY and not self._busy_presence_enabled
         ):
             raise SynapseError(400, "Invalid presence state")
         user_id = target_user.to_string()
         # If presence is disabled, no-op
         if not self.hs.config.use_presence:
             return
         # Proxy request to master
         await self._set_state_client(
             user_id=user_id, state=state, ignore_status_msg=ignore_status_msg
         )
     async def bump_presence_active_time(self, user):
         """We've seen the user do something that indicates they're interacting
         with the app.
         """
         # If presence is disabled, no-op
         if not self.hs.config.use_presence:
             return
         # Proxy request to master
         user_id = user.to_string()
         await self._bump_active_client(user_id=user_id)
 class PresenceHandler(BasePresenceHandler):
    def __init__(self, hs: "HomeServer"):
--- a/synapse/replication/tcp/client.py
+++ b/synapse/replication/tcp/client.py
@ -14,22 +14,36 @@
 """A replication client for use by synapse workers.
 """
 import logging
-from typing import TYPE_CHECKING, Dict, List, Tuple
+from typing import TYPE_CHECKING, Dict, List, Optional, Set, Tuple
 from twisted.internet.defer import Deferred
 from twisted.internet.protocol import ReconnectingClientFactory
 from synapse.api.constants import EventTypes
 from synapse.federation import send_queue
 from synapse.federation.sender import FederationSender
 from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable
 from synapse.metrics.background_process_metrics import run_as_background_process
 from synapse.replication.tcp.protocol import ClientReplicationStreamProtocol
-from synapse.replication.tcp.streams import TypingStream
+from synapse.replication.tcp.streams import (
    AccountDataStream,
    DeviceListsStream,
    GroupServerStream,
    PresenceStream,
    PushersStream,
    PushRulesStream,
    ReceiptsStream,
    TagAccountDataStream,
    ToDeviceStream,
    TypingStream,
 )
 from synapse.replication.tcp.streams.events import (
    EventsStream,
    EventsStreamEventRow,
    EventsStreamRow,
 )
-from synapse.types import PersistedEventPosition, UserID
+from synapse.types import PersistedEventPosition, ReadReceipt, UserID
-from synapse.util.async_helpers import timeout_deferred
+from synapse.util.async_helpers import Linearizer, timeout_deferred
 from synapse.util.metrics import Measure
 if TYPE_CHECKING:
@ -105,6 +119,14 @@ class ReplicationDataHandler:
        self._instance_name = hs.get_instance_name()
        self._typing_handler = hs.get_typing_handler()
        self._notify_pushers = hs.config.start_pushers
        self._pusher_pool = hs.get_pusherpool()
        self._presence_handler = hs.get_presence_handler()
        self.send_handler = None  # type: Optional[FederationSenderHandler]
        if hs.should_send_federation():
            self.send_handler = FederationSenderHandler(hs)
        # Map from stream to list of deferreds waiting for the stream to
        # arrive at a particular position. The lists are sorted by stream position.
        self._streams_to_waiters = {}  # type: Dict[str, List[Tuple[int, Deferred]]]
@ -125,13 +147,53 @@ class ReplicationDataHandler:
        """
        self.store.process_replication_rows(stream_name, instance_name, token, rows)
        if self.send_handler:
            await self.send_handler.process_replication_rows(stream_name, token, rows)
        if stream_name == TypingStream.NAME:
            self._typing_handler.process_replication_rows(token, rows)
            self.notifier.on_new_event(
                "typing_key", token, rooms=[row.room_id for row in rows]
            )
-
+        elif stream_name == PushRulesStream.NAME:
-        if stream_name == EventsStream.NAME:
+            self.notifier.on_new_event(
                "push_rules_key", token, users=[row.user_id for row in rows]
            )
        elif stream_name in (AccountDataStream.NAME, TagAccountDataStream.NAME):
            self.notifier.on_new_event(
                "account_data_key", token, users=[row.user_id for row in rows]
            )
        elif stream_name == ReceiptsStream.NAME:
            self.notifier.on_new_event(
                "receipt_key", token, rooms=[row.room_id for row in rows]
            )
            await self._pusher_pool.on_new_receipts(
                token, token, {row.room_id for row in rows}
            )
        elif stream_name == ToDeviceStream.NAME:
            entities = [row.entity for row in rows if row.entity.startswith("@")]
            if entities:
                self.notifier.on_new_event("to_device_key", token, users=entities)
        elif stream_name == DeviceListsStream.NAME:
            all_room_ids = set()  # type: Set[str]
            for row in rows:
                if row.entity.startswith("@"):
                    room_ids = await self.store.get_rooms_for_user(row.entity)
                    all_room_ids.update(room_ids)
            self.notifier.on_new_event("device_list_key", token, rooms=all_room_ids)
        elif stream_name == GroupServerStream.NAME:
            self.notifier.on_new_event(
                "groups_key", token, users=[row.user_id for row in rows]
            )
        elif stream_name == PushersStream.NAME:
            for row in rows:
                if row.deleted:
                    self.stop_pusher(row.user_id, row.app_id, row.pushkey)
                else:
                    await self.start_pusher(row.user_id, row.app_id, row.pushkey)
        elif stream_name == PresenceStream.NAME:
            await self._presence_handler.process_replication_rows(token, rows)
        elif stream_name == EventsStream.NAME:
            # We shouldn't get multiple rows per token for events stream, so
            # we don't need to optimise this for multiple rows.
            for row in rows:
@ -190,7 +252,7 @@ class ReplicationDataHandler:
        waiting_list[:] = waiting_list[index_of_first_deferred_not_called:]
    async def on_position(self, stream_name: str, instance_name: str, token: int):
-        self.store.process_replication_rows(stream_name, instance_name, token, [])
+        await self.on_rdata(stream_name, instance_name, token, [])
        # We poke the generic "replication" notifier to wake anything up that
        # may be streaming.
@ -199,6 +261,11 @@ class ReplicationDataHandler:
    def on_remote_server_up(self, server: str):
        """Called when we receive a new REMOTE_SERVER_UP command."""
        sender_handler = self.send_handler
        if not sender_handler:
            # No federation send handler is set on this instance, so there
            # is nothing to wake.
            return

        # Wake up the transaction queue for the server in case we have
        # pending stuff to send to it.
        sender_handler.wake_destination(server)
    async def wait_for_stream_position(
        self, instance_name: str, stream_name: str, position: int
    ):
@ -235,3 +302,153 @@ class ReplicationDataHandler:
            logger.info(
                "Finished waiting for repl stream %r to reach %s", stream_name, position
            )
    def stop_pusher(self, user_id, app_id, pushkey):
        """Remove the matching pusher from the pusher pool and stop it.

        Does nothing when pushers are not enabled on this instance or when no
        pusher is registered for the given user under "<app_id>:<pushkey>".
        """
        if not self._notify_pushers:
            return

        pusher_key = "%s:%s" % (app_id, pushkey)
        # Use a throwaway dict for unknown users so the pop below is a no-op.
        removed = self._pusher_pool.pushers.get(user_id, {}).pop(pusher_key, None)
        if removed is None:
            return

        logger.info("Stopping pusher %r / %r", user_id, pusher_key)
        removed.on_stop()
    async def start_pusher(self, user_id, app_id, pushkey):
        """Ask the pusher pool to start the pusher "<app_id>:<pushkey>" for a user.

        Returns:
            Whatever the pool's `start_pusher_by_id` returns, or None when
            pushers are not enabled on this instance.
        """
        if self._notify_pushers:
            pusher_key = "%s:%s" % (app_id, pushkey)
            logger.info("Starting pusher %r / %r", user_id, pusher_key)
            return await self._pusher_pool.start_pusher_by_id(
                app_id, pushkey, user_id
            )
        return None
 class FederationSenderHandler:
    """Processes the federation replication stream.

    This class is only instantiated on the worker responsible for sending
    outbound federation transactions. It receives rows from the replication
    streams and forwards the appropriate entries to the FederationSender.
    """

    def __init__(self, hs: "HomeServer"):
        assert hs.should_send_federation()

        self._hs = hs
        self.store = hs.get_datastore()
        self._is_mine_id = hs.is_mine_id

        # Go via a temporary so that mypy narrows the type: since
        # `should_send_federation` is True we expect a real FederationSender.
        fed_sender = hs.get_federation_sender()
        assert isinstance(fed_sender, FederationSender)
        self.federation_sender = fed_sender

        # Latest position in the federation stream that we have processed.
        # This is always set before it is used.
        self.federation_position = None  # type: Optional[int]

        self._fed_position_linearizer = Linearizer(name="_fed_position_linearizer")

    def wake_destination(self, server: str):
        """Ask the federation sender to wake the queue for `server`."""
        self.federation_sender.wake_destination(server)

    async def process_replication_rows(self, stream_name, token, rows):
        """Dispatch a batch of replication rows according to their stream.

        Args:
            stream_name: name of the stream the rows arrived on.
            token: stream position of the batch; only recorded for the
                "federation" stream.
            rows: the rows received for that stream.
        """
        if stream_name == "federation":
            # The federation stream contains things that we want to send out,
            # e.g. presence, typing, etc.
            send_queue.process_rows_for_federation(self.federation_sender, rows)
            await self.update_token(token)
        elif stream_name == ReceiptsStream.NAME:
            # ... and when new receipts happen
            await self._on_new_receipts(rows)
        elif stream_name in (DeviceListsStream.NAME, ToDeviceStream.NAME):
            # ... as well as device updates and messages. Both streams carry
            # entities that are either user IDs (starting with '@'), which
            # are local users rather than destinations, or remote servers
            # that we need to send to.
            self._send_device_messages_to_remote_hosts(rows)

    def _send_device_messages_to_remote_hosts(self, rows):
        """Poke the federation sender once per distinct non-user entity in `rows`."""
        remote_hosts = {row.entity for row in rows if not row.entity.startswith("@")}
        for destination in remote_hosts:
            self.federation_sender.send_device_messages(destination)

    async def _on_new_receipts(self, rows):
        """Forward read receipts of our own users to the federation sender.

        Args:
            rows (Iterable[synapse.replication.tcp.streams.ReceiptsStream.ReceiptsStreamRow]):
                new receipts to be processed
        """
        for row in rows:
            # we only want to send on receipts for our own users
            if self._is_mine_id(row.user_id):
                read_receipt = ReadReceipt(
                    row.room_id,
                    row.receipt_type,
                    row.user_id,
                    [row.event_id],
                    row.data,
                )
                await self.federation_sender.send_read_receipt(read_receipt)

    async def update_token(self, token):
        """Update the record of where we have processed to in the federation stream.

        Called after we have processed an update received over replication.
        Records the token and, unless a save is already queued, starts a
        background process that persists the position and sends a federation
        ACK to the master, so that we can restart where we left off.
        """
        self.federation_position = token

        # We save and send the ACK to master asynchronously, so we don't block
        # processing on persistence. We don't need to do this operation for
        # every single RDATA we receive, we just need to do it periodically.
        # If a task is already queued up to save and send the token there is
        # no need to queue up another one.
        save_already_queued = self._fed_position_linearizer.is_queued(None)
        if not save_already_queued:
            run_as_background_process("_save_and_send_ack", self._save_and_send_ack)

    async def _save_and_send_ack(self):
        """Save the current federation position in the database and send an ACK
        to master with where we're up to.

        Any exception raised while doing so is logged rather than propagated.
        """
        # We should only be calling this once we've got a token.
        assert self.federation_position is not None

        try:
            # We linearize here to ensure we don't have races updating the token
            #
            # XXX this appears to be redundant, since the ReplicationCommandHandler
            # has a linearizer which ensures that we only process one line of
            # replication data at a time. Should we remove it, or is it doing useful
            # service for robustness? Or could we replace it with an assertion that
            # we're not being re-entered?
            with (await self._fed_position_linearizer.queue(None)):
                # We persist and ack the same position, so snapshot it here;
                # otherwise it can get modified from underneath us while we
                # await the database write.
                position_to_ack = self.federation_position

                await self.store.update_federation_out_pos(
                    "federation", position_to_ack
                )

                # We ACK this token over replication so that the master can drop
                # its in memory queues
                replication = self._hs.get_tcp_replication()
                replication.send_federation_ack(position_to_ack)
        except Exception:
            logger.exception("Error updating federation stream position")
--- a/synapse/server.py
+++ b/synapse/server.py
@ -85,7 +85,11 @@ from synapse.handlers.initial_sync import InitialSyncHandler
 from synapse.handlers.message import EventCreationHandler, MessageHandler
 from synapse.handlers.pagination import PaginationHandler
 from synapse.handlers.password_policy import PasswordPolicyHandler
-from synapse.handlers.presence import PresenceHandler
+from synapse.handlers.presence import (
    BasePresenceHandler,
    PresenceHandler,
    WorkerPresenceHandler,
 )
 from synapse.handlers.profile import ProfileHandler
 from synapse.handlers.read_marker import ReadMarkerHandler
 from synapse.handlers.receipts import ReceiptsHandler
@ -415,8 +419,11 @@ class HomeServer(metaclass=abc.ABCMeta):
        return StateResolutionHandler(self)
    @cache_in_self
-    def get_presence_handler(self) -> PresenceHandler:
+    def get_presence_handler(self) -> BasePresenceHandler:
-        return PresenceHandler(self)
+        if self.config.worker_app:
            return WorkerPresenceHandler(self)
        else:
            return PresenceHandler(self)
    @cache_in_self
    def get_typing_writer_handler(self) -> TypingWriterHandler:
--- a/tests/replication/_base.py
+++ b/tests/replication/_base.py
@ -21,13 +21,11 @@ from twisted.web.http import HTTPChannel
 from twisted.web.resource import Resource
 from twisted.web.server import Request, Site
-from synapse.app.generic_worker import (
+from synapse.app.generic_worker import GenericWorkerServer
    GenericWorkerReplicationHandler,
    GenericWorkerServer,
 )
 from synapse.http.server import JsonResource
 from synapse.http.site import SynapseRequest, SynapseSite
 from synapse.replication.http import ReplicationRestResource
 from synapse.replication.tcp.client import ReplicationDataHandler
 from synapse.replication.tcp.handler import ReplicationCommandHandler
 from synapse.replication.tcp.protocol import ClientReplicationStreamProtocol
 from synapse.replication.tcp.resource import (
@ -431,7 +429,7 @@ class BaseMultiWorkerStreamTestCase(unittest.HomeserverTestCase):
            server_protocol.makeConnection(server_to_client_transport)
-class TestReplicationDataHandler(GenericWorkerReplicationHandler):
+class TestReplicationDataHandler(ReplicationDataHandler):
    """Drop-in for ReplicationDataHandler which just collects RDATA rows"""
    def __init__(self, hs: HomeServer):
		`@ -0,0 +1 @@`
							Move some replication processing out of `generic_worker`.