Enable Faster Remote Room Joins against worker-mode Synapse. (#14752)
* Enable Complement tests for Faster Remote Room Joins on worker-mode * (dangerous) Add an override to allow Complement to use FRRJ under workers * Newsfile Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org> * Fix race where we didn't send out replication notification * MORE HACKS * Fix get_un_partial_stated_rooms_token to take instance_name * Fix bad merge * Remove warning * Correctly advance un_partial_stated_room_stream * Fix merge * Add another notify_replication * Fixups * Create a separate ReplicationNotifier * Fix test * Fix portdb * Create a separate ReplicationNotifier * Fix test * Fix portdb * Fix presence test * Newsfile * Apply suggestions from code review * Update changelog.d/14752.misc Co-authored-by: Erik Johnston <erik@matrix.org> * lint Signed-off-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org> Co-authored-by: Erik Johnston <erik@matrix.org>pull/14896/head
parent
d329a566df
commit
22cc93afe3
|
@ -0,0 +1 @@
|
||||||
|
Enable Complement tests for Faster Remote Room Joins against worker-mode Synapse.
|
|
@ -94,10 +94,8 @@ allow_device_name_lookup_over_federation: true
|
||||||
experimental_features:
|
experimental_features:
|
||||||
# Enable history backfilling support
|
# Enable history backfilling support
|
||||||
msc2716_enabled: true
|
msc2716_enabled: true
|
||||||
{% if not workers_in_use %}
|
|
||||||
# client-side support for partial state in /send_join responses
|
# client-side support for partial state in /send_join responses
|
||||||
faster_joins: true
|
faster_joins: true
|
||||||
{% endif %}
|
|
||||||
# Enable support for polls
|
# Enable support for polls
|
||||||
msc3381_polls_enabled: true
|
msc3381_polls_enabled: true
|
||||||
# Enable deleting device-specific notification settings stored in account data
|
# Enable deleting device-specific notification settings stored in account data
|
||||||
|
|
|
@ -190,7 +190,7 @@ fi
|
||||||
|
|
||||||
extra_test_args=()
|
extra_test_args=()
|
||||||
|
|
||||||
test_tags="synapse_blacklist,msc3787,msc3874,msc3890,msc3391,msc3930"
|
test_tags="synapse_blacklist,msc3787,msc3874,msc3890,msc3391,msc3930,faster_joins"
|
||||||
|
|
||||||
# All environment variables starting with PASS_ will be shared.
|
# All environment variables starting with PASS_ will be shared.
|
||||||
# (The prefix is stripped off before reaching the container.)
|
# (The prefix is stripped off before reaching the container.)
|
||||||
|
@ -223,12 +223,9 @@ else
|
||||||
export PASS_SYNAPSE_COMPLEMENT_DATABASE=sqlite
|
export PASS_SYNAPSE_COMPLEMENT_DATABASE=sqlite
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# We only test faster room joins on monoliths, because they are purposefully
|
# The tests for importing historical messages (MSC2716)
|
||||||
# being developed without worker support to start with.
|
# only pass with monoliths, currently.
|
||||||
#
|
test_tags="$test_tags,msc2716"
|
||||||
# The tests for importing historical messages (MSC2716) also only pass with monoliths,
|
|
||||||
# currently.
|
|
||||||
test_tags="$test_tags,faster_joins,msc2716"
|
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -282,13 +282,6 @@ def start(config_options: List[str]) -> None:
|
||||||
"synapse.app.user_dir",
|
"synapse.app.user_dir",
|
||||||
)
|
)
|
||||||
|
|
||||||
if config.experimental.faster_joins_enabled:
|
|
||||||
raise ConfigError(
|
|
||||||
"You have enabled the experimental `faster_joins` config option, but it is "
|
|
||||||
"not compatible with worker deployments yet. Please disable `faster_joins` "
|
|
||||||
"or run Synapse as a single process deployment instead."
|
|
||||||
)
|
|
||||||
|
|
||||||
synapse.events.USE_FROZEN_DICTS = config.server.use_frozen_dicts
|
synapse.events.USE_FROZEN_DICTS = config.server.use_frozen_dicts
|
||||||
synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage
|
synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage
|
||||||
|
|
||||||
|
|
|
@ -974,6 +974,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
|
||||||
self.federation = hs.get_federation_client()
|
self.federation = hs.get_federation_client()
|
||||||
self.clock = hs.get_clock()
|
self.clock = hs.get_clock()
|
||||||
self.device_handler = device_handler
|
self.device_handler = device_handler
|
||||||
|
self._notifier = hs.get_notifier()
|
||||||
|
|
||||||
self._remote_edu_linearizer = Linearizer(name="remote_device_list")
|
self._remote_edu_linearizer = Linearizer(name="remote_device_list")
|
||||||
|
|
||||||
|
@ -1054,6 +1055,7 @@ class DeviceListUpdater(DeviceListWorkerUpdater):
|
||||||
user_id,
|
user_id,
|
||||||
device_id,
|
device_id,
|
||||||
)
|
)
|
||||||
|
self._notifier.notify_replication()
|
||||||
|
|
||||||
room_ids = await self.store.get_rooms_for_user(user_id)
|
room_ids = await self.store.get_rooms_for_user(user_id)
|
||||||
if not room_ids:
|
if not room_ids:
|
||||||
|
|
|
@ -1870,14 +1870,15 @@ class FederationHandler:
|
||||||
logger.info("Clearing partial-state flag for %s", room_id)
|
logger.info("Clearing partial-state flag for %s", room_id)
|
||||||
success = await self.store.clear_partial_state_room(room_id)
|
success = await self.store.clear_partial_state_room(room_id)
|
||||||
|
|
||||||
|
# Poke the notifier so that other workers see the write to
|
||||||
|
# the un-partial-stated rooms stream.
|
||||||
|
self._notifier.notify_replication()
|
||||||
|
|
||||||
if success:
|
if success:
|
||||||
logger.info("State resync complete for %s", room_id)
|
logger.info("State resync complete for %s", room_id)
|
||||||
self._storage_controllers.state.notify_room_un_partial_stated(
|
self._storage_controllers.state.notify_room_un_partial_stated(
|
||||||
room_id
|
room_id
|
||||||
)
|
)
|
||||||
# Poke the notifier so that other workers see the write to
|
|
||||||
# the un-partial-stated rooms stream.
|
|
||||||
self._notifier.notify_replication()
|
|
||||||
|
|
||||||
# TODO(faster_joins) update room stats and user directory?
|
# TODO(faster_joins) update room stats and user directory?
|
||||||
# https://github.com/matrix-org/synapse/issues/12814
|
# https://github.com/matrix-org/synapse/issues/12814
|
||||||
|
|
|
@ -16,7 +16,6 @@ from typing import TYPE_CHECKING
|
||||||
import attr
|
import attr
|
||||||
|
|
||||||
from synapse.replication.tcp.streams import Stream
|
from synapse.replication.tcp.streams import Stream
|
||||||
from synapse.replication.tcp.streams._base import current_token_without_instance
|
|
||||||
|
|
||||||
if TYPE_CHECKING:
|
if TYPE_CHECKING:
|
||||||
from synapse.server import HomeServer
|
from synapse.server import HomeServer
|
||||||
|
@ -42,8 +41,7 @@ class UnPartialStatedRoomStream(Stream):
|
||||||
store = hs.get_datastores().main
|
store = hs.get_datastores().main
|
||||||
super().__init__(
|
super().__init__(
|
||||||
hs.get_instance_name(),
|
hs.get_instance_name(),
|
||||||
# TODO(faster_joins, multiple writers): we need to account for instance names
|
store.get_un_partial_stated_rooms_token,
|
||||||
current_token_without_instance(store.get_un_partial_stated_rooms_token),
|
|
||||||
store.get_un_partial_stated_rooms_from_stream,
|
store.get_un_partial_stated_rooms_from_stream,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@ -70,7 +68,6 @@ class UnPartialStatedEventStream(Stream):
|
||||||
store = hs.get_datastores().main
|
store = hs.get_datastores().main
|
||||||
super().__init__(
|
super().__init__(
|
||||||
hs.get_instance_name(),
|
hs.get_instance_name(),
|
||||||
# TODO(faster_joins, multiple writers): we need to account for instance names
|
store.get_un_partial_stated_events_token,
|
||||||
current_token_without_instance(store.get_un_partial_stated_events_token),
|
|
||||||
store.get_un_partial_stated_events_from_stream,
|
store.get_un_partial_stated_events_from_stream,
|
||||||
)
|
)
|
||||||
|
|
|
@ -322,11 +322,12 @@ class EventsWorkerStore(SQLBaseStore):
|
||||||
"stream_id",
|
"stream_id",
|
||||||
)
|
)
|
||||||
|
|
||||||
def get_un_partial_stated_events_token(self) -> int:
|
def get_un_partial_stated_events_token(self, instance_name: str) -> int:
|
||||||
# TODO(faster_joins, multiple writers): This is inappropriate if there are multiple
|
return (
|
||||||
# writers because workers that don't write often will hold all
|
self._un_partial_stated_events_stream_id_gen.get_current_token_for_writer(
|
||||||
# readers up.
|
instance_name
|
||||||
return self._un_partial_stated_events_stream_id_gen.get_current_token()
|
)
|
||||||
|
)
|
||||||
|
|
||||||
async def get_un_partial_stated_events_from_stream(
|
async def get_un_partial_stated_events_from_stream(
|
||||||
self, instance_name: str, last_id: int, current_id: int, limit: int
|
self, instance_name: str, last_id: int, current_id: int, limit: int
|
||||||
|
@ -416,6 +417,8 @@ class EventsWorkerStore(SQLBaseStore):
|
||||||
self._stream_id_gen.advance(instance_name, token)
|
self._stream_id_gen.advance(instance_name, token)
|
||||||
elif stream_name == BackfillStream.NAME:
|
elif stream_name == BackfillStream.NAME:
|
||||||
self._backfill_id_gen.advance(instance_name, -token)
|
self._backfill_id_gen.advance(instance_name, -token)
|
||||||
|
elif stream_name == UnPartialStatedEventStream.NAME:
|
||||||
|
self._un_partial_stated_events_stream_id_gen.advance(instance_name, token)
|
||||||
super().process_replication_position(stream_name, instance_name, token)
|
super().process_replication_position(stream_name, instance_name, token)
|
||||||
|
|
||||||
async def have_censored_event(self, event_id: str) -> bool:
|
async def have_censored_event(self, event_id: str) -> bool:
|
||||||
|
|
|
@ -43,6 +43,7 @@ from synapse.api.errors import StoreError
|
||||||
from synapse.api.room_versions import RoomVersion, RoomVersions
|
from synapse.api.room_versions import RoomVersion, RoomVersions
|
||||||
from synapse.config.homeserver import HomeServerConfig
|
from synapse.config.homeserver import HomeServerConfig
|
||||||
from synapse.events import EventBase
|
from synapse.events import EventBase
|
||||||
|
from synapse.replication.tcp.streams.partial_state import UnPartialStatedRoomStream
|
||||||
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
|
from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause
|
||||||
from synapse.storage.database import (
|
from synapse.storage.database import (
|
||||||
DatabasePool,
|
DatabasePool,
|
||||||
|
@ -144,6 +145,13 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
|
||||||
"stream_id",
|
"stream_id",
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def process_replication_position(
|
||||||
|
self, stream_name: str, instance_name: str, token: int
|
||||||
|
) -> None:
|
||||||
|
if stream_name == UnPartialStatedRoomStream.NAME:
|
||||||
|
self._un_partial_stated_rooms_stream_id_gen.advance(instance_name, token)
|
||||||
|
return super().process_replication_position(stream_name, instance_name, token)
|
||||||
|
|
||||||
async def store_room(
|
async def store_room(
|
||||||
self,
|
self,
|
||||||
room_id: str,
|
room_id: str,
|
||||||
|
@ -1281,13 +1289,10 @@ class RoomWorkerStore(CacheInvalidationWorkerStore):
|
||||||
)
|
)
|
||||||
return result["join_event_id"], result["device_lists_stream_id"]
|
return result["join_event_id"], result["device_lists_stream_id"]
|
||||||
|
|
||||||
def get_un_partial_stated_rooms_token(self) -> int:
|
def get_un_partial_stated_rooms_token(self, instance_name: str) -> int:
|
||||||
# TODO(faster_joins, multiple writers): This is inappropriate if there
|
return self._un_partial_stated_rooms_stream_id_gen.get_current_token_for_writer(
|
||||||
# are multiple writers because workers that don't write often will
|
instance_name
|
||||||
# hold all readers up.
|
)
|
||||||
# (See `MultiWriterIdGenerator.get_persisted_upto_position` for an
|
|
||||||
# explanation.)
|
|
||||||
return self._un_partial_stated_rooms_stream_id_gen.get_current_token()
|
|
||||||
|
|
||||||
async def get_un_partial_stated_rooms_from_stream(
|
async def get_un_partial_stated_rooms_from_stream(
|
||||||
self, instance_name: str, last_id: int, current_id: int, limit: int
|
self, instance_name: str, last_id: int, current_id: int, limit: int
|
||||||
|
|
|
@ -95,6 +95,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
|
||||||
for row in rows:
|
for row in rows:
|
||||||
assert isinstance(row, UnPartialStatedEventStreamRow)
|
assert isinstance(row, UnPartialStatedEventStreamRow)
|
||||||
self._get_state_group_for_event.invalidate((row.event_id,))
|
self._get_state_group_for_event.invalidate((row.event_id,))
|
||||||
|
self.is_partial_state_event.invalidate((row.event_id,))
|
||||||
|
|
||||||
super().process_replication_rows(stream_name, instance_name, token, rows)
|
super().process_replication_rows(stream_name, instance_name, token, rows)
|
||||||
|
|
||||||
|
@ -485,6 +486,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore):
|
||||||
"rejection_status_changed": rejection_status_changed,
|
"rejection_status_changed": rejection_status_changed,
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
txn.call_after(self.hs.get_notifier().on_new_replication_data)
|
||||||
|
|
||||||
|
|
||||||
class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
|
class MainStateBackgroundUpdateStore(RoomMemberWorkerStore):
|
||||||
|
|
Loading…
Reference in New Issue