to_device updates could be dropped when consuming the replication stream (#15349)

Co-authored-by: reivilibre <oliverw@matrix.org>
pull/15358/head
Mathieu Velten 2023-03-30 19:41:14 +02:00 committed by GitHub
parent 91c3f32673
commit 6f68e32bfb
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 98 additions and 15 deletions

1
changelog.d/15349.bugfix Normal file
View File

@ -0,0 +1 @@
Fix a long-standing bug where some to_device messages could be dropped when using workers.

View File

@ -617,14 +617,14 @@ class DeviceInboxWorkerStore(SQLBaseStore):
# We limit like this as we might have multiple rows per stream_id, and
# we want to make sure we always get all entries for any stream_id
# we return.
upper_pos = min(current_id, last_id + limit)
upto_token = min(current_id, last_id + limit)
sql = (
"SELECT max(stream_id), user_id"
" FROM device_inbox"
" WHERE ? < stream_id AND stream_id <= ?"
" GROUP BY user_id"
)
txn.execute(sql, (last_id, upper_pos))
txn.execute(sql, (last_id, upto_token))
updates = [(row[0], row[1:]) for row in txn]
sql = (
@ -633,19 +633,13 @@ class DeviceInboxWorkerStore(SQLBaseStore):
" WHERE ? < stream_id AND stream_id <= ?"
" GROUP BY destination"
)
txn.execute(sql, (last_id, upper_pos))
txn.execute(sql, (last_id, upto_token))
updates.extend((row[0], row[1:]) for row in txn)
# Order by ascending stream ordering
updates.sort()
limited = False
upto_token = current_id
if len(updates) >= limit:
upto_token = updates[-1][0]
limited = True
return updates, upto_token, limited
return updates, upto_token, upto_token < current_id
return await self.db_pool.runInteraction(
"get_all_new_device_messages", get_all_new_device_messages_txn

View File

@ -54,6 +54,10 @@ class BaseStreamTestCase(unittest.HomeserverTestCase):
if not hiredis:
skip = "Requires hiredis"
if not USE_POSTGRES_FOR_TESTS:
# Redis replication only takes place on Postgres
skip = "Requires Postgres"
def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None:
# build a replication server
server_factory = ReplicationStreamProtocolFactory(hs)

View File

@ -37,11 +37,6 @@ class AccountDataStreamTestCase(BaseStreamTestCase):
# also one global update
self.get_success(store.add_account_data_for_user("test_user", "m.global", {}))
# tell the notifier to catch up to avoid duplicate rows.
# workaround for https://github.com/matrix-org/synapse/issues/7360
# FIXME remove this when the above is fixed
self.replicate()
# check we're testing what we think we are: no rows should yet have been
# received
self.assertEqual([], self.test_handler.received_rdata_rows)

View File

@ -0,0 +1,89 @@
# Copyright 2023 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import logging
import synapse
from synapse.replication.tcp.streams._base import _STREAM_UPDATE_TARGET_ROW_COUNT
from synapse.types import JsonDict
from tests.replication._base import BaseStreamTestCase
logger = logging.getLogger(__name__)
class ToDeviceStreamTestCase(BaseStreamTestCase):
servlets = [
synapse.rest.admin.register_servlets,
synapse.rest.client.login.register_servlets,
]
def test_to_device_stream(self) -> None:
store = self.hs.get_datastores().main
user1 = self.register_user("user1", "pass")
self.login("user1", "pass", "device")
user2 = self.register_user("user2", "pass")
self.login("user2", "pass", "device")
# connect to pull the updates related to users creation/login
self.reconnect()
self.replicate()
self.test_handler.received_rdata_rows.clear()
# disconnect so we can accumulate the updates without pulling them
self.disconnect()
msg: JsonDict = {}
msg["sender"] = "@sender:example.org"
msg["type"] = "m.new_device"
# add messages to the device inbox for user1 up until the
# limit defined for a stream update batch
for i in range(0, _STREAM_UPDATE_TARGET_ROW_COUNT):
msg["content"] = {"device": {}}
messages = {user1: {"device": msg}}
self.get_success(
store.add_messages_from_remote_to_device_inbox(
"example.org",
f"{i}",
messages,
)
)
# add one more message, for user2 this time
# this message would be dropped before fixing #15335
msg["content"] = {"device": {}}
messages = {user2: {"device": msg}}
self.get_success(
store.add_messages_from_remote_to_device_inbox(
"example.org",
f"{_STREAM_UPDATE_TARGET_ROW_COUNT}",
messages,
)
)
# replication is disconnected so we shouldn't get any updates yet
self.assertEqual([], self.test_handler.received_rdata_rows)
# now reconnect to pull the updates
self.reconnect()
self.replicate()
# we should receive the fact that we have to_device updates
# for user1 and user2
received_rows = self.test_handler.received_rdata_rows
self.assertEqual(len(received_rows), 2)
self.assertEqual(received_rows[0][2].entity, user1)
self.assertEqual(received_rows[1][2].entity, user2)