From 305545682da8e5661897093d6aa441928f794b9a Mon Sep 17 00:00:00 2001
From: Andrew Morgan
Date: Mon, 2 Nov 2020 12:36:18 +0000
Subject: [PATCH 01/26] Fix typo in workers doc

---
 docs/workers.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/workers.md b/docs/workers.md
index 84a9759e34..eb47b1eb70 100644
--- a/docs/workers.md
+++ b/docs/workers.md
@@ -116,7 +116,7 @@ public internet; it has no authentication and is unencrypted.
 ### Worker configuration
 
 In the config file for each worker, you must specify the type of worker
-application (`worker_app`), and you should specify a unqiue name for the worker
+application (`worker_app`), and you should specify a unique name for the worker
 (`worker_name`). The currently available worker applications are listed below.
 You must also specify the HTTP replication endpoint that it should talk to on
 the main synapse process. `worker_replication_host` should specify the host of

From 26b46796ea7d092c2fc3f47be7923c3a2cf634d3 Mon Sep 17 00:00:00 2001
From: Andrew Morgan
Date: Mon, 2 Nov 2020 12:56:16 +0000
Subject: [PATCH 02/26] Fix typos in systemd-with-workers doc

---
 docs/systemd-with-workers/README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/docs/systemd-with-workers/README.md b/docs/systemd-with-workers/README.md
index 257c09446f..8e57d4f62e 100644
--- a/docs/systemd-with-workers/README.md
+++ b/docs/systemd-with-workers/README.md
@@ -37,10 +37,10 @@ synapse master process to be started as part of the `matrix-synapse.target`
 target.
 
 1. For each worker process to be enabled, run `systemctl enable
 matrix-synapse-worker@<worker_name>.service`. For each `<worker_name>`, there
-should be a corresponding configuration file
+should be a corresponding configuration file.
 `/etc/matrix-synapse/workers/<worker_name>.yaml`.
 1. Start all the synapse processes with `systemctl start matrix-synapse.target`.
-1. Tell systemd to start synapse on boot with `systemctl enable matrix-synapse.target`/
+1. Tell systemd to start synapse on boot with `systemctl enable matrix-synapse.target`.
 
 ## Usage

From 11fd90a2b7a5192026e9d177e042acd2aff58348 Mon Sep 17 00:00:00 2001
From: Matthew Hodgson
Date: Mon, 2 Nov 2020 13:33:56 +0000
Subject: [PATCH 03/26] typo

---
 docs/openid.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/openid.md b/docs/openid.md
index 6670f36261..da391f74aa 100644
--- a/docs/openid.md
+++ b/docs/openid.md
@@ -205,7 +205,7 @@ GitHub is a bit special as it is not an OpenID Connect compliant provider, but
 just a regular OAuth2 provider.
 
 The [`/user` API endpoint](https://developer.github.com/v3/users/#get-the-authenticated-user)
-can be used to retrieve information on the authenticated user. As the Synaspse
+can be used to retrieve information on the authenticated user. As the Synapse
 login mechanism needs an attribute to uniquely identify users, and that endpoint
 does not return a `sub` property, an alternative `subject_claim` has to be set.
 

From 1eb9de90c0dbb99a28304a295a0f6ebb0b8d1c6c Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 2 Nov 2020 13:55:56 +0000
Subject: [PATCH 04/26] Improve start time by adding index to
 e2e_cross_signing_keys (#8694)

We do a `SELECT MAX(stream_id) FROM e2e_cross_signing_keys` on startup.
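As a rough illustration of why this matters (an editor's sketch, not part of the patch itself; the schema below is a simplified stand-in for the real `e2e_cross_signing_keys` table), SQLite's query planner shows the before/after difference directly:

```python
# Sketch: MAX(stream_id) on an unindexed column scans the whole table on
# every startup; with a unique index the database reads one index entry.
import sqlite3

conn = sqlite3.connect(":memory:")
conn.execute(
    "CREATE TABLE e2e_cross_signing_keys "
    "(user_id TEXT, keytype TEXT, keydata TEXT, stream_id BIGINT)"
)
conn.executemany(
    "INSERT INTO e2e_cross_signing_keys VALUES (?, 'master', '{}', ?)",
    [("@user%d:test" % i, i) for i in range(10_000)],
)

query = "SELECT MAX(stream_id) FROM e2e_cross_signing_keys"

# Before the index: the plan reports a full table scan.
print(conn.execute("EXPLAIN QUERY PLAN " + query).fetchall())

# The index this patch adds (via a background update in production).
conn.execute(
    "CREATE UNIQUE INDEX e2e_cross_signing_keys_stream_idx "
    "ON e2e_cross_signing_keys (stream_id)"
)

# After the index: a covering-index search, independent of table size.
print(conn.execute("EXPLAIN QUERY PLAN " + query).fetchall())
```

On a real deployment the saving grows with the number of cross-signing keys, since the scan would otherwise be repeated on every startup.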
--- changelog.d/8694.misc | 1 + scripts/synapse_port_db | 2 ++ .../storage/databases/main/end_to_end_keys.py | 18 ++++++++++++++++-- .../delta/58/23e2e_cross_signing_keys_idx.sql | 17 +++++++++++++++++ 4 files changed, 36 insertions(+), 2 deletions(-) create mode 100644 changelog.d/8694.misc create mode 100644 synapse/storage/databases/main/schema/delta/58/23e2e_cross_signing_keys_idx.sql diff --git a/changelog.d/8694.misc b/changelog.d/8694.misc new file mode 100644 index 0000000000..c90a6375ad --- /dev/null +++ b/changelog.d/8694.misc @@ -0,0 +1 @@ +Improve start time by adding an index to `e2e_cross_signing_keys.stream_id`. diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 6c7664ad4a..13c0120bb4 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -40,6 +40,7 @@ from synapse.storage.database import DatabasePool, make_conn from synapse.storage.databases.main.client_ips import ClientIpBackgroundUpdateStore from synapse.storage.databases.main.deviceinbox import DeviceInboxBackgroundUpdateStore from synapse.storage.databases.main.devices import DeviceBackgroundUpdateStore +from synapse.storage.databases.main.end_to_end_keys import EndToEndKeyBackgroundStore from synapse.storage.databases.main.events_bg_updates import ( EventsBackgroundUpdatesStore, ) @@ -174,6 +175,7 @@ class Store( StateBackgroundUpdateStore, MainStateBackgroundUpdateStore, UserDirectoryBackgroundUpdateStore, + EndToEndKeyBackgroundStore, StatsStore, ): def execute(self, f, *args, **kwargs): diff --git a/synapse/storage/databases/main/end_to_end_keys.py b/synapse/storage/databases/main/end_to_end_keys.py index 4415909414..4d1b92d1aa 100644 --- a/synapse/storage/databases/main/end_to_end_keys.py +++ b/synapse/storage/databases/main/end_to_end_keys.py @@ -24,7 +24,7 @@ from twisted.enterprise.adbapi import Connection from synapse.logging.opentracing import log_kv, set_tag, trace from synapse.storage._base import SQLBaseStore, db_to_json -from synapse.storage.database import make_in_list_sql_clause +from synapse.storage.database import DatabasePool, make_in_list_sql_clause from synapse.storage.types import Cursor from synapse.types import JsonDict from synapse.util import json_encoder @@ -33,6 +33,7 @@ from synapse.util.iterutils import batch_iter if TYPE_CHECKING: from synapse.handlers.e2e_keys import SignatureListItem + from synapse.server import HomeServer @attr.s(slots=True) @@ -47,7 +48,20 @@ class DeviceKeyLookupResult: keys = attr.ib(type=Optional[JsonDict]) -class EndToEndKeyWorkerStore(SQLBaseStore): +class EndToEndKeyBackgroundStore(SQLBaseStore): + def __init__(self, database: DatabasePool, db_conn: Connection, hs: "HomeServer"): + super().__init__(database, db_conn, hs) + + self.db_pool.updates.register_background_index_update( + "e2e_cross_signing_keys_idx", + index_name="e2e_cross_signing_keys_stream_idx", + table="e2e_cross_signing_keys", + columns=["stream_id"], + unique=True, + ) + + +class EndToEndKeyWorkerStore(EndToEndKeyBackgroundStore): async def get_e2e_device_keys_for_federation_query( self, user_id: str ) -> Tuple[int, List[JsonDict]]: diff --git a/synapse/storage/databases/main/schema/delta/58/23e2e_cross_signing_keys_idx.sql b/synapse/storage/databases/main/schema/delta/58/23e2e_cross_signing_keys_idx.sql new file mode 100644 index 0000000000..61c558db77 --- /dev/null +++ b/synapse/storage/databases/main/schema/delta/58/23e2e_cross_signing_keys_idx.sql @@ -0,0 +1,17 @@ +/* Copyright 2020 The Matrix.org Foundation C.I.C + * + * Licensed under the Apache License, 
Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +INSERT INTO background_updates (update_name, progress_json) VALUES + ('e2e_cross_signing_keys_idx', '{}'); From ca39e67f3d881f2895c87781431ed50808bf4ef9 Mon Sep 17 00:00:00 2001 From: Dan Callahan Date: Mon, 2 Nov 2020 16:33:06 +0000 Subject: [PATCH 05/26] Use Python 3.8 in Docker images by default (#8698) This bumps us closer to current Python without going all the way to 3.9. Fixes #8674 Signed-off-by: Dan Callahan --- changelog.d/8698.misc | 1 + docker/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/8698.misc diff --git a/changelog.d/8698.misc b/changelog.d/8698.misc new file mode 100644 index 0000000000..6b777fb295 --- /dev/null +++ b/changelog.d/8698.misc @@ -0,0 +1 @@ +Use Python 3.8 in Docker images by default. diff --git a/docker/Dockerfile b/docker/Dockerfile index 27512f8600..9791d3ddf0 100644 --- a/docker/Dockerfile +++ b/docker/Dockerfile @@ -11,7 +11,7 @@ # docker build -f docker/Dockerfile --build-arg PYTHON_VERSION=3.6 . # -ARG PYTHON_VERSION=3.7 +ARG PYTHON_VERSION=3.8 ### ### Stage 0: builder From 59cc2472b3ef6bd84919fabed1f65187556abe78 Mon Sep 17 00:00:00 2001 From: David Baker Date: Mon, 2 Nov 2020 16:36:14 +0000 Subject: [PATCH 06/26] Add base pushrule to notify for jitsi conferences (#8286) This could be customised to trigger a different kind of notification in the future, but for now it's a normal non-highlight one. --- changelog.d/8286.feature | 1 + synapse/push/baserules.py | 24 ++++++++++++++++++++++++ 2 files changed, 25 insertions(+) create mode 100644 changelog.d/8286.feature diff --git a/changelog.d/8286.feature b/changelog.d/8286.feature new file mode 100644 index 0000000000..2c371419af --- /dev/null +++ b/changelog.d/8286.feature @@ -0,0 +1 @@ +Add a push rule that highlights when a jitsi conference is created in a room. 
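For orientation, the rule added to `baserules.py` in the hunk below fires only when all three of its `event_match` conditions hold. The toy evaluator here is an illustration, not Synapse's actual matcher (whose glob semantics are richer, and whose internal `_id` bookkeeping fields are omitted here); `fnmatch` stands in for the simple patterns this rule uses:

```python
# Sketch: evaluate the three event_match conditions of the new jitsi rule
# against sample events.
from fnmatch import fnmatch


def get_value(event: dict, dotted_key: str):
    """Resolve a dotted key such as "content.type" against a nested dict."""
    value = event
    for part in dotted_key.split("."):
        if not isinstance(value, dict) or part not in value:
            return None
        value = value[part]
    return value


def rule_matches(conditions, event: dict) -> bool:
    for c in conditions:
        value = get_value(event, c["key"])
        if not isinstance(value, str) or not fnmatch(value, c["pattern"]):
            return False
    return True


conditions = [
    {"kind": "event_match", "key": "type", "pattern": "im.vector.modular.widgets"},
    {"kind": "event_match", "key": "content.type", "pattern": "jitsi"},
    {"kind": "event_match", "key": "state_key", "pattern": "*"},
]

jitsi_widget = {
    "type": "im.vector.modular.widgets",
    "state_key": "jitsi_1",
    "content": {"type": "jitsi"},
}
plain_message = {"type": "m.room.message", "content": {"body": "hi"}}

print(rule_matches(conditions, jitsi_widget))   # True: notify, highlight=False
print(rule_matches(conditions, plain_message))  # False
```

The `state_key` condition with pattern `*` is what restricts the rule to state events: a non-state event has no `state_key` at all, so the match fails.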
diff --git a/synapse/push/baserules.py b/synapse/push/baserules.py index 2858b61fb1..f5788c1de7 100644 --- a/synapse/push/baserules.py +++ b/synapse/push/baserules.py @@ -498,6 +498,30 @@ BASE_APPEND_UNDERRIDE_RULES = [ ], "actions": ["notify", {"set_tweak": "highlight", "value": False}], }, + { + "rule_id": "global/underride/.im.vector.jitsi", + "conditions": [ + { + "kind": "event_match", + "key": "type", + "pattern": "im.vector.modular.widgets", + "_id": "_type_modular_widgets", + }, + { + "kind": "event_match", + "key": "content.type", + "pattern": "jitsi", + "_id": "_content_type_jitsi", + }, + { + "kind": "event_match", + "key": "state_key", + "pattern": "*", + "_id": "_is_state_event", + }, + ], + "actions": ["notify", {"set_tweak": "highlight", "value": False}], + }, ] From e89bd3ea923cd9d1e33ff1be3c94dc6d838837e0 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Mon, 2 Nov 2020 18:01:09 +0000 Subject: [PATCH 07/26] Improve error messages of non-str displayname/avatar_url (#8705) This PR fixes two things: * Corrects the copy/paste error of telling the client their displayname is wrong when they are submitting an `avatar_url`. * Returns a `M_INVALID_PARAM` instead of `M_UNKNOWN` for non-str type parameters. Reported by @t3chguy. --- changelog.d/8705.misc | 1 + synapse/handlers/profile.py | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) create mode 100644 changelog.d/8705.misc diff --git a/changelog.d/8705.misc b/changelog.d/8705.misc new file mode 100644 index 0000000000..1189464a02 --- /dev/null +++ b/changelog.d/8705.misc @@ -0,0 +1 @@ +Improve the error returned when a non-string displayname or avatar_url is used when updating a user's profile. \ No newline at end of file diff --git a/synapse/handlers/profile.py b/synapse/handlers/profile.py index 14348faaf3..74a1ddd780 100644 --- a/synapse/handlers/profile.py +++ b/synapse/handlers/profile.py @@ -189,7 +189,9 @@ class ProfileHandler(BaseHandler): ) if not isinstance(new_displayname, str): - raise SynapseError(400, "Invalid displayname") + raise SynapseError( + 400, "'displayname' must be a string", errcode=Codes.INVALID_PARAM + ) if len(new_displayname) > MAX_DISPLAYNAME_LEN: raise SynapseError( @@ -273,7 +275,9 @@ class ProfileHandler(BaseHandler): ) if not isinstance(new_avatar_url, str): - raise SynapseError(400, "Invalid displayname") + raise SynapseError( + 400, "'avatar_url' must be a string", errcode=Codes.INVALID_PARAM + ) if len(new_avatar_url) > MAX_AVATAR_URL_LEN: raise SynapseError( From d04c2d19b360356bacc754a2f592ccfd8d6536b3 Mon Sep 17 00:00:00 2001 From: Matthew Hodgson Date: Mon, 2 Nov 2020 21:22:29 +0000 Subject: [PATCH 08/26] grammar --- docs/workers.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/workers.md b/docs/workers.md index eb47b1eb70..cd1f823b77 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -302,7 +302,7 @@ Additionally, there is *experimental* support for moving writing of specific streams (such as events) off of the main process to a particular worker. (This is only supported with Redis-based replication.) -Currently support streams are `events` and `typing`. +Currently supported streams are `events` and `typing`. To enable this, the worker must have a HTTP replication listener configured, have a `worker_name` and be listed in the `instance_map` config. 
For example to From 4b09b7438e99e379c483026fb578f92fd7e30d9f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 3 Nov 2020 10:27:11 +0000 Subject: [PATCH 09/26] Document how to set up multiple event persisters (#8706) --- changelog.d/8706.doc | 1 + docs/workers.md | 12 ++++++++++++ 2 files changed, 13 insertions(+) create mode 100644 changelog.d/8706.doc diff --git a/changelog.d/8706.doc b/changelog.d/8706.doc new file mode 100644 index 0000000000..96a0427e73 --- /dev/null +++ b/changelog.d/8706.doc @@ -0,0 +1 @@ +Document experimental support for running multiple event persisters. diff --git a/docs/workers.md b/docs/workers.md index cd1f823b77..4e046bdb31 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -319,6 +319,18 @@ stream_writers: events: event_persister1 ``` +The `events` stream also experimentally supports having multiple writers, where +work is sharded between them by room ID. Note that you *must* restart all worker +instances when adding or removing event persisters. An example `stream_writers` +configuration with multiple writers: + +```yaml +stream_writers: + events: + - event_persister1 + - event_persister2 +``` + #### Background tasks There is also *experimental* support for moving background tasks to a separate From 243d427fbcb24c78c2df143767cd4636844fc82e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Tue, 3 Nov 2020 12:13:48 +0000 Subject: [PATCH 10/26] Block clients from sending server ACLs that lock the local server out. (#8708) Fixes #4042 --- changelog.d/8708.misc | 1 + mypy.ini | 1 + synapse/events/validator.py | 27 ++++++++++------ synapse/handlers/message.py | 3 ++ tests/handlers/test_message.py | 57 ++++++++++++++++++++++++++++++++++ 5 files changed, 80 insertions(+), 9 deletions(-) create mode 100644 changelog.d/8708.misc diff --git a/changelog.d/8708.misc b/changelog.d/8708.misc new file mode 100644 index 0000000000..be679fb0f8 --- /dev/null +++ b/changelog.d/8708.misc @@ -0,0 +1 @@ +Block attempts by clients to send server ACLs, or redactions of server ACLs, that would result in the local server being blocked from the room. diff --git a/mypy.ini b/mypy.ini index 1ece2ba082..fc9f8d8050 100644 --- a/mypy.ini +++ b/mypy.ini @@ -13,6 +13,7 @@ files = synapse/config, synapse/event_auth.py, synapse/events/builder.py, + synapse/events/validator.py, synapse/events/spamcheck.py, synapse/federation, synapse/handlers/_base.py, diff --git a/synapse/events/validator.py b/synapse/events/validator.py index 5f9af8529b..f8f3b1a31e 100644 --- a/synapse/events/validator.py +++ b/synapse/events/validator.py @@ -13,20 +13,26 @@ # See the License for the specific language governing permissions and # limitations under the License. +from typing import Union + from synapse.api.constants import MAX_ALIAS_LENGTH, EventTypes, Membership from synapse.api.errors import Codes, SynapseError from synapse.api.room_versions import EventFormatVersions +from synapse.config.homeserver import HomeServerConfig +from synapse.events import EventBase +from synapse.events.builder import EventBuilder from synapse.events.utils import validate_canonicaljson +from synapse.federation.federation_server import server_matches_acl_event from synapse.types import EventID, RoomID, UserID class EventValidator: - def validate_new(self, event, config): + def validate_new(self, event: EventBase, config: HomeServerConfig): """Validates the event has roughly the right format Args: - event (FrozenEvent): The event to validate. - config (Config): The homeserver's configuration. + event: The event to validate. 
+ config: The homeserver's configuration. """ self.validate_builder(event) @@ -76,12 +82,18 @@ class EventValidator: if event.type == EventTypes.Retention: self._validate_retention(event) - def _validate_retention(self, event): + if event.type == EventTypes.ServerACL: + if not server_matches_acl_event(config.server_name, event): + raise SynapseError( + 400, "Can't create an ACL event that denies the local server" + ) + + def _validate_retention(self, event: EventBase): """Checks that an event that defines the retention policy for a room respects the format enforced by the spec. Args: - event (FrozenEvent): The event to validate. + event: The event to validate. """ if not event.is_state(): raise SynapseError(code=400, msg="must be a state event") @@ -116,13 +128,10 @@ class EventValidator: errcode=Codes.BAD_JSON, ) - def validate_builder(self, event): + def validate_builder(self, event: Union[EventBase, EventBuilder]): """Validates that the builder/event has roughly the right format. Only checks values that we expect a proto event to have, rather than all the fields an event would have - - Args: - event (EventBuilder|FrozenEvent) """ strings = ["room_id", "sender", "type"] diff --git a/synapse/handlers/message.py b/synapse/handlers/message.py index ca5602c13e..c6791fb912 100644 --- a/synapse/handlers/message.py +++ b/synapse/handlers/message.py @@ -1138,6 +1138,9 @@ class EventCreationHandler: if original_event.room_id != event.room_id: raise SynapseError(400, "Cannot redact event from a different room") + if original_event.type == EventTypes.ServerACL: + raise AuthError(403, "Redacting server ACL events is not permitted") + prev_state_ids = await context.get_prev_state_ids() auth_events_ids = self.auth.compute_auth_events( event, prev_state_ids, for_verification=True diff --git a/tests/handlers/test_message.py b/tests/handlers/test_message.py index 2e0fea04af..8b57081cbe 100644 --- a/tests/handlers/test_message.py +++ b/tests/handlers/test_message.py @@ -154,3 +154,60 @@ class EventCreationTestCase(unittest.HomeserverTestCase): # Check that we've deduplicated the events. self.assertEqual(len(events), 2) self.assertEqual(events[0].event_id, events[1].event_id) + + +class ServerAclValidationTestCase(unittest.HomeserverTestCase): + servlets = [ + admin.register_servlets, + login.register_servlets, + room.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self.user_id = self.register_user("tester", "foobar") + self.access_token = self.login("tester", "foobar") + self.room_id = self.helper.create_room_as(self.user_id, tok=self.access_token) + + def test_allow_server_acl(self): + """Test that sending an ACL that blocks everyone but ourselves works. + """ + + self.helper.send_state( + self.room_id, + EventTypes.ServerACL, + body={"allow": [self.hs.hostname]}, + tok=self.access_token, + expect_code=200, + ) + + def test_deny_server_acl_block_outselves(self): + """Test that sending an ACL that blocks ourselves does not work. + """ + self.helper.send_state( + self.room_id, + EventTypes.ServerACL, + body={}, + tok=self.access_token, + expect_code=400, + ) + + def test_deny_redact_server_acl(self): + """Test that attempting to redact an ACL is blocked. + """ + + body = self.helper.send_state( + self.room_id, + EventTypes.ServerACL, + body={"allow": [self.hs.hostname]}, + tok=self.access_token, + expect_code=200, + ) + event_id = body["event_id"] + + # Redaction of event should fail. 
+        path = "/_matrix/client/r0/rooms/%s/redact/%s" % (self.room_id, event_id)
+        request, channel = self.make_request(
+            "POST", path, content={}, access_token=self.access_token
+        )
+        self.render(request)
+        self.assertEqual(int(channel.result["code"]), 403)

From 4fda58ddd257980bac8cfc277825f4deecc5dd7b Mon Sep 17 00:00:00 2001
From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com>
Date: Tue, 3 Nov 2020 13:48:25 +0100
Subject: [PATCH 11/26] Remove the "draft" status of the Room Details Admin
 API (#8702)

Fixes #8550
---
 changelog.d/8702.misc   | 1 +
 docs/admin_api/rooms.md | 4 +---
 2 files changed, 2 insertions(+), 3 deletions(-)
 create mode 100644 changelog.d/8702.misc

diff --git a/changelog.d/8702.misc b/changelog.d/8702.misc
new file mode 100644
index 0000000000..f20085cbe4
--- /dev/null
+++ b/changelog.d/8702.misc
@@ -0,0 +1 @@
+Remove the "draft" status of the Room Details Admin API.
\ No newline at end of file
diff --git a/docs/admin_api/rooms.md b/docs/admin_api/rooms.md
index fa9b914fa7..0c05b0ed55 100644
--- a/docs/admin_api/rooms.md
+++ b/docs/admin_api/rooms.md
@@ -265,12 +265,10 @@ Response:
 Once the `next_token` parameter is no longer present, we know we've
 reached the end of the list.
 
-# DRAFT: Room Details API
+# Room Details API
 
 The Room Details admin API allows server admins to get all details of a room.
 
-This API is still a draft and details might change!
-
 The following fields are possible in the JSON response body:
 
 * `room_id` - The ID of the room.

From 6abb1ad0be1cde7cd414eee0e0a1f195462eb1bd Mon Sep 17 00:00:00 2001
From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com>
Date: Wed, 4 Nov 2020 11:26:05 +0000
Subject: [PATCH 12/26] Consolidate purge table lists to prevent
 desynchronisation (#8713)

I idly noticed that these lists were out of sync with each other, causing us
to miss a table in a test case (`local_invites`). Let's consolidate this list
instead to prevent this from happening in the future.
---
 changelog.d/8713.misc         |   1 +
 tests/rest/admin/test_room.py | 105 ++++++++++++----------------------
 2 files changed, 39 insertions(+), 67 deletions(-)
 create mode 100644 changelog.d/8713.misc

diff --git a/changelog.d/8713.misc b/changelog.d/8713.misc
new file mode 100644
index 0000000000..c5d3f3216b
--- /dev/null
+++ b/changelog.d/8713.misc
@@ -0,0 +1 @@
+Consolidate duplicated lists of purged tables that are checked in tests.
\ No newline at end of file
diff --git a/tests/rest/admin/test_room.py b/tests/rest/admin/test_room.py
index 6dfc709dc5..535d68f284 100644
--- a/tests/rest/admin/test_room.py
+++ b/tests/rest/admin/test_room.py
@@ -531,40 +531,7 @@ class DeleteRoomTestCase(unittest.HomeserverTestCase):
 
     def _is_purged(self, room_id):
         """Test that the following tables have been purged of all rows related to the room.
""" - for table in ( - "current_state_events", - "event_backward_extremities", - "event_forward_extremities", - "event_json", - "event_push_actions", - "event_search", - "events", - "group_rooms", - "public_room_list_stream", - "receipts_graph", - "receipts_linearized", - "room_aliases", - "room_depth", - "room_memberships", - "room_stats_state", - "room_stats_current", - "room_stats_historical", - "room_stats_earliest_token", - "rooms", - "stream_ordering_to_exterm", - "users_in_public_rooms", - "users_who_share_private_rooms", - "appservice_room_list", - "e2e_room_keys", - "event_push_summary", - "pusher_throttle", - "group_summary_rooms", - "local_invites", - "room_account_data", - "room_tags", - # "state_groups", # Current impl leaves orphaned state groups around. - "state_groups_state", - ): + for table in PURGE_TABLES: count = self.get_success( self.store.db_pool.simple_select_one_onecol( table=table, @@ -633,39 +600,7 @@ class PurgeRoomTestCase(unittest.HomeserverTestCase): self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) # Test that the following tables have been purged of all rows related to the room. - for table in ( - "current_state_events", - "event_backward_extremities", - "event_forward_extremities", - "event_json", - "event_push_actions", - "event_search", - "events", - "group_rooms", - "public_room_list_stream", - "receipts_graph", - "receipts_linearized", - "room_aliases", - "room_depth", - "room_memberships", - "room_stats_state", - "room_stats_current", - "room_stats_historical", - "room_stats_earliest_token", - "rooms", - "stream_ordering_to_exterm", - "users_in_public_rooms", - "users_who_share_private_rooms", - "appservice_room_list", - "e2e_room_keys", - "event_push_summary", - "pusher_throttle", - "group_summary_rooms", - "room_account_data", - "room_tags", - # "state_groups", # Current impl leaves orphaned state groups around. - "state_groups_state", - ): + for table in PURGE_TABLES: count = self.get_success( self.store.db_pool.simple_select_one_onecol( table=table, @@ -1500,3 +1435,39 @@ class JoinAliasRoomTestCase(unittest.HomeserverTestCase): self.render(request) self.assertEquals(200, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual(private_room_id, channel.json_body["joined_rooms"][0]) + + +PURGE_TABLES = [ + "current_state_events", + "event_backward_extremities", + "event_forward_extremities", + "event_json", + "event_push_actions", + "event_search", + "events", + "group_rooms", + "public_room_list_stream", + "receipts_graph", + "receipts_linearized", + "room_aliases", + "room_depth", + "room_memberships", + "room_stats_state", + "room_stats_current", + "room_stats_historical", + "room_stats_earliest_token", + "rooms", + "stream_ordering_to_exterm", + "users_in_public_rooms", + "users_who_share_private_rooms", + "appservice_room_list", + "e2e_room_keys", + "event_push_summary", + "pusher_throttle", + "group_summary_rooms", + "local_invites", + "room_account_data", + "room_tags", + # "state_groups", # Current impl leaves orphaned state groups around. 
+ "state_groups_state", +] From e4676bd8772275833857c803f8fe4025744cec01 Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Thu, 5 Nov 2020 14:55:45 +0100 Subject: [PATCH 13/26] Add `displayname` to Shared-Secret Registration for admins (#8722) Add `displayname` to Shared-Secret Registration for admins to `POST /_synapse/admin/v1/register` --- changelog.d/8722.feature | 1 + docs/admin_api/register_api.rst | 4 +- synapse/rest/admin/users.py | 2 + tests/rest/admin/test_user.py | 121 +++++++++++++++++++++++++++++++- tests/unittest.py | 19 +++-- 5 files changed, 138 insertions(+), 9 deletions(-) create mode 100644 changelog.d/8722.feature diff --git a/changelog.d/8722.feature b/changelog.d/8722.feature new file mode 100644 index 0000000000..0413d8838b --- /dev/null +++ b/changelog.d/8722.feature @@ -0,0 +1 @@ +Add `displayname` to Shared-Secret Registration for admins. \ No newline at end of file diff --git a/docs/admin_api/register_api.rst b/docs/admin_api/register_api.rst index 3a63109aa0..c3057b204b 100644 --- a/docs/admin_api/register_api.rst +++ b/docs/admin_api/register_api.rst @@ -18,7 +18,8 @@ To fetch the nonce, you need to request one from the API:: Once you have the nonce, you can make a ``POST`` to the same URL with a JSON body containing the nonce, username, password, whether they are an admin -(optional, False by default), and a HMAC digest of the content. +(optional, False by default), and a HMAC digest of the content. Also you can +set the displayname (optional, ``username`` by default). As an example:: @@ -26,6 +27,7 @@ As an example:: > { "nonce": "thisisanonce", "username": "pepper_roni", + "displayname": "Pepper Roni", "password": "pizza", "admin": true, "mac": "mac_digest_here" diff --git a/synapse/rest/admin/users.py b/synapse/rest/admin/users.py index b337311a37..3638e219f2 100644 --- a/synapse/rest/admin/users.py +++ b/synapse/rest/admin/users.py @@ -412,6 +412,7 @@ class UserRegisterServlet(RestServlet): admin = body.get("admin", None) user_type = body.get("user_type", None) + displayname = body.get("displayname", None) if user_type is not None and user_type not in UserTypes.ALL_USER_TYPES: raise SynapseError(400, "Invalid user type") @@ -448,6 +449,7 @@ class UserRegisterServlet(RestServlet): password_hash=password_hash, admin=bool(admin), user_type=user_type, + default_display_name=displayname, by_admin=True, ) diff --git a/tests/rest/admin/test_user.py b/tests/rest/admin/test_user.py index 7df32e5093..d74efede06 100644 --- a/tests/rest/admin/test_user.py +++ b/tests/rest/admin/test_user.py @@ -24,7 +24,7 @@ from mock import Mock import synapse.rest.admin from synapse.api.constants import UserTypes from synapse.api.errors import Codes, HttpResponseException, ResourceLimitError -from synapse.rest.client.v1 import login, room +from synapse.rest.client.v1 import login, profile, room from synapse.rest.client.v2_alpha import sync from tests import unittest @@ -34,7 +34,10 @@ from tests.unittest import override_config class UserRegisterTestCase(unittest.HomeserverTestCase): - servlets = [synapse.rest.admin.register_servlets_for_client_rest_resource] + servlets = [ + synapse.rest.admin.register_servlets_for_client_rest_resource, + profile.register_servlets, + ] def make_homeserver(self, reactor, clock): @@ -325,6 +328,120 @@ class UserRegisterTestCase(unittest.HomeserverTestCase): self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) self.assertEqual("Invalid user type", channel.json_body["error"]) + def 
test_displayname(self): + """ + Test that displayname of new user is set + """ + + # set no displayname + request, channel = self.make_request("GET", self.url) + self.render(request) + nonce = channel.json_body["nonce"] + + want_mac = hmac.new(key=b"shared", digestmod=hashlib.sha1) + want_mac.update(nonce.encode("ascii") + b"\x00bob1\x00abc123\x00notadmin") + want_mac = want_mac.hexdigest() + + body = json.dumps( + {"nonce": nonce, "username": "bob1", "password": "abc123", "mac": want_mac} + ) + request, channel = self.make_request("POST", self.url, body.encode("utf8")) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual("@bob1:test", channel.json_body["user_id"]) + + request, channel = self.make_request("GET", "/profile/@bob1:test/displayname") + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual("bob1", channel.json_body["displayname"]) + + # displayname is None + request, channel = self.make_request("GET", self.url) + self.render(request) + nonce = channel.json_body["nonce"] + + want_mac = hmac.new(key=b"shared", digestmod=hashlib.sha1) + want_mac.update(nonce.encode("ascii") + b"\x00bob2\x00abc123\x00notadmin") + want_mac = want_mac.hexdigest() + + body = json.dumps( + { + "nonce": nonce, + "username": "bob2", + "displayname": None, + "password": "abc123", + "mac": want_mac, + } + ) + request, channel = self.make_request("POST", self.url, body.encode("utf8")) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual("@bob2:test", channel.json_body["user_id"]) + + request, channel = self.make_request("GET", "/profile/@bob2:test/displayname") + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual("bob2", channel.json_body["displayname"]) + + # displayname is empty + request, channel = self.make_request("GET", self.url) + self.render(request) + nonce = channel.json_body["nonce"] + + want_mac = hmac.new(key=b"shared", digestmod=hashlib.sha1) + want_mac.update(nonce.encode("ascii") + b"\x00bob3\x00abc123\x00notadmin") + want_mac = want_mac.hexdigest() + + body = json.dumps( + { + "nonce": nonce, + "username": "bob3", + "displayname": "", + "password": "abc123", + "mac": want_mac, + } + ) + request, channel = self.make_request("POST", self.url, body.encode("utf8")) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual("@bob3:test", channel.json_body["user_id"]) + + request, channel = self.make_request("GET", "/profile/@bob3:test/displayname") + self.render(request) + self.assertEqual(404, int(channel.result["code"]), msg=channel.result["body"]) + + # set displayname + request, channel = self.make_request("GET", self.url) + self.render(request) + nonce = channel.json_body["nonce"] + + want_mac = hmac.new(key=b"shared", digestmod=hashlib.sha1) + want_mac.update(nonce.encode("ascii") + b"\x00bob4\x00abc123\x00notadmin") + want_mac = want_mac.hexdigest() + + body = json.dumps( + { + "nonce": nonce, + "username": "bob4", + "displayname": "Bob's Name", + "password": "abc123", + "mac": want_mac, + } + ) + request, channel = self.make_request("POST", self.url, body.encode("utf8")) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual("@bob4:test", channel.json_body["user_id"]) + + 
request, channel = self.make_request("GET", "/profile/@bob4:test/displayname") + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual("Bob's Name", channel.json_body["displayname"]) + @override_config( {"limit_usage_by_mau": True, "max_mau_value": 2, "mau_trial_days": 0} ) diff --git a/tests/unittest.py b/tests/unittest.py index 08cf9b10c5..e36ac89196 100644 --- a/tests/unittest.py +++ b/tests/unittest.py @@ -546,18 +546,24 @@ class HomeserverTestCase(TestCase): return result - def register_user(self, username, password, admin=False): + def register_user( + self, + username: str, + password: str, + admin: Optional[bool] = False, + displayname: Optional[str] = None, + ) -> str: """ Register a user. Requires the Admin API be registered. Args: - username (bytes/unicode): The user part of the new user. - password (bytes/unicode): The password of the new user. - admin (bool): Whether the user should be created as an admin - or not. + username: The user part of the new user. + password: The password of the new user. + admin: Whether the user should be created as an admin or not. + displayname: The displayname of the new user. Returns: - The MXID of the new user (unicode). + The MXID of the new user. """ self.hs.config.registration_shared_secret = "shared" @@ -581,6 +587,7 @@ class HomeserverTestCase(TestCase): { "nonce": nonce, "username": username, + "displayname": displayname, "password": password, "admin": admin, "mac": want_mac, From c3119d1536582c639bf67bf7e3c914935e3bbd7e Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Thu, 5 Nov 2020 19:59:12 +0100 Subject: [PATCH 14/26] Add an admin API for users' media statistics (#8700) Add `GET /_synapse/admin/v1/statistics/users/media` to get statisics about local media usage by users. Related to #6094 It is the first API for statistics. Goal is to avoid/reduce usage of sql queries like [Wiki analyzing Synapse](https://github.com/matrix-org/synapse/wiki/SQL-for-analyzing-Synapse-PostgreSQL-database-stats) Signed-off-by: Dirk Klimpel dirk@klimpel.org --- changelog.d/8700.feature | 1 + docs/admin_api/statistics.md | 83 ++++ synapse/rest/admin/__init__.py | 2 + synapse/rest/admin/statistics.py | 122 ++++++ synapse/storage/databases/main/stats.py | 127 +++++++ tests/rest/admin/test_statistics.py | 485 ++++++++++++++++++++++++ 6 files changed, 820 insertions(+) create mode 100644 changelog.d/8700.feature create mode 100644 docs/admin_api/statistics.md create mode 100644 synapse/rest/admin/statistics.py create mode 100644 tests/rest/admin/test_statistics.py diff --git a/changelog.d/8700.feature b/changelog.d/8700.feature new file mode 100644 index 0000000000..47d63dce02 --- /dev/null +++ b/changelog.d/8700.feature @@ -0,0 +1 @@ +Add an admin API for local user media statistics. Contributed by @dklimpel. diff --git a/docs/admin_api/statistics.md b/docs/admin_api/statistics.md new file mode 100644 index 0000000000..d398a120fb --- /dev/null +++ b/docs/admin_api/statistics.md @@ -0,0 +1,83 @@ +# Users' media usage statistics + +Returns information about all local media usage of users. Gives the +possibility to filter them by time and user. + +The API is: + +``` +GET /_synapse/admin/v1/statistics/users/media +``` + +To use it, you will need to authenticate by providing an `access_token` +for a server admin: see [README.rst](README.rst). 
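As a usage sketch (not part of the patch; the homeserver URL and token below are placeholders for your own deployment), the endpoint can be called with any HTTP client, for example with `requests`; the query parameters used here are documented under **Parameters** below:

```python
# Sketch: fetch the five users with the largest total media size.
import requests

BASE_URL = "https://synapse.example.com"     # placeholder homeserver
ADMIN_TOKEN = "<admin access token>"         # placeholder admin token

resp = requests.get(
    BASE_URL + "/_synapse/admin/v1/statistics/users/media",
    headers={"Authorization": "Bearer " + ADMIN_TOKEN},
    params={"order_by": "media_length", "dir": "b", "limit": 5},
)
resp.raise_for_status()
for user in resp.json()["users"]:
    print(user["user_id"], user["media_count"], user["media_length"])
```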
+
+A response body like the following is returned:
+
+```json
+{
+    "users": [
+        {
+            "displayname": "foo_user_0",
+            "media_count": 2,
+            "media_length": 134,
+            "user_id": "@foo_user_0:test"
+        },
+        {
+            "displayname": "foo_user_1",
+            "media_count": 2,
+            "media_length": 134,
+            "user_id": "@foo_user_1:test"
+        }
+    ],
+    "next_token": 3,
+    "total": 10
+}
+```
+
+To paginate, check for `next_token` and if present, call the endpoint
+again with `from` set to the value of `next_token`. This will return a new page.
+
+If the endpoint does not return a `next_token` then there are no more
+users to paginate through.
+
+**Parameters**
+
+The following parameters should be set in the URL:
+
+* `limit`: string representing a positive integer - Is optional but is
+  used for pagination, denoting the maximum number of items to return
+  in this call. Defaults to `100`.
+* `from`: string representing a positive integer - Is optional but used for pagination,
+  denoting the offset in the returned results. This should be treated as an opaque value
+  and not explicitly set to anything other than the return value of `next_token` from a
+  previous call. Defaults to `0`.
+* `order_by` - string - The method in which to sort the returned list of users. Valid values are:
+  - `user_id` - Users are ordered alphabetically by `user_id`. This is the default.
+  - `displayname` - Users are ordered alphabetically by `displayname`.
+  - `media_length` - Users are ordered by the total size of uploaded media in bytes.
+    Smallest to largest.
+  - `media_count` - Users are ordered by number of uploaded media. Smallest to largest.
+* `from_ts` - string representing a positive integer - Considers only
+  files created at this timestamp or later. Unix timestamp in ms.
+* `until_ts` - string representing a positive integer - Considers only
+  files created at this timestamp or earlier. Unix timestamp in ms.
+* `search_term` - string - Filter users by their user ID localpart **or** displayname.
+  The search term can be found in any part of the string.
+  Defaults to no filtering.
+* `dir` - string - Direction of order. Either `f` for forwards or `b` for backwards.
+  Setting this value to `b` will reverse the above sort order. Defaults to `f`.
+
+
+**Response**
+
+The following fields are returned in the JSON response body:
+
+* `users` - An array of objects, each containing information
+  about the user and their local media. Objects contain the following fields:
+  - `displayname` - string - Displayname of this user.
+  - `media_count` - integer - Number of uploaded media by this user.
+  - `media_length` - integer - Size of uploaded media in bytes by this user.
+  - `user_id` - string - Fully-qualified user ID (ex. `@user:server.com`).
+* `next_token` - integer - Opaque value used for pagination. See above.
+* `total` - integer - Total number of users after filtering.
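Since `next_token` is only present while more pages remain, a client can drain the whole list with a simple loop. A hedged sketch, reusing the placeholder URL and token from the example above:

```python
# Sketch: follow next_token until the endpoint stops returning it.
import requests


def iter_media_stats(base_url: str, admin_token: str, **params):
    headers = {"Authorization": "Bearer " + admin_token}
    from_token = 0
    while True:
        page = requests.get(
            base_url + "/_synapse/admin/v1/statistics/users/media",
            headers=headers,
            params={**params, "from": from_token},
        )
        page.raise_for_status()
        body = page.json()
        yield from body["users"]
        if "next_token" not in body:
            return
        # Treat next_token as opaque: pass it straight back as `from`.
        from_token = body["next_token"]


for row in iter_media_stats(
    "https://synapse.example.com", "<admin access token>", order_by="media_count"
):
    print(row)
```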
diff --git a/synapse/rest/admin/__init__.py b/synapse/rest/admin/__init__.py index fa7e9e4043..2a4f7a1740 100644 --- a/synapse/rest/admin/__init__.py +++ b/synapse/rest/admin/__init__.py @@ -47,6 +47,7 @@ from synapse.rest.admin.rooms import ( ShutdownRoomRestServlet, ) from synapse.rest.admin.server_notice_servlet import SendServerNoticeServlet +from synapse.rest.admin.statistics import UserMediaStatisticsRestServlet from synapse.rest.admin.users import ( AccountValidityRenewServlet, DeactivateAccountRestServlet, @@ -227,6 +228,7 @@ def register_servlets(hs, http_server): DeviceRestServlet(hs).register(http_server) DevicesRestServlet(hs).register(http_server) DeleteDevicesRestServlet(hs).register(http_server) + UserMediaStatisticsRestServlet(hs).register(http_server) EventReportDetailRestServlet(hs).register(http_server) EventReportsRestServlet(hs).register(http_server) PushersRestServlet(hs).register(http_server) diff --git a/synapse/rest/admin/statistics.py b/synapse/rest/admin/statistics.py new file mode 100644 index 0000000000..f2490e382d --- /dev/null +++ b/synapse/rest/admin/statistics.py @@ -0,0 +1,122 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Dirk Klimpel +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import logging +from typing import TYPE_CHECKING, Tuple + +from synapse.api.errors import Codes, SynapseError +from synapse.http.servlet import RestServlet, parse_integer, parse_string +from synapse.http.site import SynapseRequest +from synapse.rest.admin._base import admin_patterns, assert_requester_is_admin +from synapse.storage.databases.main.stats import UserSortOrder +from synapse.types import JsonDict + +if TYPE_CHECKING: + from synapse.server import HomeServer + +logger = logging.getLogger(__name__) + + +class UserMediaStatisticsRestServlet(RestServlet): + """ + Get statistics about uploaded media by users. 
+ """ + + PATTERNS = admin_patterns("/statistics/users/media$") + + def __init__(self, hs: "HomeServer"): + self.hs = hs + self.auth = hs.get_auth() + self.store = hs.get_datastore() + + async def on_GET(self, request: SynapseRequest) -> Tuple[int, JsonDict]: + await assert_requester_is_admin(self.auth, request) + + order_by = parse_string( + request, "order_by", default=UserSortOrder.USER_ID.value + ) + if order_by not in ( + UserSortOrder.MEDIA_LENGTH.value, + UserSortOrder.MEDIA_COUNT.value, + UserSortOrder.USER_ID.value, + UserSortOrder.DISPLAYNAME.value, + ): + raise SynapseError( + 400, + "Unknown value for order_by: %s" % (order_by,), + errcode=Codes.INVALID_PARAM, + ) + + start = parse_integer(request, "from", default=0) + if start < 0: + raise SynapseError( + 400, + "Query parameter from must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + limit = parse_integer(request, "limit", default=100) + if limit < 0: + raise SynapseError( + 400, + "Query parameter limit must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + from_ts = parse_integer(request, "from_ts", default=0) + if from_ts < 0: + raise SynapseError( + 400, + "Query parameter from_ts must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + + until_ts = parse_integer(request, "until_ts") + if until_ts is not None: + if until_ts < 0: + raise SynapseError( + 400, + "Query parameter until_ts must be a string representing a positive integer.", + errcode=Codes.INVALID_PARAM, + ) + if until_ts <= from_ts: + raise SynapseError( + 400, + "Query parameter until_ts must be greater than from_ts.", + errcode=Codes.INVALID_PARAM, + ) + + search_term = parse_string(request, "search_term") + if search_term == "": + raise SynapseError( + 400, + "Query parameter search_term cannot be an empty string.", + errcode=Codes.INVALID_PARAM, + ) + + direction = parse_string(request, "dir", default="f") + if direction not in ("f", "b"): + raise SynapseError( + 400, "Unknown direction: %s" % (direction,), errcode=Codes.INVALID_PARAM + ) + + users_media, total = await self.store.get_users_media_usage_paginate( + start, limit, from_ts, until_ts, order_by, direction, search_term + ) + ret = {"users": users_media, "total": total} + if (start + limit) < total: + ret["next_token"] = start + len(users_media) + + return 200, ret diff --git a/synapse/storage/databases/main/stats.py b/synapse/storage/databases/main/stats.py index 5beb302be3..0cdb3ec1f7 100644 --- a/synapse/storage/databases/main/stats.py +++ b/synapse/storage/databases/main/stats.py @@ -16,15 +16,18 @@ import logging from collections import Counter +from enum import Enum from itertools import chain from typing import Any, Dict, List, Optional, Tuple from twisted.internet.defer import DeferredLock from synapse.api.constants import EventTypes, Membership +from synapse.api.errors import StoreError from synapse.storage.database import DatabasePool from synapse.storage.databases.main.state_deltas import StateDeltasStore from synapse.storage.engines import PostgresEngine +from synapse.types import JsonDict from synapse.util.caches.descriptors import cached logger = logging.getLogger(__name__) @@ -59,6 +62,23 @@ TYPE_TO_TABLE = {"room": ("room_stats", "room_id"), "user": ("user_stats", "user TYPE_TO_ORIGIN_TABLE = {"room": ("rooms", "room_id"), "user": ("users", "name")} +class UserSortOrder(Enum): + """ + Enum to define the sorting method used when returning users + with 
get_users_media_usage_paginate + + MEDIA_LENGTH = ordered by size of uploaded media. Smallest to largest. + MEDIA_COUNT = ordered by number of uploaded media. Smallest to largest. + USER_ID = ordered alphabetically by `user_id`. + DISPLAYNAME = ordered alphabetically by `displayname` + """ + + MEDIA_LENGTH = "media_length" + MEDIA_COUNT = "media_count" + USER_ID = "user_id" + DISPLAYNAME = "displayname" + + class StatsStore(StateDeltasStore): def __init__(self, database: DatabasePool, db_conn, hs): super().__init__(database, db_conn, hs) @@ -882,3 +902,110 @@ class StatsStore(StateDeltasStore): complete_with_stream_id=pos, absolute_field_overrides={"joined_rooms": joined_rooms}, ) + + async def get_users_media_usage_paginate( + self, + start: int, + limit: int, + from_ts: Optional[int] = None, + until_ts: Optional[int] = None, + order_by: Optional[UserSortOrder] = UserSortOrder.USER_ID.value, + direction: Optional[str] = "f", + search_term: Optional[str] = None, + ) -> Tuple[List[JsonDict], Dict[str, int]]: + """Function to retrieve a paginated list of users and their uploaded local media + (size and number). This will return a json list of users and the + total number of users matching the filter criteria. + + Args: + start: offset to begin the query from + limit: number of rows to retrieve + from_ts: request only media that are created later than this timestamp (ms) + until_ts: request only media that are created earlier than this timestamp (ms) + order_by: the sort order of the returned list + direction: sort ascending or descending + search_term: a string to filter user names by + Returns: + A list of user dicts and an integer representing the total number of + users that exist given this query + """ + + def get_users_media_usage_paginate_txn(txn): + filters = [] + args = [self.hs.config.server_name] + + if search_term: + filters.append("(lmr.user_id LIKE ? OR displayname LIKE ?)") + args.extend(["@%" + search_term + "%:%", "%" + search_term + "%"]) + + if from_ts: + filters.append("created_ts >= ?") + args.extend([from_ts]) + if until_ts: + filters.append("created_ts <= ?") + args.extend([until_ts]) + + # Set ordering + if UserSortOrder(order_by) == UserSortOrder.MEDIA_LENGTH: + order_by_column = "media_length" + elif UserSortOrder(order_by) == UserSortOrder.MEDIA_COUNT: + order_by_column = "media_count" + elif UserSortOrder(order_by) == UserSortOrder.USER_ID: + order_by_column = "lmr.user_id" + elif UserSortOrder(order_by) == UserSortOrder.DISPLAYNAME: + order_by_column = "displayname" + else: + raise StoreError( + 500, "Incorrect value for order_by provided: %s" % order_by + ) + + if direction == "b": + order = "DESC" + else: + order = "ASC" + + where_clause = "WHERE " + " AND ".join(filters) if len(filters) > 0 else "" + + sql_base = """ + FROM local_media_repository as lmr + LEFT JOIN profiles AS p ON lmr.user_id = '@' || p.user_id || ':' || ? + {} + GROUP BY lmr.user_id, displayname + """.format( + where_clause + ) + + # SQLite does not support SELECT COUNT(*) OVER() + sql = """ + SELECT COUNT(*) FROM ( + SELECT lmr.user_id + {sql_base} + ) AS count_user_ids + """.format( + sql_base=sql_base, + ) + txn.execute(sql, args) + count = txn.fetchone()[0] + + sql = """ + SELECT + lmr.user_id, + displayname, + COUNT(lmr.user_id) as media_count, + SUM(media_length) as media_length + {sql_base} + ORDER BY {order_by_column} {order} + LIMIT ? OFFSET ? 
+ """.format( + sql_base=sql_base, order_by_column=order_by_column, order=order, + ) + + args += [limit, start] + txn.execute(sql, args) + users = self.db_pool.cursor_to_dict(txn) + + return users, count + + return await self.db_pool.runInteraction( + "get_users_media_usage_paginate_txn", get_users_media_usage_paginate_txn + ) diff --git a/tests/rest/admin/test_statistics.py b/tests/rest/admin/test_statistics.py new file mode 100644 index 0000000000..816683a612 --- /dev/null +++ b/tests/rest/admin/test_statistics.py @@ -0,0 +1,485 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 Dirk Klimpel +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import json +from binascii import unhexlify +from typing import Any, Dict, List, Optional + +import synapse.rest.admin +from synapse.api.errors import Codes +from synapse.rest.client.v1 import login + +from tests import unittest + + +class UserMediaStatisticsTestCase(unittest.HomeserverTestCase): + servlets = [ + synapse.rest.admin.register_servlets, + login.register_servlets, + ] + + def prepare(self, reactor, clock, hs): + self.store = hs.get_datastore() + self.media_repo = hs.get_media_repository_resource() + + self.admin_user = self.register_user("admin", "pass", admin=True) + self.admin_user_tok = self.login("admin", "pass") + + self.other_user = self.register_user("user", "pass") + self.other_user_tok = self.login("user", "pass") + + self.url = "/_synapse/admin/v1/statistics/users/media" + + def test_no_auth(self): + """ + Try to list users without authentication. + """ + request, channel = self.make_request("GET", self.url, b"{}") + self.render(request) + + self.assertEqual(401, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.MISSING_TOKEN, channel.json_body["errcode"]) + + def test_requester_is_no_admin(self): + """ + If the user is not a server admin, an error 403 is returned. + """ + request, channel = self.make_request( + "GET", self.url, json.dumps({}), access_token=self.other_user_tok, + ) + self.render(request) + + self.assertEqual(403, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.FORBIDDEN, channel.json_body["errcode"]) + + def test_invalid_parameter(self): + """ + If parameters are invalid, an error is returned. 
+ """ + # unkown order_by + request, channel = self.make_request( + "GET", self.url + "?order_by=bar", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + + # negative from + request, channel = self.make_request( + "GET", self.url + "?from=-5", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + + # negative limit + request, channel = self.make_request( + "GET", self.url + "?limit=-5", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + + # negative from_ts + request, channel = self.make_request( + "GET", self.url + "?from_ts=-1234", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + + # negative until_ts + request, channel = self.make_request( + "GET", self.url + "?until_ts=-1234", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + + # until_ts smaller from_ts + request, channel = self.make_request( + "GET", + self.url + "?from_ts=10&until_ts=5", + access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + + # empty search term + request, channel = self.make_request( + "GET", self.url + "?search_term=", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + + # invalid search order + request, channel = self.make_request( + "GET", self.url + "?dir=bar", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(400, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(Codes.INVALID_PARAM, channel.json_body["errcode"]) + + def test_limit(self): + """ + Testing list of media with limit + """ + self._create_users_with_media(10, 2) + + request, channel = self.make_request( + "GET", self.url + "?limit=5", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], 10) + self.assertEqual(len(channel.json_body["users"]), 5) + self.assertEqual(channel.json_body["next_token"], 5) + self._check_fields(channel.json_body["users"]) + + def test_from(self): + """ + Testing list of media with a defined starting point (from) + """ + self._create_users_with_media(20, 2) + + request, channel = self.make_request( + "GET", self.url + "?from=5", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], 20) + self.assertEqual(len(channel.json_body["users"]), 15) + 
self.assertNotIn("next_token", channel.json_body) + self._check_fields(channel.json_body["users"]) + + def test_limit_and_from(self): + """ + Testing list of media with a defined starting point and limit + """ + self._create_users_with_media(20, 2) + + request, channel = self.make_request( + "GET", self.url + "?from=5&limit=10", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], 20) + self.assertEqual(channel.json_body["next_token"], 15) + self.assertEqual(len(channel.json_body["users"]), 10) + self._check_fields(channel.json_body["users"]) + + def test_next_token(self): + """ + Testing that `next_token` appears at the right place + """ + + number_users = 20 + self._create_users_with_media(number_users, 3) + + # `next_token` does not appear + # Number of results is the number of entries + request, channel = self.make_request( + "GET", self.url + "?limit=20", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], number_users) + self.assertEqual(len(channel.json_body["users"]), number_users) + self.assertNotIn("next_token", channel.json_body) + + # `next_token` does not appear + # Number of max results is larger than the number of entries + request, channel = self.make_request( + "GET", self.url + "?limit=21", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], number_users) + self.assertEqual(len(channel.json_body["users"]), number_users) + self.assertNotIn("next_token", channel.json_body) + + # `next_token` does appear + # Number of max results is smaller than the number of entries + request, channel = self.make_request( + "GET", self.url + "?limit=19", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], number_users) + self.assertEqual(len(channel.json_body["users"]), 19) + self.assertEqual(channel.json_body["next_token"], 19) + + # Set `from` to value of `next_token` for request remaining entries + # Check `next_token` does not appear + request, channel = self.make_request( + "GET", self.url + "?from=19", access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], number_users) + self.assertEqual(len(channel.json_body["users"]), 1) + self.assertNotIn("next_token", channel.json_body) + + def test_no_media(self): + """ + Tests that a normal lookup for statistics is successfully + if users have no media created + """ + + request, channel = self.make_request( + "GET", self.url, access_token=self.admin_user_tok, + ) + self.render(request) + + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(0, channel.json_body["total"]) + self.assertEqual(0, len(channel.json_body["users"])) + + def test_order_by(self): + """ + Testing order list with parameter `order_by` + """ + + # create users + self.register_user("user_a", "pass", displayname="UserZ") + userA_tok = self.login("user_a", "pass") + self._create_media(userA_tok, 1) + + self.register_user("user_b", "pass", displayname="UserY") + userB_tok = 
self.login("user_b", "pass") + self._create_media(userB_tok, 3) + + self.register_user("user_c", "pass", displayname="UserX") + userC_tok = self.login("user_c", "pass") + self._create_media(userC_tok, 2) + + # order by user_id + self._order_test("user_id", ["@user_a:test", "@user_b:test", "@user_c:test"]) + self._order_test( + "user_id", ["@user_a:test", "@user_b:test", "@user_c:test"], "f", + ) + self._order_test( + "user_id", ["@user_c:test", "@user_b:test", "@user_a:test"], "b", + ) + + # order by displayname + self._order_test( + "displayname", ["@user_c:test", "@user_b:test", "@user_a:test"] + ) + self._order_test( + "displayname", ["@user_c:test", "@user_b:test", "@user_a:test"], "f", + ) + self._order_test( + "displayname", ["@user_a:test", "@user_b:test", "@user_c:test"], "b", + ) + + # order by media_length + self._order_test( + "media_length", ["@user_a:test", "@user_c:test", "@user_b:test"], + ) + self._order_test( + "media_length", ["@user_a:test", "@user_c:test", "@user_b:test"], "f", + ) + self._order_test( + "media_length", ["@user_b:test", "@user_c:test", "@user_a:test"], "b", + ) + + # order by media_count + self._order_test( + "media_count", ["@user_a:test", "@user_c:test", "@user_b:test"], + ) + self._order_test( + "media_count", ["@user_a:test", "@user_c:test", "@user_b:test"], "f", + ) + self._order_test( + "media_count", ["@user_b:test", "@user_c:test", "@user_a:test"], "b", + ) + + def test_from_until_ts(self): + """ + Testing filter by time with parameters `from_ts` and `until_ts` + """ + # create media earlier than `ts1` to ensure that `from_ts` is working + self._create_media(self.other_user_tok, 3) + self.pump(1) + ts1 = self.clock.time_msec() + + # list all media when filter is not set + request, channel = self.make_request( + "GET", self.url, access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["users"][0]["media_count"], 3) + + # filter media starting at `ts1` after creating first media + # result is 0 + request, channel = self.make_request( + "GET", self.url + "?from_ts=%s" % (ts1,), access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], 0) + + self._create_media(self.other_user_tok, 3) + self.pump(1) + ts2 = self.clock.time_msec() + # create media after `ts2` to ensure that `until_ts` is working + self._create_media(self.other_user_tok, 3) + + # filter media between `ts1` and `ts2` + request, channel = self.make_request( + "GET", + self.url + "?from_ts=%s&until_ts=%s" % (ts1, ts2), + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["users"][0]["media_count"], 3) + + # filter media until `ts2` and earlier + request, channel = self.make_request( + "GET", self.url + "?until_ts=%s" % (ts2,), access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["users"][0]["media_count"], 6) + + def test_search_term(self): + self._create_users_with_media(20, 1) + + # check without filter get all users + request, channel = self.make_request( + "GET", self.url, access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), 
msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], 20) + + # filter user 1 and 10-19 by `user_id` + request, channel = self.make_request( + "GET", + self.url + "?search_term=foo_user_1", + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], 11) + + # filter on this user in `displayname` + request, channel = self.make_request( + "GET", + self.url + "?search_term=bar_user_10", + access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["users"][0]["displayname"], "bar_user_10") + self.assertEqual(channel.json_body["total"], 1) + + # filter and get empty result + request, channel = self.make_request( + "GET", self.url + "?search_term=foobar", access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, int(channel.result["code"]), msg=channel.result["body"]) + self.assertEqual(channel.json_body["total"], 0) + + def _create_users_with_media(self, number_users: int, media_per_user: int): + """ + Create a number of users with a number of media + Args: + number_users: Number of users to be created + media_per_user: Number of media to be created for each user + """ + for i in range(number_users): + self.register_user("foo_user_%s" % i, "pass", displayname="bar_user_%s" % i) + user_tok = self.login("foo_user_%s" % i, "pass") + self._create_media(user_tok, media_per_user) + + def _create_media(self, user_token: str, number_media: int): + """ + Create a number of media for a specific user + Args: + user_token: Access token of the user + number_media: Number of media to be created for the user + """ + upload_resource = self.media_repo.children[b"upload"] + for i in range(number_media): + # file size is 67 Byte + image_data = unhexlify( + b"89504e470d0a1a0a0000000d4948445200000001000000010806" + b"0000001f15c4890000000a49444154789c63000100000500010d" + b"0a2db40000000049454e44ae426082" + ) + + # Upload some media into the room + self.helper.upload_media( + upload_resource, image_data, tok=user_token, expect_code=200 + ) + + def _check_fields(self, content: List[Dict[str, Any]]): + """Checks that all attributes are present in content + Args: + content: List that is checked for content + """ + for c in content: + self.assertIn("user_id", c) + self.assertIn("displayname", c) + self.assertIn("media_count", c) + self.assertIn("media_length", c) + + def _order_test( + self, order_type: str, expected_user_list: List[str], dir: Optional[str] = None + ): + """Request the list of users in a certain order. 
Assert that order is what + we expect + Args: + order_type: The type of ordering to give the server + expected_user_list: The list of user_ids in the order we expect to get + back from the server + dir: The direction of ordering to give the server + """ + + url = self.url + "?order_by=%s" % (order_type,) + if dir is not None and dir in ("b", "f"): + url += "&dir=%s" % (dir,) + request, channel = self.make_request( + "GET", url.encode("ascii"), access_token=self.admin_user_tok, + ) + self.render(request) + self.assertEqual(200, channel.code, msg=channel.json_body) + self.assertEqual(channel.json_body["total"], len(expected_user_list)) + + returned_order = [row["user_id"] for row in channel.json_body["users"]] + self.assertListEqual(expected_user_list, returned_order) + self._check_fields(channel.json_body["users"]) From fb56dfdccd129ecdc07effbb6e2e18d3b304d821 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 6 Nov 2020 11:42:07 +0000 Subject: [PATCH 15/26] Fix SIGHUP handler (#8697) Fixes: ``` builtins.TypeError: _reload_logging_config() takes 1 positional argument but 2 were given ``` --- changelog.d/8697.misc | 1 + synapse/app/_base.py | 5 ++--- 2 files changed, 3 insertions(+), 3 deletions(-) create mode 100644 changelog.d/8697.misc diff --git a/changelog.d/8697.misc b/changelog.d/8697.misc new file mode 100644 index 0000000000..7982a4e46d --- /dev/null +++ b/changelog.d/8697.misc @@ -0,0 +1 @@ + Re-organize the structured logging code to separate the TCP transport handling from the JSON formatting. diff --git a/synapse/app/_base.py b/synapse/app/_base.py index f6f7b2bf42..9c8dc785c6 100644 --- a/synapse/app/_base.py +++ b/synapse/app/_base.py @@ -49,7 +49,6 @@ def register_sighup(func, *args, **kwargs): Args: func (function): Function to be called when sent a SIGHUP signal. - Will be called with a single default argument, the homeserver. *args, **kwargs: args and kwargs to be passed to the target function. """ _sighup_callbacks.append((func, args, kwargs)) @@ -251,13 +250,13 @@ def start(hs: "synapse.server.HomeServer", listeners: Iterable[ListenerConfig]): sdnotify(b"RELOADING=1") for i, args, kwargs in _sighup_callbacks: - i(hs, *args, **kwargs) + i(*args, **kwargs) sdnotify(b"READY=1") signal.signal(signal.SIGHUP, handle_sighup) - register_sighup(refresh_certificate) + register_sighup(refresh_certificate, hs) # Load the certificate from disk. refresh_certificate(hs) From 2a6b6852940c29320cc463ac347c5632e9dceab2 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 6 Nov 2020 11:59:22 +0000 Subject: [PATCH 16/26] Add documentation about documentation to CONTRIBUTING.md (#8714) This PR adds some documentation that: * Describes who the audience for the `docs/`, `docs/dev/` and `docs/admin/` directories is, as well as Synapse's wiki page. * Stresses that we'd like all documentation to be written in markdown. --- CONTRIBUTING.md | 18 ++++++++++++++++++ changelog.d/8714.doc | 1 + 2 files changed, 19 insertions(+) create mode 100644 changelog.d/8714.doc diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index f7bea79b0d..1d7bb8f969 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -156,6 +156,24 @@ directory, you will need both a regular newsfragment *and* an entry in the debian changelog. (Though typically such changes should be submitted as two separate pull requests.) +## Documentation + +There is a growing amount of documentation located in the [docs](docs) +directory.
This documentation is intended primarily for sysadmins running their +own Synapse instance, as well as developers interacting externally with +Synapse. [docs/dev](docs/dev) exists primarily to house documentation for +Synapse developers. [docs/admin_api](docs/admin_api) houses documentation +regarding Synapse's Admin API, which is used mostly by sysadmins and external +service developers. + +New files added to both folders should be written in [Github-Flavoured +Markdown](https://guides.github.com/features/mastering-markdown/), and attempts +should be made to migrate existing documents to markdown where possible. + +Some documentation also exists in [Synapse's Github +Wiki](https://github.com/matrix-org/synapse/wiki), although this is primarily +contributed to by community authors. + ## Sign off In order to have a concrete record that your contribution is intentional diff --git a/changelog.d/8714.doc b/changelog.d/8714.doc new file mode 100644 index 0000000000..bda22714e7 --- /dev/null +++ b/changelog.d/8714.doc @@ -0,0 +1 @@ +Add information regarding the various sources of, and expected contributions to, Synapse's documentation to `CONTRIBUTING.md`. \ No newline at end of file From c059413001cd2ff7c6104cfcd323ed115245ae90 Mon Sep 17 00:00:00 2001 From: Marcus Schopen Date: Fri, 6 Nov 2020 15:33:07 +0100 Subject: [PATCH 17/26] Notes on SSO logins and media_repository worker (#8701) If SSO login is used (e.g. SAML) in a multi-worker setup, it should be mentioned that currently all SAML logins must run on the same worker, see https://github.com/matrix-org/synapse/issues/7530 Also, if you are using different ports (for example 443 and 8448) in a reverse proxy for client and federation, the path `/_matrix/media` on the client and federation port must point to the listener of the `media_repository` worker, otherwise you'll get a 404 on the federation port for the path `/_matrix/media` if a remote server is trying to get the media object on the federation port, see https://github.com/matrix-org/synapse/issues/8695 --- changelog.d/8701.doc | 1 + docs/workers.md | 5 +++++ 2 files changed, 6 insertions(+) create mode 100644 changelog.d/8701.doc diff --git a/changelog.d/8701.doc b/changelog.d/8701.doc new file mode 100644 index 0000000000..e2e8b2f79a --- /dev/null +++ b/changelog.d/8701.doc @@ -0,0 +1 @@ +Notes on SSO logins and media_repository worker. \ No newline at end of file diff --git a/docs/workers.md b/docs/workers.md index 4e046bdb31..c53d1bd2ff 100644 --- a/docs/workers.md +++ b/docs/workers.md @@ -262,6 +262,9 @@ using): Note that an HTTP listener with `client` and `federation` resources must be configured in the `worker_listeners` option in the worker config. +Ensure that all SSO logins go to a single process (usually the main process). +If multiple workers handle the SSO endpoints, logins will not work correctly; see +[#7530](https://github.com/matrix-org/synapse/issues/7530). #### Load balancing @@ -420,6 +423,8 @@ and you must configure a single instance to run the background tasks, e.g.: media_instance_running_background_jobs: "media-repository-1" ``` +Note that if a reverse proxy is used, then `/_matrix/media/` must be routed for both inbound client and federation requests (if they are handled separately). + ### `synapse.app.user_dir` Handles searches in the user directory.
It can handle REST endpoints matching From 4c7587ef99e8057960a0d9cd3c50e73b90e3c9ae Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Nicolai=20S=C3=B8borg?= Date: Wed, 11 Nov 2020 14:24:53 +0100 Subject: [PATCH 18/26] Catch exceptions in password_providers (#8636) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Nicolai Søborg --- changelog.d/8636.misc | 1 + synapse/handlers/auth.py | 13 +++++++++---- 2 files changed, 10 insertions(+), 4 deletions(-) create mode 100644 changelog.d/8636.misc diff --git a/changelog.d/8636.misc b/changelog.d/8636.misc new file mode 100644 index 0000000000..df4dca42f8 --- /dev/null +++ b/changelog.d/8636.misc @@ -0,0 +1 @@ +Catch exceptions during initialization of `password_providers`. Contributed by Nicolai Søborg. diff --git a/synapse/handlers/auth.py b/synapse/handlers/auth.py index ff103cbb92..213baea2e3 100644 --- a/synapse/handlers/auth.py +++ b/synapse/handlers/auth.py @@ -181,10 +181,15 @@ class AuthHandler(BaseHandler): # better way to break the loop account_handler = ModuleApi(hs, self) - self.password_providers = [ - module(config=config, account_handler=account_handler) - for module, config in hs.config.password_providers - ] + self.password_providers = [] + for module, config in hs.config.password_providers: + try: + self.password_providers.append( + module(config=config, account_handler=account_handler) + ) + except Exception as e: + logger.error("Error while initializing %r: %s", module, e) + raise logger.info("Extra password_providers: %r", self.password_providers) From eedaf90c840f2b66e0cd537ccd462df19b3f5dcf Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Wed, 11 Nov 2020 14:22:40 +0000 Subject: [PATCH 19/26] Better error message when a remote resource uses invalid Content-Type (#8719) --- changelog.d/8719.misc | 1 + synapse/http/matrixfederationclient.py | 10 ++++++++-- 2 files changed, 9 insertions(+), 2 deletions(-) create mode 100644 changelog.d/8719.misc diff --git a/changelog.d/8719.misc b/changelog.d/8719.misc new file mode 100644 index 0000000000..9aabef8fc3 --- /dev/null +++ b/changelog.d/8719.misc @@ -0,0 +1 @@ +Improve the error message returned when a remote server incorrectly sets the `Content-Type` header in response to a JSON request. 
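The check being improved here hinges on `cgi.parse_header`, which splits a `Content-Type` value into the media type and its parameters. A minimal standalone sketch of that logic (illustrative only, not Synapse's actual code; note the stdlib `cgi` module is deprecated in newer Pythons):

```python
import cgi

def is_json_content_type(c_type: str) -> bool:
    # cgi.parse_header splits "application/json; charset=utf-8" into
    # ("application/json", {"charset": "utf-8"}), so parameters such
    # as charset do not cause false negatives.
    val, _options = cgi.parse_header(c_type)
    return val == "application/json"

assert is_json_content_type("application/json; charset=utf-8")
assert not is_json_content_type("text/html")
```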
diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 04766ca965..7e17cdb73e 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -1063,13 +1063,19 @@ def check_content_type_is_json(headers): """ c_type = headers.getRawHeaders(b"Content-Type") if c_type is None: - raise RequestSendFailed(RuntimeError("No Content-Type header"), can_retry=False) + raise RequestSendFailed( + RuntimeError("No Content-Type header received from remote server"), + can_retry=False, + ) c_type = c_type[0].decode("ascii") # only the first header val, options = cgi.parse_header(c_type) if val != "application/json": raise RequestSendFailed( - RuntimeError("Content-Type not application/json: was '%s'" % c_type), + RuntimeError( + "Remote server sent Content-Type header of '%s', not 'application/json'" + % c_type, + ), can_retry=False, ) From 89700dfb8c2fbf375d12edbde01429b4e6bfd884 Mon Sep 17 00:00:00 2001 From: Will Hunt Date: Wed, 11 Nov 2020 14:23:16 +0000 Subject: [PATCH 20/26] Check support room has only two users before sending a notice (#8728) * Check support room has only two users * Create 8728.bugfix * Update synapse/server_notices/server_notices_manager.py Co-authored-by: Erik Johnston Co-authored-by: Erik Johnston --- changelog.d/8728.bugfix | 1 + synapse/server_notices/server_notices_manager.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) create mode 100644 changelog.d/8728.bugfix diff --git a/changelog.d/8728.bugfix b/changelog.d/8728.bugfix new file mode 100644 index 0000000000..8064aad0ff --- /dev/null +++ b/changelog.d/8728.bugfix @@ -0,0 +1 @@ +Fix bug where the `/_synapse/admin/v1/send_server_notice` API could send notices to non-notice rooms. diff --git a/synapse/server_notices/server_notices_manager.py b/synapse/server_notices/server_notices_manager.py index 0422d4c7ce..d464c75c03 100644 --- a/synapse/server_notices/server_notices_manager.py +++ b/synapse/server_notices/server_notices_manager.py @@ -119,7 +119,7 @@ class ServerNoticesManager: # manages to invite the system user to a room, that doesn't make it # the server notices room. 
user_ids = await self._store.get_users_in_room(room.room_id) - if self.server_notices_mxid in user_ids: + if len(user_ids) <= 2 and self.server_notices_mxid in user_ids: # we found a room which our user shares with the system notice # user logger.info( From 5829872bec9b9986c741eafec36e47774e4d2b3e Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Nov 2020 15:07:34 +0000 Subject: [PATCH 21/26] Fix port script to handle foreign key constraints (#8730) --- .buildkite/test_db.db | Bin 19279872 -> 19296256 bytes changelog.d/8730.bugfix | 1 + scripts/synapse_port_db | 68 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 63 insertions(+), 6 deletions(-) create mode 100644 changelog.d/8730.bugfix diff --git a/.buildkite/test_db.db b/.buildkite/test_db.db index 361369a581771bed36692a848aa396df96ad59d9..a0d9f16a7522c17f91ace4175a2234b3f6a93941 100644 GIT binary patch delta 2680 zcmajf3v?7$9mnyV&0~{Yb|(pBlR{wc5|BhfD1uroN-1n z57Z_hv4dKmiB_jo(=@hJ@jo zKmK%;GOCP;iK5P6N$INtMIMjR)}J9a_Fp$}SRFg8Dvg5$iu$rg9UJO2n{T@Xwp(bs zKHDv_-D2DI+it*ir`c}Mc0;zS+HQ&MmfCKa?M^@J&d9$$FRTnZK2iRJ*W+<6PAb3ssnnIfy_Yin*x9Jj}=IumClv#p|&Uby$RY zEXKuHf;V6(mf?+f6W)wV@D{ukmtr~IhPPt{F2hP(j#XHV7_Pt?T!{v}18Z>=)~uoL|lzz1Sf&Nqh>Q#xU;02=2ltK7+e)5BA|+d={U>etaHZz!&i)9Kd}zi2Lzn9Kr+m z3JzloU&Vv?8orK)a0K7LI9z-a-@>=?FplCo_%6PO@8c0XipTH+9K#RsBRq~L@MHW0 zKgG}RBz}(LIDx0|G=70+@GK^963^jz{1PwVSNJtv#BcCh{0_gzOL!T7z$-X~)A%F) zgfsXv&f+ilEB=O8@pt?K|HQv;In<-v3K?D*c{1{46v!x);geA$qgaMtMnJ|i89^B# z8LEsD8KpAHWK5SaLq@rbu#5^Bl`<}pF;m7Y8C5c7%b25#b2})FQ8jqNGb3-FM?I=u z6&eg}8Y#LfqKMg^BcbJ?MAT(a~47 zTq#H2q?RmUaPCF!oI1I`dKPv4zBL|oU+7ryrD@Mia{|u*Q%aWU_NIXnU&Z3Z-rbus+O~u~ zWyq8lWpQWrX{E9*TU6AroJEU7rcKXXkT6WKy+gKcne~3*42}C|t`kkMD`HKt##OP6 zxnt9!CTEh`x(fvxRtu|Tvb5Z}+?tdeeK;6iom+BpcjX>rw;-oZI+N11_N`VbuBQyM z^?XsiQ#UhI3Oy2x_A={+riOJZo3@Cxu`U0#q&jiFW_D?tY1J|pWN4x}p`~=8i54rR z8?C0eQQsjfQ~d9QZ91d2>CIY4TBjc^vmh?ob(Up*dxfd%agnivZm^dr(XJV0 z20}AwoF9^?(YuqnnKnB0T4AMF0`pFunMgM?$ZE+}%h}6wN7U7(oAjCzUGj_)dWIF7 zVzPTtqb1UoNLsxa_QKE-i5=p?G@P6~+Xfa9j~ntV;8YwT~

*<#tIY0v99bekq!?skh#IylQlibtL-rL zK=yVsG1u7I&>GB+wFZrul~$iB)l5goDAHZEmSR<-%wW3LXO0UQd*hSZ{3@hk>*pCg zspiDeiY$Ftc!AA3roar=89uYdWwAwjH~W>*&`~G6@F53%kc+g0Qo3D zA&x;224WD3F&M{U2ug4qj>icYic*x}M4W_SI2otlR1C)mjKpaeh0{@vGjJx(!r3?n z6&Q_kF$QCC9x5>o<8eMNzywUhBuvJIxCm2lF{&^X({Kr@F&&rUGR(l`xB@eAC1znZ z=HMz^jcaf%Y7oL)%)@n#?iX~Wz2Hb|*aR-*+ zPTYk?G@%(SxEuFiIaXjLOsv9c+>1515BH-LQLM!Sco6Hb9uMJRY``OU6dSP#kKu8& zVKdtC1fE0(w%{prB8I2Yg=g?Ao@-PH)U2wck`+%HLOR{bZ=RiB~lVC;navGakaOx-LD$k0|=r&1|VOxwHK)Re*u2p B#UKCx diff --git a/changelog.d/8730.bugfix b/changelog.d/8730.bugfix new file mode 100644 index 0000000000..dcc42bc981 --- /dev/null +++ b/changelog.d/8730.bugfix @@ -0,0 +1 @@ +Fix port script to correctly handle foreign key constraints. Broke in v1.21.0. diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 13c0120bb4..7a638ea8e3 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -22,7 +22,7 @@ import logging import sys import time import traceback -from typing import Optional +from typing import Dict, Optional, Set import yaml @@ -292,6 +292,34 @@ class Porter(object): return table, already_ported, total_to_port, forward_chunk, backward_chunk + async def get_table_constraints(self) -> Dict[str, Set[str]]: + """Returns a map of tables that have foreign key constraints to tables they depend on. + """ + + def _get_constraints(txn): + # We can pull the information about foreign key constraints out from + # the postgres schema tables. + sql = """ + SELECT DISTINCT + tc.table_name, + ccu.table_name AS foreign_table_name + FROM + information_schema.table_constraints AS tc + INNER JOIN information_schema.constraint_column_usage AS ccu + USING (table_schema, constraint_name) + WHERE tc.constraint_type = 'FOREIGN KEY'; + """ + txn.execute(sql) + + results = {} + for table, foreign_table in txn: + results.setdefault(table, set()).add(foreign_table) + return results + + return await self.postgres_store.db_pool.runInteraction( + "get_table_constraints", _get_constraints + ) + async def handle_table( self, table, postgres_size, table_size, forward_chunk, backward_chunk ): @@ -619,15 +647,43 @@ class Porter(object): consumeErrors=True, ) ) + # Map from table name to args passed to `handle_table`, i.e. a tuple + # of: `postgres_size`, `table_size`, `forward_chunk`, `backward_chunk`. + tables_to_port_info_map = {r[0]: r[1:] for r in setup_res} # Step 4. Do the copying. + # + # This is slightly convoluted as we need to ensure tables are ported + # in the correct order due to foreign key constraints. self.progress.set_state("Copying to postgres") - await make_deferred_yieldable( - defer.gatherResults( - [run_in_background(self.handle_table, *res) for res in setup_res], - consumeErrors=True, + + constraints = await self.get_table_constraints() + tables_ported = set() # type: Set[str] + + while tables_to_port_info_map: + # Pulls out all tables that are still to be ported and which + # only depend on tables that are already ported (if any). + tables_to_port = [ + table + for table in tables_to_port_info_map + if not constraints.get(table, set()) - tables_ported + ] + + await make_deferred_yieldable( + defer.gatherResults( + [ + run_in_background( + self.handle_table, + table, + *tables_to_port_info_map.pop(table), + ) + for table in tables_to_port + ], + consumeErrors=True, + ) ) - ) + + tables_ported.update(tables_to_port) # Step 5. 
Set up sequences self.progress.set_state("Setting up sequence generators") From 41a389934e45dd2a9e96b0b465626adef18b25b8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 11 Nov 2020 15:08:03 +0000 Subject: [PATCH 22/26] Fix port script failing when DB has no backfilled events. (#8729) Fixes #8618 --- changelog.d/8729.bugfix | 1 + scripts/synapse_port_db | 12 +++++------- 2 files changed, 6 insertions(+), 7 deletions(-) create mode 100644 changelog.d/8729.bugfix diff --git a/changelog.d/8729.bugfix b/changelog.d/8729.bugfix new file mode 100644 index 0000000000..7f59a3b9e2 --- /dev/null +++ b/changelog.d/8729.bugfix @@ -0,0 +1 @@ +Fix port script failing when DB has no backfilled events. Broke in v1.21.0. diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 7a638ea8e3..604b961bd2 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -876,14 +876,12 @@ class Porter(object): "ALTER SEQUENCE events_stream_seq RESTART WITH %s", (next_id,) ) - txn.execute("SELECT -MIN(stream_ordering) FROM events") + txn.execute("SELECT GREATEST(-MIN(stream_ordering), 1) FROM events") curr_id = txn.fetchone()[0] - if curr_id: - next_id = curr_id + 1 - txn.execute( - "ALTER SEQUENCE events_backfill_stream_seq RESTART WITH %s", - (next_id,), - ) + next_id = curr_id + 1 + txn.execute( + "ALTER SEQUENCE events_backfill_stream_seq RESTART WITH %s", (next_id,), + ) return self.postgres_store.db_pool.runInteraction( "_setup_events_stream_seqs", r From c2d4467cd435851484b29685959124c541d47842 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 12 Nov 2020 14:26:24 +0000 Subject: [PATCH 23/26] Enable reconnection in DB pool (#8726) `adbapi.ConnectionPool` lets you turn on auto-reconnect of DB connections. This is off by default. As far as I can tell, if it's not enabled, dead connections never get removed from the pool. Maybe helps #8574 --- changelog.d/8726.bugfix | 1 + synapse/storage/database.py | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) create mode 100644 changelog.d/8726.bugfix diff --git a/changelog.d/8726.bugfix b/changelog.d/8726.bugfix new file mode 100644 index 0000000000..831f773a25 --- /dev/null +++ b/changelog.d/8726.bugfix @@ -0,0 +1 @@ +Fix bug where Synapse would not recover after losing connection to the database. diff --git a/synapse/storage/database.py b/synapse/storage/database.py index a0572b2952..d1b5760c2c 100644 --- a/synapse/storage/database.py +++ b/synapse/storage/database.py @@ -88,13 +88,18 @@ def make_pool( """Get the connection pool for the database. """ + # By default enable `cp_reconnect`. We need to fiddle with db_args in case + # someone has explicitly set `cp_reconnect`. + db_args = dict(db_config.config.get("args", {})) + db_args.setdefault("cp_reconnect", True) + return adbapi.ConnectionPool( db_config.config["name"], cp_reactor=reactor, cp_openfun=lambda conn: engine.on_new_connection( LoggingDatabaseConnection(conn, engine, "on_new_connection") ), - **db_config.config.get("args", {}), + **db_args, ) From 4cb00d297f2afa5ae80c51a3fd761e0eea79c6b3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Nov 2020 11:29:18 +0000 Subject: [PATCH 24/26] Cache event ID to auth event IDs lookups (#8752) This should hopefully speed up `get_auth_chain_difference` a bit in the case of repeated state res on the same rooms. `get_auth_chain_difference` does a breadth-first walk of the auth graphs by repeatedly looking up events' auth events.
Different state resolutions on the same room will end up doing a lot of the same event-to-auth-events lookups, so by caching them we should speed things up in cases of repeated state resolutions on the same room. --- changelog.d/8752.misc | 1 + .../databases/main/event_federation.py | 82 ++++++++++++++++--- 2 files changed, 71 insertions(+), 12 deletions(-) create mode 100644 changelog.d/8752.misc diff --git a/changelog.d/8752.misc b/changelog.d/8752.misc new file mode 100644 index 0000000000..eac92e9d1d --- /dev/null +++ b/changelog.d/8752.misc @@ -0,0 +1 @@ +Speed up repeated state resolutions on the same room by caching event ID to auth event ID lookups. diff --git a/synapse/storage/databases/main/event_federation.py b/synapse/storage/databases/main/event_federation.py index a6279a6c13..2e07c37340 100644 --- a/synapse/storage/databases/main/event_federation.py +++ b/synapse/storage/databases/main/event_federation.py @@ -26,6 +26,7 @@ from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.signatures import SignatureWorkerStore from synapse.types import Collection from synapse.util.caches.descriptors import cached +from synapse.util.caches.lrucache import LruCache from synapse.util.iterutils import batch_iter logger = logging.getLogger(__name__) @@ -40,6 +41,11 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas self._delete_old_forward_extrem_cache, 60 * 60 * 1000 ) + # Cache of event ID to list of auth event IDs and their depths. + self._event_auth_cache = LruCache( + 500000, "_event_auth_cache", size_callback=len + ) # type: LruCache[str, List[Tuple[str, int]]] + async def get_auth_chain( self, event_ids: Collection[str], include_given: bool = False ) -> List[EventBase]: @@ -84,17 +90,45 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas else: results = set() - base_sql = "SELECT DISTINCT auth_id FROM event_auth WHERE " + # We pull out the depth simply so that we can populate the + # `_event_auth_cache` cache. + base_sql = """ + SELECT a.event_id, auth_id, depth + FROM event_auth AS a + INNER JOIN events AS e ON (e.event_id = a.auth_id) + WHERE + """ front = set(event_ids) while front: new_front = set() for chunk in batch_iter(front, 100): - clause, args = make_in_list_sql_clause( - txn.database_engine, "event_id", chunk - ) - txn.execute(base_sql + clause, args) - new_front.update(r[0] for r in txn) + # Pull the auth events either from the cache or DB. + to_fetch = [] # Event IDs to fetch from DB # type: List[str] + for event_id in chunk: + res = self._event_auth_cache.get(event_id) + if res is None: + to_fetch.append(event_id) + else: + new_front.update(auth_id for auth_id, depth in res) + + if to_fetch: + clause, args = make_in_list_sql_clause( + txn.database_engine, "a.event_id", to_fetch + ) + txn.execute(base_sql + clause, args) + + # Note we need to batch up the results by event ID before + # adding to the cache. + to_cache = {} + for event_id, auth_event_id, auth_event_depth in txn: + to_cache.setdefault(event_id, []).append( + (auth_event_id, auth_event_depth) + ) + new_front.add(auth_event_id) + + for event_id, auth_events in to_cache.items(): + self._event_auth_cache.set(event_id, auth_events) new_front -= results @@ -213,14 +247,38 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore, SQLBas break # Fetch the auth events and their depths of the N last events we're - # currently walking + # currently walking, either from cache or DB.
search, chunk = search[:-100], search[-100:] - clause, args = make_in_list_sql_clause( - txn.database_engine, "a.event_id", [e_id for _, e_id in chunk] - ) - txn.execute(base_sql + clause, args) - for event_id, auth_event_id, auth_event_depth in txn: + found = [] # Results found # type: List[Tuple[str, str, int]] + to_fetch = [] # Event IDs to fetch from DB # type: List[str] + for _, event_id in chunk: + res = self._event_auth_cache.get(event_id) + if res is None: + to_fetch.append(event_id) + else: + found.extend((event_id, auth_id, depth) for auth_id, depth in res) + + if to_fetch: + clause, args = make_in_list_sql_clause( + txn.database_engine, "a.event_id", to_fetch + ) + txn.execute(base_sql + clause, args) + + # We parse the results and add them to the `found` set and the + # cache (note we need to batch up the results by event ID before + # adding to the cache). + to_cache = {} + for event_id, auth_event_id, auth_event_depth in txn: + to_cache.setdefault(event_id, []).append( + (auth_event_id, auth_event_depth) + ) + found.append((event_id, auth_event_id, auth_event_depth)) + + for event_id, auth_events in to_cache.items(): + self._event_auth_cache.set(event_id, auth_events) + + for event_id, auth_event_id, auth_event_depth in found: event_to_auth_events.setdefault(event_id, set()).add(auth_event_id) sets = event_to_missing_sets.get(auth_event_id) From 1b15a3d92cbe1ee9475319ff81abe8760d6be19f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Nov 2020 11:53:51 +0000 Subject: [PATCH 25/26] Fix port script so that it can be run again after failure. (#8755) If the script fails (or is CTRL-C'ed) between porting some of the events table and copying of the sequences, then the port script will immediately die if run again due to the postgres DB having inconsistencies between sequences and tables. The fix is to move the porting of sequences to before porting the tables, so that there is never a period where the Postgres DB is inconsistent. To do that we need to change how we port the sequences so that the values are calculated from the SQLite DB rather than the Postgres DB. Fixes #8619 --- changelog.d/8755.bugfix | 1 + scripts/synapse_port_db | 84 ++++++++++++++++++++++++++--------------- 2 files changed, 55 insertions(+), 30 deletions(-) create mode 100644 changelog.d/8755.bugfix diff --git a/changelog.d/8755.bugfix b/changelog.d/8755.bugfix new file mode 100644 index 0000000000..42bbed3ac2 --- /dev/null +++ b/changelog.d/8755.bugfix @@ -0,0 +1 @@ +Fix port script so that it can be run again after a failure. Broke in v1.21.0. diff --git a/scripts/synapse_port_db b/scripts/synapse_port_db index 604b961bd2..5ad17aa90f 100755 --- a/scripts/synapse_port_db +++ b/scripts/synapse_port_db @@ -619,7 +619,18 @@ class Porter(object): "create_port_table", create_port_table ) - # Step 2. Get tables. + # Step 2. Set up sequences + # + # We do this before porting the tables so that even if we fail half + # way through, the postgres DB always has sequences that are greater + # than their respective tables. If we don't, then creating the + # `DataStore` object will fail due to the inconsistency. + self.progress.set_state("Setting up sequence generators") + await self._setup_state_group_id_seq() + await self._setup_user_id_seq() + await self._setup_events_stream_seqs() + + # Step 3. Get tables.
self.progress.set_state("Fetching tables") sqlite_tables = await self.sqlite_store.db_pool.simple_select_onecol( table="sqlite_master", keyvalues={"type": "table"}, retcol="name" @@ -634,7 +645,7 @@ class Porter(object): tables = set(sqlite_tables) & set(postgres_tables) logger.info("Found %d tables", len(tables)) - # Step 3. Figure out what still needs copying + # Step 4. Figure out what still needs copying self.progress.set_state("Checking on port progress") setup_res = await make_deferred_yieldable( defer.gatherResults( @@ -651,7 +662,7 @@ class Porter(object): # of: `postgres_size`, `table_size`, `forward_chunk`, `backward_chunk`. tables_to_port_info_map = {r[0]: r[1:] for r in setup_res} - # Step 4. Do the copying. + # Step 5. Do the copying. # # This is slightly convoluted as we need to ensure tables are ported # in the correct order due to foreign key constraints. @@ -685,12 +696,6 @@ class Porter(object): tables_ported.update(tables_to_port) - # Step 5. Set up sequences - self.progress.set_state("Setting up sequence generators") - await self._setup_state_group_id_seq() - await self._setup_user_id_seq() - await self._setup_events_stream_seqs() - self.progress.done() except Exception as e: global end_error_exec_info @@ -848,43 +853,62 @@ class Porter(object): return done, remaining + done - def _setup_state_group_id_seq(self): + async def _setup_state_group_id_seq(self): + curr_id = await self.sqlite_store.db_pool.simple_select_one_onecol( + table="state_groups", keyvalues={}, retcol="MAX(id)", allow_none=True + ) + + if not curr_id: + return + def r(txn): - txn.execute("SELECT MAX(id) FROM state_groups") - curr_id = txn.fetchone()[0] - if not curr_id: - return next_id = curr_id + 1 txn.execute("ALTER SEQUENCE state_group_id_seq RESTART WITH %s", (next_id,)) - return self.postgres_store.db_pool.runInteraction("setup_state_group_id_seq", r) + await self.postgres_store.db_pool.runInteraction("setup_state_group_id_seq", r) + + async def _setup_user_id_seq(self): + curr_id = await self.sqlite_store.db_pool.runInteraction( + "setup_user_id_seq", find_max_generated_user_id_localpart + ) - def _setup_user_id_seq(self): def r(txn): - next_id = find_max_generated_user_id_localpart(txn) + 1 + next_id = curr_id + 1 txn.execute("ALTER SEQUENCE user_id_seq RESTART WITH %s", (next_id,)) return self.postgres_store.db_pool.runInteraction("setup_user_id_seq", r) - def _setup_events_stream_seqs(self): - def r(txn): - txn.execute("SELECT MAX(stream_ordering) FROM events") - curr_id = txn.fetchone()[0] - if curr_id: - next_id = curr_id + 1 + async def _setup_events_stream_seqs(self): + """Set the event stream sequences to the correct values. + """ + + # We get called before we've ported the events table, so we need to + # fetch the current positions from the SQLite store. 
+ curr_forward_id = await self.sqlite_store.db_pool.simple_select_one_onecol( + table="events", keyvalues={}, retcol="MAX(stream_ordering)", allow_none=True + ) + + curr_backward_id = await self.sqlite_store.db_pool.simple_select_one_onecol( + table="events", + keyvalues={}, + retcol="MAX(-MIN(stream_ordering), 1)", + allow_none=True, + ) + + def _setup_events_stream_seqs_set_pos(txn): + if curr_forward_id: txn.execute( - "ALTER SEQUENCE events_stream_seq RESTART WITH %s", (next_id,) + "ALTER SEQUENCE events_stream_seq RESTART WITH %s", + (curr_forward_id + 1,), ) - txn.execute("SELECT GREATEST(-MIN(stream_ordering), 1) FROM events") - curr_id = txn.fetchone()[0] - next_id = curr_id + 1 txn.execute( - "ALTER SEQUENCE events_backfill_stream_seq RESTART WITH %s", (next_id,), + "ALTER SEQUENCE events_backfill_stream_seq RESTART WITH %s", + (curr_backward_id + 1,), ) - return self.postgres_store.db_pool.runInteraction( - "_setup_events_stream_seqs", r + return await self.postgres_store.db_pool.runInteraction( + "_setup_events_stream_seqs", _setup_events_stream_seqs_set_pos, ) From 427ede619febc4e57fed09364c00c53ddfc5d9c7 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 13 Nov 2020 12:03:51 +0000 Subject: [PATCH 26/26] Add metrics for tracking 3PID /requestToken requests. (#8712) The main use case is to see how many requests are being made, and how many are second/third/etc attempts. If there is a large number of retries, that likely indicates a delivery problem. --- changelog.d/8712.misc | 1 + synapse/metrics/__init__.py | 10 ++++++++++ synapse/rest/client/v2_alpha/account.py | 13 +++++++++++++ synapse/rest/client/v2_alpha/register.py | 9 +++++++++ 4 files changed, 33 insertions(+) create mode 100644 changelog.d/8712.misc diff --git a/changelog.d/8712.misc b/changelog.d/8712.misc new file mode 100644 index 0000000000..90d63a9a23 --- /dev/null +++ b/changelog.d/8712.misc @@ -0,0 +1 @@ +Add metrics that allow the local sysadmin to track 3PID `/requestToken` requests. diff --git a/synapse/metrics/__init__.py b/synapse/metrics/__init__.py index b8d2a8e8a9..cbf0dbb871 100644 --- a/synapse/metrics/__init__.py +++ b/synapse/metrics/__init__.py @@ -502,6 +502,16 @@ build_info.labels( last_ticked = time.time() +# 3PID send info +threepid_send_requests = Histogram( + "synapse_threepid_send_requests_with_tries", + documentation="Number of requests for a 3pid token by try count.
Note if" + " there is a request with try count of 4, then there would have been one" + " each for 1, 2 and 3", + buckets=(1, 2, 3, 4, 5, 10), + labelnames=("type", "reason"), +) + class ReactorLastSeenMetric: def collect(self): diff --git a/synapse/rest/client/v2_alpha/account.py b/synapse/rest/client/v2_alpha/account.py index 51effc4d8e..a54e1011f7 100644 --- a/synapse/rest/client/v2_alpha/account.py +++ b/synapse/rest/client/v2_alpha/account.py @@ -38,6 +38,7 @@ from synapse.http.servlet import ( parse_json_object_from_request, parse_string, ) +from synapse.metrics import threepid_send_requests from synapse.push.mailer import Mailer from synapse.util.msisdn import phone_number_to_msisdn from synapse.util.stringutils import assert_valid_client_secret, random_string @@ -143,6 +144,10 @@ class EmailPasswordRequestTokenRestServlet(RestServlet): # Wrap the session id in a JSON object ret = {"sid": sid} + threepid_send_requests.labels(type="email", reason="password_reset").observe( + send_attempt + ) + return 200, ret @@ -411,6 +416,10 @@ class EmailThreepidRequestTokenRestServlet(RestServlet): # Wrap the session id in a JSON object ret = {"sid": sid} + threepid_send_requests.labels(type="email", reason="add_threepid").observe( + send_attempt + ) + return 200, ret @@ -481,6 +490,10 @@ class MsisdnThreepidRequestTokenRestServlet(RestServlet): next_link, ) + threepid_send_requests.labels(type="msisdn", reason="add_threepid").observe( + send_attempt + ) + return 200, ret diff --git a/synapse/rest/client/v2_alpha/register.py b/synapse/rest/client/v2_alpha/register.py index 8f2c8cd991..ea68114026 100644 --- a/synapse/rest/client/v2_alpha/register.py +++ b/synapse/rest/client/v2_alpha/register.py @@ -45,6 +45,7 @@ from synapse.http.servlet import ( parse_json_object_from_request, parse_string, ) +from synapse.metrics import threepid_send_requests from synapse.push.mailer import Mailer from synapse.util.msisdn import phone_number_to_msisdn from synapse.util.ratelimitutils import FederationRateLimiter @@ -163,6 +164,10 @@ class EmailRegisterRequestTokenRestServlet(RestServlet): # Wrap the session id in a JSON object ret = {"sid": sid} + threepid_send_requests.labels(type="email", reason="register").observe( + send_attempt + ) + return 200, ret @@ -234,6 +239,10 @@ class MsisdnRegisterRequestTokenRestServlet(RestServlet): next_link, ) + threepid_send_requests.labels(type="msisdn", reason="register").observe( + send_attempt + ) + return 200, ret