From bf82b56babc9e2cacba34f8878da3b3834914b3a Mon Sep 17 00:00:00 2001 From: Dirk Klimpel <5740567+dklimpel@users.noreply.github.com> Date: Wed, 1 Feb 2023 16:45:19 +0100 Subject: [PATCH] Add more user information to export-data command. (#14894) * The user's profile information. * The user's devices. * The user's connections / IP address information. --- .ci/scripts/test_export_data_command.sh | 10 ++-- changelog.d/14894.feature | 1 + docs/usage/administration/admin_faq.md | 80 ++++++++++++++++++++----- synapse/app/admin_cmd.py | 32 +++++++++- synapse/handlers/admin.py | 43 +++++++++++++ tests/handlers/test_admin.py | 60 +++++++++++++++++++ 6 files changed, 206 insertions(+), 20 deletions(-) create mode 100644 changelog.d/14894.feature diff --git a/.ci/scripts/test_export_data_command.sh b/.ci/scripts/test_export_data_command.sh index 9f6c49acff..36f836345c 100755 --- a/.ci/scripts/test_export_data_command.sh +++ b/.ci/scripts/test_export_data_command.sh @@ -23,8 +23,9 @@ poetry run python -m synapse.app.admin_cmd -c .ci/sqlite-config.yaml export-dat --output-directory /tmp/export_data # Test that the output directory exists and contains the rooms directory -dir="/tmp/export_data/rooms" -if [ -d "$dir" ]; then +dir_r="/tmp/export_data/rooms" +dir_u="/tmp/export_data/user_data" +if [ -d "$dir_r" ] && [ -d "$dir_u" ]; then echo "Command successful, this test passes" else echo "No output directories found, the command fails against a sqlite database." @@ -43,8 +44,9 @@ poetry run python -m synapse.app.admin_cmd -c .ci/postgres-config.yaml export-d --output-directory /tmp/export_data2 # Test that the output directory exists and contains the rooms directory -dir2="/tmp/export_data2/rooms" -if [ -d "$dir2" ]; then +dir_r2="/tmp/export_data2/rooms" +dir_u2="/tmp/export_data2/user_data" +if [ -d "$dir_r2" ] && [ -d "$dir_u2" ]; then echo "Command successful, this test passes" else echo "No output directories found, the command fails against a postgres database." diff --git a/changelog.d/14894.feature b/changelog.d/14894.feature new file mode 100644 index 0000000000..d22741d079 --- /dev/null +++ b/changelog.d/14894.feature @@ -0,0 +1 @@ +Adds profile information, devices and connections to the user data export via command line. \ No newline at end of file diff --git a/docs/usage/administration/admin_faq.md b/docs/usage/administration/admin_faq.md index 18ce6171db..7a27741199 100644 --- a/docs/usage/administration/admin_faq.md +++ b/docs/usage/administration/admin_faq.md @@ -2,13 +2,19 @@ How do I become a server admin? --- -If your server already has an admin account you should use the [User Admin API](../../admin_api/user_admin_api.md#change-whether-a-user-is-a-server-administrator-or-not) to promote other accounts to become admins. +If your server already has an admin account you should use the +[User Admin API](../../admin_api/user_admin_api.md#change-whether-a-user-is-a-server-administrator-or-not) +to promote other accounts to become admins. -If you don't have any admin accounts yet you won't be able to use the admin API, so you'll have to edit the database manually. Manually editing the database is generally not recommended so once you have an admin account: use the admin APIs to make further changes. +If you don't have any admin accounts yet you won't be able to use the admin API, +so you'll have to edit the database manually. Manually editing the database is +generally not recommended so once you have an admin account: use the admin APIs +to make further changes. ```sql UPDATE users SET admin = 1 WHERE name = '@foo:bar.com'; ``` + What servers are my server talking to? --- Run this sql query on your db: @@ -36,8 +42,38 @@ How can I export user data? --- Synapse includes a Python command to export data for a specific user. It takes the homeserver configuration file and the full Matrix ID of the user to export: + ```console -python -m synapse.app.admin_cmd -c export-data +python -m synapse.app.admin_cmd -c export-data --output-directory +``` + +If you uses [Poetry](../../development/dependencies.md#managing-dependencies-with-poetry) +to run Synapse: + +```console +poetry run python -m synapse.app.admin_cmd -c export-data --output-directory +``` + +The directory to store the export data in can be customised with the +`--output-directory` parameter; ensure that the provided directory is +empty. If this parameter is not provided, Synapse defaults to creating +a temporary directory (which starts with "synapse-exfiltrate") in `/tmp`, +`/var/tmp`, or `/usr/tmp`, in that order. + +The exported data has the following layout: + +``` +output-directory +├───rooms +│ └─── +│ ├───events +│ ├───state +│ ├───invite_state +│ └───knock_state +└───user_data + ├───connections + ├───devices + └───profile ``` Manually resetting passwords @@ -50,21 +86,29 @@ I have a problem with my server. Can I just delete my database and start again? --- Deleting your database is unlikely to make anything better. -It's easy to make the mistake of thinking that you can start again from a clean slate by dropping your database, but things don't work like that in a federated network: lots of other servers have information about your server. +It's easy to make the mistake of thinking that you can start again from a clean +slate by dropping your database, but things don't work like that in a federated +network: lots of other servers have information about your server. -For example: other servers might think that you are in a room, your server will think that you are not, and you'll probably be unable to interact with that room in a sensible way ever again. +For example: other servers might think that you are in a room, your server will +think that you are not, and you'll probably be unable to interact with that room +in a sensible way ever again. -In general, there are better solutions to any problem than dropping the database. Come and seek help in https://matrix.to/#/#synapse:matrix.org. +In general, there are better solutions to any problem than dropping the database. +Come and seek help in https://matrix.to/#/#synapse:matrix.org. There are two exceptions when it might be sensible to delete your database and start again: -* You have *never* joined any rooms which are federated with other servers. For instance, a local deployment which the outside world can't talk to. -* You are changing the `server_name` in the homeserver configuration. In effect this makes your server a completely new one from the point of view of the network, so in this case it makes sense to start with a clean database. +* You have *never* joined any rooms which are federated with other servers. For +instance, a local deployment which the outside world can't talk to. +* You are changing the `server_name` in the homeserver configuration. In effect +this makes your server a completely new one from the point of view of the network, +so in this case it makes sense to start with a clean database. (In both cases you probably also want to clear out the media_store.) I've stuffed up access to my room, how can I delete it to free up the alias? --- Using the following curl command: -``` +```console curl -H 'Authorization: Bearer ' -X DELETE https://matrix.org/_matrix/client/r0/directory/room/ ``` `` - can be obtained in riot by looking in the riot settings, down the bottom is: @@ -75,19 +119,25 @@ Access Token:\ How can I find the lines corresponding to a given HTTP request in my homeserver log? --- -Synapse tags each log line according to the HTTP request it is processing. When it finishes processing each request, it logs a line containing the words `Processed request: `. For example: +Synapse tags each log line according to the HTTP request it is processing. When +it finishes processing each request, it logs a line containing the words +`Processed request: `. For example: ``` 2019-02-14 22:35:08,196 - synapse.access.http.8008 - 302 - INFO - GET-37 - ::1 - 8008 - {@richvdh:localhost} Processed request: 0.173sec/0.001sec (0.002sec, 0.000sec) (0.027sec/0.026sec/2) 687B 200 "GET /_matrix/client/r0/sync HTTP/1.1" "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/69.0.3497.100 Safari/537.36" [0 dbevts]" ``` -Here we can see that the request has been tagged with `GET-37`. (The tag depends on the method of the HTTP request, so might start with `GET-`, `PUT-`, `POST-`, `OPTIONS-` or `DELETE-`.) So to find all lines corresponding to this request, we can do: +Here we can see that the request has been tagged with `GET-37`. (The tag depends +on the method of the HTTP request, so might start with `GET-`, `PUT-`, `POST-`, +`OPTIONS-` or `DELETE-`.) So to find all lines corresponding to this request, we can do: -``` +```console grep 'GET-37' homeserver.log ``` -If you want to paste that output into a github issue or matrix room, please remember to surround it with triple-backticks (```) to make it legible (see [quoting code](https://help.github.com/en/articles/basic-writing-and-formatting-syntax#quoting-code)). +If you want to paste that output into a github issue or matrix room, please +remember to surround it with triple-backticks (```) to make it legible +(see [quoting code](https://help.github.com/en/articles/basic-writing-and-formatting-syntax#quoting-code)). What do all those fields in the 'Processed' line mean? @@ -127,7 +177,7 @@ This is normally caused by a misconfiguration in your reverse-proxy. See [the re Help!! Synapse is slow and eats all my RAM/CPU! ------------------------------------------------ +--- First, ensure you are running the latest version of Synapse, using Python 3 with a [PostgreSQL database](../../postgres.md). @@ -169,7 +219,7 @@ in the Synapse config file: [see here](../configuration/config_documentation.md# Running out of File Handles ---------------------------- +--- If Synapse runs out of file handles, it typically fails badly - live-locking at 100% CPU, and/or failing to accept new TCP connections (blocking the diff --git a/synapse/app/admin_cmd.py b/synapse/app/admin_cmd.py index 165d1c5db0..fe7afb9475 100644 --- a/synapse/app/admin_cmd.py +++ b/synapse/app/admin_cmd.py @@ -35,6 +35,7 @@ from synapse.storage.databases.main.appservice import ( ApplicationServiceTransactionWorkerStore, ApplicationServiceWorkerStore, ) +from synapse.storage.databases.main.client_ips import ClientIpWorkerStore from synapse.storage.databases.main.deviceinbox import DeviceInboxWorkerStore from synapse.storage.databases.main.devices import DeviceWorkerStore from synapse.storage.databases.main.event_federation import EventFederationWorkerStore @@ -43,6 +44,7 @@ from synapse.storage.databases.main.event_push_actions import ( ) from synapse.storage.databases.main.events_worker import EventsWorkerStore from synapse.storage.databases.main.filtering import FilteringWorkerStore +from synapse.storage.databases.main.profile import ProfileWorkerStore from synapse.storage.databases.main.push_rule import PushRulesWorkerStore from synapse.storage.databases.main.receipts import ReceiptsWorkerStore from synapse.storage.databases.main.registration import RegistrationWorkerStore @@ -54,7 +56,7 @@ from synapse.storage.databases.main.state import StateGroupWorkerStore from synapse.storage.databases.main.stream import StreamWorkerStore from synapse.storage.databases.main.tags import TagsWorkerStore from synapse.storage.databases.main.user_erasure_store import UserErasureWorkerStore -from synapse.types import StateMap +from synapse.types import JsonDict, StateMap from synapse.util import SYNAPSE_VERSION from synapse.util.logcontext import LoggingContext @@ -63,6 +65,7 @@ logger = logging.getLogger("synapse.app.admin_cmd") class AdminCmdSlavedStore( FilteringWorkerStore, + ClientIpWorkerStore, DeviceWorkerStore, TagsWorkerStore, DeviceInboxWorkerStore, @@ -82,6 +85,7 @@ class AdminCmdSlavedStore( EventsWorkerStore, RegistrationWorkerStore, RoomWorkerStore, + ProfileWorkerStore, ): def __init__( self, @@ -192,6 +196,32 @@ class FileExfiltrationWriter(ExfiltrationWriter): for event in state.values(): print(json.dumps(event), file=f) + def write_profile(self, profile: JsonDict) -> None: + user_directory = os.path.join(self.base_directory, "user_data") + os.makedirs(user_directory, exist_ok=True) + profile_file = os.path.join(user_directory, "profile") + + with open(profile_file, "a") as f: + print(json.dumps(profile), file=f) + + def write_devices(self, devices: List[JsonDict]) -> None: + user_directory = os.path.join(self.base_directory, "user_data") + os.makedirs(user_directory, exist_ok=True) + device_file = os.path.join(user_directory, "devices") + + for device in devices: + with open(device_file, "a") as f: + print(json.dumps(device), file=f) + + def write_connections(self, connections: List[JsonDict]) -> None: + user_directory = os.path.join(self.base_directory, "user_data") + os.makedirs(user_directory, exist_ok=True) + connection_file = os.path.join(user_directory, "connections") + + for connection in connections: + with open(connection_file, "a") as f: + print(json.dumps(connection), file=f) + def finished(self) -> str: return self.base_directory diff --git a/synapse/handlers/admin.py b/synapse/handlers/admin.py index c81ea34758..b03c214b14 100644 --- a/synapse/handlers/admin.py +++ b/synapse/handlers/admin.py @@ -30,6 +30,7 @@ logger = logging.getLogger(__name__) class AdminHandler: def __init__(self, hs: "HomeServer"): self.store = hs.get_datastores().main + self._device_handler = hs.get_device_handler() self._storage_controllers = hs.get_storage_controllers() self._state_storage_controller = self._storage_controllers.state self._msc3866_enabled = hs.config.experimental.msc3866.enabled @@ -247,6 +248,21 @@ class AdminHandler: ) writer.write_state(room_id, event_id, state) + # Get the user profile + profile = await self.get_user(UserID.from_string(user_id)) + if profile is not None: + writer.write_profile(profile) + + # Get all devices the user has + devices = await self._device_handler.get_devices_by_user(user_id) + writer.write_devices(devices) + + # Get all connections the user has + connections = await self.get_whois(UserID.from_string(user_id)) + writer.write_connections( + connections["devices"][""]["sessions"][0]["connections"] + ) + return writer.finished() @@ -297,6 +313,33 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta): """ raise NotImplementedError() + @abc.abstractmethod + def write_profile(self, profile: JsonDict) -> None: + """Write the profile of a user. + + Args: + profile: The user profile. + """ + raise NotImplementedError() + + @abc.abstractmethod + def write_devices(self, devices: List[JsonDict]) -> None: + """Write the devices of a user. + + Args: + devices: The list of devices. + """ + raise NotImplementedError() + + @abc.abstractmethod + def write_connections(self, connections: List[JsonDict]) -> None: + """Write the connections of a user. + + Args: + connections: The list of connections / sessions. + """ + raise NotImplementedError() + @abc.abstractmethod def finished(self) -> Any: """Called when all data has successfully been exported and written. diff --git a/tests/handlers/test_admin.py b/tests/handlers/test_admin.py index c1579dac61..6f300b8e11 100644 --- a/tests/handlers/test_admin.py +++ b/tests/handlers/test_admin.py @@ -38,6 +38,7 @@ class ExfiltrateData(unittest.HomeserverTestCase): def prepare(self, reactor: MemoryReactor, clock: Clock, hs: HomeServer) -> None: self.admin_handler = hs.get_admin_handler() + self._store = hs.get_datastores().main self.user1 = self.register_user("user1", "password") self.token1 = self.login("user1", "password") @@ -236,3 +237,62 @@ class ExfiltrateData(unittest.HomeserverTestCase): self.assertEqual(args[0], room_id) self.assertEqual(args[1].content["membership"], "knock") self.assertTrue(args[2]) # Assert there is at least one bit of state + + def test_profile(self) -> None: + """Tests that user profile get exported.""" + writer = Mock() + + self.get_success(self.admin_handler.export_user_data(self.user2, writer)) + + writer.write_events.assert_not_called() + writer.write_profile.assert_called_once() + + # check only a few values, not all available + args = writer.write_profile.call_args[0] + self.assertEqual(args[0]["name"], self.user2) + self.assertIn("displayname", args[0]) + self.assertIn("avatar_url", args[0]) + self.assertIn("threepids", args[0]) + self.assertIn("external_ids", args[0]) + self.assertIn("creation_ts", args[0]) + + def test_devices(self) -> None: + """Tests that user devices get exported.""" + writer = Mock() + + self.get_success(self.admin_handler.export_user_data(self.user2, writer)) + + writer.write_events.assert_not_called() + writer.write_devices.assert_called_once() + + args = writer.write_devices.call_args[0] + self.assertEqual(len(args[0]), 1) + self.assertEqual(args[0][0]["user_id"], self.user2) + self.assertIn("device_id", args[0][0]) + self.assertIsNone(args[0][0]["display_name"]) + self.assertIsNone(args[0][0]["last_seen_user_agent"]) + self.assertIsNone(args[0][0]["last_seen_ts"]) + self.assertIsNone(args[0][0]["last_seen_ip"]) + + def test_connections(self) -> None: + """Tests that user sessions / connections get exported.""" + # Insert a user IP + self.get_success( + self._store.insert_client_ip( + self.user2, "access_token", "ip", "user_agent", "MY_DEVICE" + ) + ) + + writer = Mock() + + self.get_success(self.admin_handler.export_user_data(self.user2, writer)) + + writer.write_events.assert_not_called() + writer.write_connections.assert_called_once() + + args = writer.write_connections.call_args[0] + self.assertEqual(len(args[0]), 1) + self.assertEqual(args[0][0]["ip"], "ip") + self.assertEqual(args[0][0]["user_agent"], "user_agent") + self.assertGreater(args[0][0]["last_seen"], 0) + self.assertNotIn("access_token", args[0][0])