Add information on uploaded media to user export command. (#15107)
							parent
							
								
									452b009eb0
								
							
						
					
					
						commit
						a068ad7dd4
					
				|  | @ -0,0 +1 @@ | |||
| Add media information to the command line [user data export tool](https://matrix-org.github.io/synapse/v1.79/usage/administration/admin_faq.html#how-can-i-export-user-data). | ||||
|  | @ -70,13 +70,55 @@ output-directory | |||
| │       ├───state | ||||
| │       ├───invite_state | ||||
| │       └───knock_state | ||||
| └───user_data | ||||
|     ├───account_data | ||||
|     │   ├───global | ||||
|     │   └───<room_id> | ||||
|     ├───connections | ||||
|     ├───devices | ||||
|     └───profile | ||||
| ├───user_data | ||||
| │   ├───account_data | ||||
| │   │   ├───global | ||||
| │   │   └───<room_id> | ||||
| │   ├───connections | ||||
| │   ├───devices | ||||
| │   └───profile | ||||
| └───media_ids | ||||
|     └───<media_id> | ||||
| ``` | ||||
| 
 | ||||
| The `media_ids` folder contains only the metadata of the media uploaded by the user. | ||||
| It does not contain the media itself. | ||||
| Furthermore, only the `media_ids` that Synapse manages itself are exported. | ||||
| If another media repository (e.g. [matrix-media-repo](https://github.com/turt2live/matrix-media-repo)) | ||||
| is used, the data must be exported separately. | ||||
| 
 | ||||
| The media files can be downloaded using the exported `media_ids`. | ||||
| Media that was sent in encrypted rooms can only be retrieved in encrypted form. | ||||
| The following script can help with downloading the media files: | ||||
| 
 | ||||
| ```bash | ||||
| #!/usr/bin/env bash | ||||
| 
 | ||||
| # Parameters | ||||
| # | ||||
| #   source_directory: Directory which contains the export with the media_ids. | ||||
| #   target_directory: Directory into which all files are to be downloaded. | ||||
| #   repository_url: Address of the media repository or media worker. | ||||
| #   serverName: Name of the server (`server_name` from homeserver.yaml). | ||||
| # | ||||
| #   Example: | ||||
| #       ./download_media.sh /tmp/export_data/media_ids/ /tmp/export_data/media_files/ http://localhost:8008 matrix.example.com | ||||
| 
 | ||||
| source_directory=$1 | ||||
| target_directory=$2 | ||||
| repository_url=$3 | ||||
| serverName=$4 | ||||
| 
 | ||||
| mkdir -p $target_directory | ||||
| 
 | ||||
| for file in $source_directory/*; do | ||||
|     filename=$(basename ${file}) | ||||
|     url=$repository_url/_matrix/media/v3/download/$serverName/$filename | ||||
|     echo "Downloading $filename - $url" | ||||
|     if ! wget -o /dev/null -P $target_directory $url; then | ||||
|         echo "Could not download $filename" | ||||
|     fi | ||||
| done | ||||
| ``` | ||||
| 
 | ||||
| Manually resetting passwords | ||||
|  | @ -87,7 +129,7 @@ can reset a user's password using the [admin API](../../admin_api/user_admin_api | |||
| 
 | ||||
| I have a problem with my server. Can I just delete my database and start again? | ||||
| --- | ||||
| Deleting your database is unlikely to make anything better.  | ||||
| Deleting your database is unlikely to make anything better. | ||||
| 
 | ||||
| It's easy to make the mistake of thinking that you can start again from a clean | ||||
| slate by dropping your database, but things don't work like that in a federated | ||||
|  | @ -102,7 +144,7 @@ Come and seek help in https://matrix.to/#/#synapse:matrix.org. | |||
| 
 | ||||
| There are two exceptions when it might be sensible to delete your database and start again: | ||||
| * You have *never* joined any rooms which are federated with other servers. For | ||||
| instance, a local deployment which the outside world can't talk to.  | ||||
| instance, a local deployment which the outside world can't talk to. | ||||
| * You are changing the `server_name` in the homeserver configuration. In effect | ||||
| this makes your server a completely new one from the point of view of the network, | ||||
| so in this case it makes sense to start with a clean database. | ||||
|  | @ -115,7 +157,7 @@ Using the following curl command: | |||
| curl -H 'Authorization: Bearer <access-token>' -X DELETE https://matrix.org/_matrix/client/r0/directory/room/<room-alias> | ||||
| ``` | ||||
| `<access-token>` - can be obtained in riot by looking in the riot settings, down the bottom is: | ||||
| Access Token:\<click to reveal\>  | ||||
| Access Token:\<click to reveal\> | ||||
| 
 | ||||
| `<room-alias>` - the room alias, e.g. #my_room:matrix.org. This may also need to be URL encoded, for example %23my_room%3Amatrix.org | ||||
| 
 | ||||
|  | @ -152,13 +194,13 @@ What are the biggest rooms on my server? | |||
| --- | ||||
| 
 | ||||
| ```sql | ||||
| SELECT s.canonical_alias, g.room_id, count(*) AS num_rows  | ||||
| FROM  | ||||
|   state_groups_state AS g,  | ||||
|   room_stats_state AS s  | ||||
| WHERE g.room_id = s.room_id  | ||||
| SELECT s.canonical_alias, g.room_id, count(*) AS num_rows | ||||
| FROM | ||||
|   state_groups_state AS g, | ||||
|   room_stats_state AS s | ||||
| WHERE g.room_id = s.room_id | ||||
| GROUP BY s.canonical_alias, g.room_id | ||||
| ORDER BY num_rows desc  | ||||
| ORDER BY num_rows desc | ||||
| LIMIT 10; | ||||
| ``` | ||||
| 
 | ||||
|  |  | |||
|  | @ -44,6 +44,7 @@ from synapse.storage.databases.main.event_push_actions import ( | |||
| ) | ||||
| from synapse.storage.databases.main.events_worker import EventsWorkerStore | ||||
| from synapse.storage.databases.main.filtering import FilteringWorkerStore | ||||
| from synapse.storage.databases.main.media_repository import MediaRepositoryStore | ||||
| from synapse.storage.databases.main.profile import ProfileWorkerStore | ||||
| from synapse.storage.databases.main.push_rule import PushRulesWorkerStore | ||||
| from synapse.storage.databases.main.receipts import ReceiptsWorkerStore | ||||
|  | @ -86,6 +87,7 @@ class AdminCmdSlavedStore( | |||
|     RegistrationWorkerStore, | ||||
|     RoomWorkerStore, | ||||
|     ProfileWorkerStore, | ||||
|     MediaRepositoryStore, | ||||
| ): | ||||
|     def __init__( | ||||
|         self, | ||||
|  | @ -235,6 +237,14 @@ class FileExfiltrationWriter(ExfiltrationWriter): | |||
|         with open(account_data_file, "a") as f: | ||||
|             json.dump(account_data, fp=f) | ||||
| 
 | ||||
|     def write_media_id(self, media_id: str, media_metadata: JsonDict) -> None: | ||||
|         file_directory = os.path.join(self.base_directory, "media_ids") | ||||
|         os.makedirs(file_directory, exist_ok=True) | ||||
|         media_id_file = os.path.join(file_directory, media_id) | ||||
| 
 | ||||
|         with open(media_id_file, "w") as f: | ||||
|             json.dump(media_metadata, fp=f) | ||||
| 
 | ||||
|     def finished(self) -> str: | ||||
|         return self.base_directory | ||||
| 
 | ||||
|  |  | |||
|  | @ -252,16 +252,19 @@ class AdminHandler: | |||
|         profile = await self.get_user(UserID.from_string(user_id)) | ||||
|         if profile is not None: | ||||
|             writer.write_profile(profile) | ||||
|             logger.info("[%s] Written profile", user_id) | ||||
| 
 | ||||
|         # Get all devices the user has | ||||
|         devices = await self._device_handler.get_devices_by_user(user_id) | ||||
|         writer.write_devices(devices) | ||||
|         logger.info("[%s] Written %s devices", user_id, len(devices)) | ||||
| 
 | ||||
|         # Get all connections the user has | ||||
|         connections = await self.get_whois(UserID.from_string(user_id)) | ||||
|         writer.write_connections( | ||||
|             connections["devices"][""]["sessions"][0]["connections"] | ||||
|         ) | ||||
|         logger.info("[%s] Written %s connections", user_id, len(connections)) | ||||
| 
 | ||||
|         # Get all account data the user has global and in rooms | ||||
|         global_data = await self._store.get_global_account_data_for_user(user_id) | ||||
|  | @ -269,6 +272,29 @@ class AdminHandler: | |||
|         writer.write_account_data("global", global_data) | ||||
|         for room_id in by_room_data: | ||||
|             writer.write_account_data(room_id, by_room_data[room_id]) | ||||
|         logger.info( | ||||
|             "[%s] Written account data for %s rooms", user_id, len(by_room_data) | ||||
|         ) | ||||
| 
 | ||||
|         # Get all media ids the user has | ||||
|         limit = 100 | ||||
|         start = 0 | ||||
|         while True: | ||||
|             media_ids, total = await self._store.get_local_media_by_user_paginate( | ||||
|                 start, limit, user_id | ||||
|             ) | ||||
|             for media in media_ids: | ||||
|                 writer.write_media_id(media["media_id"], media) | ||||
| 
 | ||||
|             logger.info( | ||||
|                 "[%s] Written %d media_ids of %s", | ||||
|                 user_id, | ||||
|                 (start + len(media_ids)), | ||||
|                 total, | ||||
|             ) | ||||
|             if (start + limit) >= total: | ||||
|                 break | ||||
|             start += limit | ||||
| 
 | ||||
|         return writer.finished() | ||||
| 
 | ||||
|  | @ -359,6 +385,18 @@ class ExfiltrationWriter(metaclass=abc.ABCMeta): | |||
|         """ | ||||
|         raise NotImplementedError() | ||||
| 
 | ||||
|     @abc.abstractmethod | ||||
|     def write_media_id(self, media_id: str, media_metadata: JsonDict) -> None: | ||||
|         """Write the media's metadata of a user. | ||||
|         Exports only the metadata, as this can be fetched from the database via | ||||
|         read only. In order to access the files, a connection to the correct | ||||
|         media repository would be required. | ||||
| 
 | ||||
|         Args: | ||||
|             media_id: ID of the media. | ||||
|             media_metadata: Metadata of one media file. | ||||
|         """ | ||||
| 
 | ||||
|     @abc.abstractmethod | ||||
|     def finished(self) -> Any: | ||||
|         """Called when all data has successfully been exported and written. | ||||
|  |  | |||
|  | @ -23,6 +23,7 @@ from synapse.api.constants import EventTypes, JoinRules | |||
| from synapse.api.room_versions import RoomVersions | ||||
| from synapse.rest.client import knock, login, room | ||||
| from synapse.server import HomeServer | ||||
| from synapse.types import UserID | ||||
| from synapse.util import Clock | ||||
| 
 | ||||
| from tests import unittest | ||||
|  | @ -323,3 +324,31 @@ class ExfiltrateData(unittest.HomeserverTestCase): | |||
|         args = writer.write_account_data.call_args_list[1][0] | ||||
|         self.assertEqual(args[0], "test_room") | ||||
|         self.assertEqual(args[1]["m.per_room"]["b"], 2) | ||||
| 
 | ||||
|     def test_media_ids(self) -> None: | ||||
|         """Tests that media's metadata get exported.""" | ||||
| 
 | ||||
|         self.get_success( | ||||
|             self._store.store_local_media( | ||||
|                 media_id="media_1", | ||||
|                 media_type="image/png", | ||||
|                 time_now_ms=self.clock.time_msec(), | ||||
|                 upload_name=None, | ||||
|                 media_length=50, | ||||
|                 user_id=UserID.from_string(self.user2), | ||||
|             ) | ||||
|         ) | ||||
| 
 | ||||
|         writer = Mock() | ||||
| 
 | ||||
|         self.get_success(self.admin_handler.export_user_data(self.user2, writer)) | ||||
| 
 | ||||
|         writer.write_media_id.assert_called_once() | ||||
| 
 | ||||
|         args = writer.write_media_id.call_args[0] | ||||
|         self.assertEqual(args[0], "media_1") | ||||
|         self.assertEqual(args[1]["media_id"], "media_1") | ||||
|         self.assertEqual(args[1]["media_length"], 50) | ||||
|         self.assertGreater(args[1]["created_ts"], 0) | ||||
|         self.assertIsNone(args[1]["upload_name"]) | ||||
|         self.assertIsNone(args[1]["last_access_ts"]) | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Dirk Klimpel
						Dirk Klimpel