From 4abc988c6a1020a8f9e5d3aec92f4b817f6e352e Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 11 Mar 2019 21:11:36 +1100 Subject: [PATCH 01/15] initial --- synapse/handlers/user_directory.py | 41 ++++++++++++++++++- .../storage/schema/delta/53/user_share.sql | 3 -- .../schema/delta/53/users_in_public_rooms.sql | 28 +++++++++++++ synapse/storage/user_directory.py | 34 +++++++++++++++ tests/handlers/test_user_directory.py | 12 ++++++ 5 files changed, 114 insertions(+), 4 deletions(-) create mode 100644 synapse/storage/schema/delta/53/users_in_public_rooms.sql diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index c21da8343a..fc45123d0c 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -64,6 +64,10 @@ class UserDirectoryHandler(object): # This is a set of user_id's we've inserted already self.initially_handled_users = set() + self.register_background_update_handler( + "users_in_public_rooms_initial", self._populate_users_in_public_rooms + ) + # The current position in the current_state_delta stream self.pos = None @@ -77,6 +81,41 @@ class UserDirectoryHandler(object): # we start populating the user directory self.clock.call_later(0, self.notify_new_event) + @defer.inlineCallbacks + def _populate_users_in_public_rooms(self, progress, batch_size): + """ + Populate the users_in_public_rooms table with the contents of the + users_who_share_public_rooms table. + """ + + def _fetch(txn): + sql = "SELECT DISTINCT other_user_id FROM users_who_share_public_rooms" + txn.execute(sql) + return txn.fetchall() + + users = yield self.store.runInteraction( + "populate_users_in_public_rooms_fetch", _fetch + ) + + if users: + + def _fill(txn): + self._simple_upsert_many_txn( + txn, + table="users_in_public_rooms", + key_names=["user_id"], + key_values=users, + value_names=(), + value_values=None, + ) + + users = yield self.store.runInteraction( + "populate_users_in_public_rooms_fill", _fill + ) + + yield self._end_background_update("users_in_public_rooms_initial") + defer.returnValue(1) + def search_users(self, user_id, search_term, limit): """Searches for users in directory @@ -231,7 +270,7 @@ class UserDirectoryHandler(object): unhandled_users = user_ids - self.initially_handled_users yield self.store.add_profiles_to_user_dir( - {user_id: users_with_profile[user_id] for user_id in unhandled_users}, + {user_id: users_with_profile[user_id] for user_id in unhandled_users} ) self.initially_handled_users |= unhandled_users diff --git a/synapse/storage/schema/delta/53/user_share.sql b/synapse/storage/schema/delta/53/user_share.sql index 14424ded0c..5831b1a6f8 100644 --- a/synapse/storage/schema/delta/53/user_share.sql +++ b/synapse/storage/schema/delta/53/user_share.sql @@ -16,9 +16,6 @@ -- Old disused version of the tables below. DROP TABLE IF EXISTS users_who_share_rooms; --- This is no longer used because it's duplicated by the users_who_share_public_rooms -DROP TABLE IF EXISTS users_in_public_rooms; - -- Tables keeping track of what users share rooms. This is a map of local users -- to local or remote users, per room. Remote users cannot be in the user_id -- column, only the other_user_id column. There are two tables, one for public diff --git a/synapse/storage/schema/delta/53/users_in_public_rooms.sql b/synapse/storage/schema/delta/53/users_in_public_rooms.sql new file mode 100644 index 0000000000..bd57fd778b --- /dev/null +++ b/synapse/storage/schema/delta/53/users_in_public_rooms.sql @@ -0,0 +1,28 @@ +/* Copyright 2019 New Vector Ltd + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +-- We don't need the old version of this table. +DROP TABLE IF EXISTS users_in_public_rooms; + +-- Track what users are in public rooms. +CREATE TABLE IF NOT EXISTS users_in_public_rooms ( + user_id TEXT NOT NULL +); + +CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id); + +-- Fill the table. +INSERT INTO background_updates (update_name, progress_json) VALUES + ('users_in_public_rooms_initial', '{}'); diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 2317d22ed6..8f40277b50 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -241,6 +241,9 @@ class UserDirectoryStore(SQLBaseStore): self._simple_delete_txn( txn, table="user_directory_search", keyvalues={"user_id": user_id} ) + self._simple_delete_txn( + txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + ) self._simple_delete_txn( txn, table="users_who_share_public_rooms", @@ -339,6 +342,21 @@ class UserDirectoryStore(SQLBaseStore): value_names=(), value_values=None, ) + + # If it's a public room, also update them in users_in_public_rooms. + # We don't look before they're in the table before we do it, as it's + # more efficient to simply have Postgres do that (one UPSERT vs one + # SELECT and maybe one INSERT). + if not share_private: + for user_id in set([x[1] for x in user_id_tuples]): + self._simple_upsert_txn( + txn, + "users_in_public_rooms", + keyvalues={"user_id": user_id}, + values={}, + desc="add_user_as_in_public_room", + ) + for user_id, other_user_id in user_id_tuples: txn.call_after( self.get_users_who_share_room_from_dir.invalidate, (user_id,) @@ -379,6 +397,21 @@ class UserDirectoryStore(SQLBaseStore): table="users_who_share_public_rooms", keyvalues={"other_user_id": user_id, "room_id": room_id}, ) + + # Are the users still in a public room after we deleted them from this one? + still_in_public = self._simple_select_one_onecol_txn( + txn, + "users_who_share_public_rooms", + keyvalues={"other_user_id": user_id}, + retcol="other_user_id", + allow_none=True, + ) + + if still_in_public is None: + self._simple_delete_txn( + txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + ) + txn.call_after( self.get_users_who_share_room_from_dir.invalidate, (user_id,) ) @@ -452,6 +485,7 @@ class UserDirectoryStore(SQLBaseStore): def _delete_all_from_user_dir_txn(txn): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") + txn.execute("DELETE FROM users_in_public_rooms") txn.execute("DELETE FROM users_who_share_public_rooms") txn.execute("DELETE FROM users_who_share_private_rooms") txn.call_after(self.get_user_in_directory.invalidate_all) diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index a16a2dc67b..0e0ac0a48b 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -121,6 +121,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.assertEqual( self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) ) + self.assertEqual(set(public_users), set([u1, u2])) # We get one search result when searching for user2 by user1. s = self.get_success(self.handler.search_users(u1, "user2", 10)) @@ -140,9 +141,11 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): # Check we have removed the values. shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() self.assertEqual(shares_public, []) self.assertEqual(self._compress_shared(shares_private), set()) + self.assertEqual(public_users, [u1]) # User1 now gets no search results for any of the other users. s = self.get_success(self.handler.search_users(u1, "user2", 10)) @@ -160,6 +163,15 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): r.add((i["user_id"], i["other_user_id"], i["room_id"])) return r + def get_users_in_public_rooms(self): + return self.get_success( + self.store._simple_select_list( + "users_in_public_rooms", + None, + ["user_id"], + ) + ) + def get_users_who_share_public_rooms(self): return self.get_success( self.store._simple_select_list( From 8da22e2b53614aa42776f9709a4265320e240765 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 11 Mar 2019 21:13:35 +1100 Subject: [PATCH 02/15] master startup --- synapse/app/homeserver.py | 1 + synapse/server.py | 8 ++++++++ 2 files changed, 9 insertions(+) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index e8b6cc3114..e0431608e8 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -376,6 +376,7 @@ def setup(config_options): logger.info("Database prepared in %s.", config.database_config['name']) hs.setup() + hs.setup_master() @defer.inlineCallbacks def do_acme(): diff --git a/synapse/server.py b/synapse/server.py index 72835e8c86..c992bbaa5f 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -185,6 +185,10 @@ class HomeServer(object): 'registration_handler', ] + REQUIRED_ON_MASTER_STARTUP = [ + "user_directory_handler", + ] + # This is overridden in derived application classes # (such as synapse.app.homeserver.SynapseHomeServer) and gives the class to be # instantiated during setup() for future return by get_datastore() @@ -221,6 +225,10 @@ class HomeServer(object): conn.commit() logger.info("Finished setting up.") + def setup_master(self): + for i in self.REQUIRED_ON_MASTER_STARTUP: + getattr(self, "get_" + i)() + def get_reactor(self): """ Fetch the Twisted reactor in use by this HomeServer. From 26eefca3b7c4ba06afde12e090bbc3ee75955e98 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Mon, 11 Mar 2019 21:16:10 +1100 Subject: [PATCH 03/15] setup master --- tests/utils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tests/utils.py b/tests/utils.py index 9c8dc9dbce..03b5a05b22 100644 --- a/tests/utils.py +++ b/tests/utils.py @@ -331,6 +331,8 @@ def setup_test_homeserver( cleanup_func(cleanup) hs.setup() + if homeserverToUse.__name__ == "TestHomeServer": + hs.setup_master() else: hs = homeserverToUse( name, From 5ba8ceab4cd7062d9f1b23a19d43f8a9ef7c5d60 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 00:35:31 +1100 Subject: [PATCH 04/15] fixes --- synapse/handlers/user_directory.py | 45 ++++------------------- synapse/storage/_base.py | 13 +++++-- synapse/storage/user_directory.py | 52 +++++++++++++++++++++++---- tests/handlers/test_user_directory.py | 16 ++++++--- 4 files changed, 74 insertions(+), 52 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index fc45123d0c..20a026e776 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -60,14 +60,16 @@ class UserDirectoryHandler(object): self.update_user_directory = hs.config.update_user_directory self.search_all_users = hs.config.user_directory_search_all_users + # If we're a worker, don't sleep when doing the initial room work, as it + # won't monopolise the master's CPU. + if hs.config.worker_app: + self.INITIAL_ROOM_SLEEP_MS = 0 + self.INITIAL_USER_SLEEP_MS = 0 + # When start up for the first time we need to populate the user_directory. # This is a set of user_id's we've inserted already self.initially_handled_users = set() - self.register_background_update_handler( - "users_in_public_rooms_initial", self._populate_users_in_public_rooms - ) - # The current position in the current_state_delta stream self.pos = None @@ -81,41 +83,6 @@ class UserDirectoryHandler(object): # we start populating the user directory self.clock.call_later(0, self.notify_new_event) - @defer.inlineCallbacks - def _populate_users_in_public_rooms(self, progress, batch_size): - """ - Populate the users_in_public_rooms table with the contents of the - users_who_share_public_rooms table. - """ - - def _fetch(txn): - sql = "SELECT DISTINCT other_user_id FROM users_who_share_public_rooms" - txn.execute(sql) - return txn.fetchall() - - users = yield self.store.runInteraction( - "populate_users_in_public_rooms_fetch", _fetch - ) - - if users: - - def _fill(txn): - self._simple_upsert_many_txn( - txn, - table="users_in_public_rooms", - key_names=["user_id"], - key_values=users, - value_names=(), - value_values=None, - ) - - users = yield self.store.runInteraction( - "populate_users_in_public_rooms_fill", _fill - ) - - yield self._end_background_update("users_in_public_rooms_initial") - defer.returnValue(1) - def search_users(self, user_id, search_term, limit): """Searches for users in directory diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index a0333d5309..7e3903859b 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -767,18 +767,25 @@ class SQLBaseStore(object): """ allvalues = {} allvalues.update(keyvalues) - allvalues.update(values) allvalues.update(insertion_values) + if not values: + latter = "NOTHING" + else: + allvalues.update(values) + latter = ( + "UPDATE SET " + ", ".join(k + "=EXCLUDED." + k for k in values) + ) + sql = ( "INSERT INTO %s (%s) VALUES (%s) " - "ON CONFLICT (%s) DO UPDATE SET %s" + "ON CONFLICT (%s) DO %s" ) % ( table, ", ".join(k for k in allvalues), ", ".join("?" for _ in allvalues), ", ".join(k for k in keyvalues), - ", ".join(k + "=EXCLUDED." + k for k in values), + latter ) txn.execute(sql, list(allvalues.values())) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 8f40277b50..a15366a117 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -22,16 +22,57 @@ from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules from synapse.storage.engines import PostgresEngine, Sqlite3Engine +from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id from synapse.util.caches.descriptors import cached, cachedInlineCallbacks -from ._base import SQLBaseStore - logger = logging.getLogger(__name__) -class UserDirectoryStore(SQLBaseStore): +class UserDirectoryStore(BackgroundUpdateStore): + def __init__(self, dbconn, hs): + super(UserDirectoryStore, self).__init__(dbconn, hs) + + self.register_background_update_handler( + "users_in_public_rooms_initial", self._populate_users_in_public_rooms + ) + + + @defer.inlineCallbacks + def _populate_users_in_public_rooms(self, progress, batch_size): + """ + Populate the users_in_public_rooms table with the contents of the + users_who_share_public_rooms table. + """ + + def _fetch(txn): + sql = "SELECT DISTINCT other_user_id FROM users_who_share_public_rooms" + txn.execute(sql) + return txn.fetchall() + + users = yield self.runInteraction( + "populate_users_in_public_rooms_fetch", _fetch + ) + + if users: + def _fill(txn): + self._simple_upsert_many_txn( + txn, + table="users_in_public_rooms", + key_names=["user_id"], + key_values=users, + value_names=(), + value_values=None, + ) + + users = yield self.runInteraction( + "populate_users_in_public_rooms_fill", _fill + ) + + yield self._end_background_update("users_in_public_rooms_initial") + defer.returnValue(1) + @defer.inlineCallbacks def is_room_world_readable_or_publicly_joinable(self, room_id): """Check if the room is either world_readable or publically joinable @@ -353,8 +394,7 @@ class UserDirectoryStore(SQLBaseStore): txn, "users_in_public_rooms", keyvalues={"user_id": user_id}, - values={}, - desc="add_user_as_in_public_room", + values=None, ) for user_id, other_user_id in user_id_tuples: @@ -603,7 +643,7 @@ class UserDirectoryStore(SQLBaseStore): else: join_clause = """ LEFT JOIN ( - SELECT other_user_id AS user_id FROM users_who_share_public_rooms + SELECT user_id FROM users_in_public_rooms UNION SELECT other_user_id AS user_id FROM users_who_share_private_rooms WHERE user_id = ? diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 0e0ac0a48b..7a78451a6d 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -116,12 +116,13 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): # Check we have populated the database correctly. shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() self.assertEqual(shares_public, []) self.assertEqual( self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) ) - self.assertEqual(set(public_users), set([u1, u2])) + self.assertEqual(public_users, []) # We get one search result when searching for user2 by user1. s = self.get_success(self.handler.search_users(u1, "user2", 10)) @@ -145,7 +146,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.assertEqual(shares_public, []) self.assertEqual(self._compress_shared(shares_private), set()) - self.assertEqual(public_users, [u1]) + self.assertEqual(public_users, []) # User1 now gets no search results for any of the other users. s = self.get_success(self.handler.search_users(u1, "user2", 10)) @@ -165,10 +166,10 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): def get_users_in_public_rooms(self): return self.get_success( - self.store._simple_select_list( + self.store._simple_select_onecol( "users_in_public_rooms", None, - ["user_id"], + "user_id", ) ) @@ -214,9 +215,12 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() + # Nothing updated yet self.assertEqual(shares_private, []) self.assertEqual(shares_public, []) + self.assertEqual(public_users, []) # Reset the handled users caches self.handler.initially_handled_users = set() @@ -233,6 +237,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() + public_users = self.get_users_in_public_rooms() # User 1 and User 2 share public rooms self.assertEqual( @@ -245,6 +250,9 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): set([(u1, u3, private_room), (u3, u1, private_room)]), ) + # User 1 and 2 are in public rooms + self.assertEqual(set(public_users), set([u1, u2])) + def test_search_all_users(self): """ Search all users = True means that a user does not have to share a From 41a5ba16824a45b88c077e75aedb3de45cd06d1b Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 00:38:17 +1100 Subject: [PATCH 05/15] changelog --- changelog.d/4846.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/4846.feature diff --git a/changelog.d/4846.feature b/changelog.d/4846.feature new file mode 100644 index 0000000000..8f792b8890 --- /dev/null +++ b/changelog.d/4846.feature @@ -0,0 +1 @@ +The user directory has been rewritten to make it faster, with less chance of falling behind on a large server. From 1b77bd69fbc95e51c31c3c1a2a648f2ecb9ccdb0 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 00:39:12 +1100 Subject: [PATCH 06/15] pep8 --- synapse/storage/user_directory.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index a15366a117..5d402189e8 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -21,8 +21,8 @@ from six import iteritems from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules -from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.background_updates import BackgroundUpdateStore +from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id from synapse.util.caches.descriptors import cached, cachedInlineCallbacks @@ -38,7 +38,6 @@ class UserDirectoryStore(BackgroundUpdateStore): "users_in_public_rooms_initial", self._populate_users_in_public_rooms ) - @defer.inlineCallbacks def _populate_users_in_public_rooms(self, progress, batch_size): """ From 78a6b950b3258c3d3e11a0b4341d40ddc99748e2 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 00:50:28 +1100 Subject: [PATCH 07/15] fix --- synapse/storage/user_directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 5d402189e8..745e6f26ec 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -393,7 +393,7 @@ class UserDirectoryStore(BackgroundUpdateStore): txn, "users_in_public_rooms", keyvalues={"user_id": user_id}, - values=None, + values={}, ) for user_id, other_user_id in user_id_tuples: From c980c7e31facdb33504051942857a0f67410f39a Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 17:51:14 +1100 Subject: [PATCH 08/15] use the old method --- synapse/storage/user_directory.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 745e6f26ec..72a9071d03 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -641,12 +641,11 @@ class UserDirectoryStore(BackgroundUpdateStore): where_clause = "1=1" else: join_clause = """ + LEFT JOIN users_in_public_rooms AS p USING (user_id) LEFT JOIN ( - SELECT user_id FROM users_in_public_rooms - UNION SELECT other_user_id AS user_id FROM users_who_share_private_rooms WHERE user_id = ? - ) AS p USING (user_id) + ) AS s USING (user_id) """ join_args = (user_id,) where_clause = "p.user_id IS NOT NULL" From 8b618041efc230c87c74b912640b8e5727fcc539 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 18:06:28 +1100 Subject: [PATCH 09/15] fixup --- synapse/storage/user_directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 72a9071d03..4de552c1bb 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -648,7 +648,7 @@ class UserDirectoryStore(BackgroundUpdateStore): ) AS s USING (user_id) """ join_args = (user_id,) - where_clause = "p.user_id IS NOT NULL" + where_clause = "(s.user_id IS NOT NULL OR p.user_id IS NOT NULL)" if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) From 10480c434881d9c38acc02c98ab4b85b98097870 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 21:47:14 +1100 Subject: [PATCH 10/15] fixup --- synapse/handlers/user_directory.py | 119 ++++++++----- .../schema/delta/53/users_in_public_rooms.sql | 17 +- synapse/storage/user_directory.py | 167 ++++++------------ tests/handlers/test_user_directory.py | 36 ++-- 4 files changed, 161 insertions(+), 178 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index 20a026e776..f9f7b8abd0 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -247,38 +247,58 @@ class UserDirectoryHandler(object): # We also batch up inserts/updates, but try to avoid too many at once. to_insert = set() count = 0 - for user_id in user_ids: - if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: - yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - - if not self.is_mine_id(user_id): - count += 1 - continue - - if self.store.get_if_app_services_interested_in_user(user_id): - count += 1 - continue - - for other_user_id in user_ids: - if user_id == other_user_id: - continue + if is_public: + for user_id in user_ids: if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) - count += 1 - user_set = (user_id, other_user_id) - to_insert.add(user_set) + if self.store.get_if_app_services_interested_in_user(user_id): + count += 1 + continue + to_insert.add(user_id) if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: - yield self.store.add_users_who_share_room( - room_id, not is_public, to_insert - ) + yield self.store.add_users_in_public_rooms(room_id, to_insert) to_insert.clear() - if to_insert: - yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) - to_insert.clear() + if to_insert: + yield self.store.add_users_in_public_rooms(room_id, to_insert) + to_insert.clear() + else: + + for user_id in user_ids: + if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: + yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) + + if not self.is_mine_id(user_id): + count += 1 + continue + + if self.store.get_if_app_services_interested_in_user(user_id): + count += 1 + continue + + for other_user_id in user_ids: + if user_id == other_user_id: + continue + + if count % self.INITIAL_ROOM_SLEEP_COUNT == 0: + yield self.clock.sleep(self.INITIAL_ROOM_SLEEP_MS / 1000.0) + count += 1 + + user_set = (user_id, other_user_id) + to_insert.add(user_set) + + if len(to_insert) > self.INITIAL_ROOM_BATCH_SIZE: + yield self.store.add_users_who_share_private_room( + room_id, not is_public, to_insert + ) + to_insert.clear() + + if to_insert: + yield self.store.add_users_who_share_private_room(room_id, to_insert) + to_insert.clear() @defer.inlineCallbacks def _handle_deltas(self, deltas): @@ -451,34 +471,37 @@ class UserDirectoryHandler(object): # Now we update users who share rooms with users. users_with_profile = yield self.state.get_current_user_in_room(room_id) - to_insert = set() + if is_public: + yield self.store.add_users_in_public_rooms(room_id, (user_id,)) + else: + to_insert = set() - # First, if they're our user then we need to update for every user - if self.is_mine_id(user_id): + # First, if they're our user then we need to update for every user + if self.is_mine_id(user_id): - is_appservice = self.store.get_if_app_services_interested_in_user(user_id) + is_appservice = self.store.get_if_app_services_interested_in_user(user_id) - # We don't care about appservice users. - if not is_appservice: - for other_user_id in users_with_profile: - if user_id == other_user_id: - continue + # We don't care about appservice users. + if not is_appservice: + for other_user_id in users_with_profile: + if user_id == other_user_id: + continue - to_insert.add((user_id, other_user_id)) + to_insert.add((user_id, other_user_id)) - # Next we need to update for every local user in the room - for other_user_id in users_with_profile: - if user_id == other_user_id: - continue + # Next we need to update for every local user in the room + for other_user_id in users_with_profile: + if user_id == other_user_id: + continue - is_appservice = self.store.get_if_app_services_interested_in_user( - other_user_id - ) - if self.is_mine_id(other_user_id) and not is_appservice: - to_insert.add((other_user_id, user_id)) + is_appservice = self.store.get_if_app_services_interested_in_user( + other_user_id + ) + if self.is_mine_id(other_user_id) and not is_appservice: + to_insert.add((other_user_id, user_id)) - if to_insert: - yield self.store.add_users_who_share_room(room_id, not is_public, to_insert) + if to_insert: + yield self.store.add_users_who_share_private_room(room_id, to_insert) @defer.inlineCallbacks def _handle_remove_user(self, room_id, user_id): @@ -493,10 +516,10 @@ class UserDirectoryHandler(object): # Remove user from sharing tables yield self.store.remove_user_who_share_room(user_id, room_id) - # Are they still in a room with members? If not, remove them entirely. - users_in_room_with = yield self.store.get_users_who_share_room_from_dir(user_id) + # Are they still in any rooms? If not, remove them entirely. + rooms_user_is_in = yield self.store.get_rooms_user_is_in(user_id) - if len(users_in_room_with) == 0: + if len(rooms_user_is_in) == 0: yield self.store.remove_from_user_dir(user_id) @defer.inlineCallbacks diff --git a/synapse/storage/schema/delta/53/users_in_public_rooms.sql b/synapse/storage/schema/delta/53/users_in_public_rooms.sql index bd57fd778b..40adc98387 100644 --- a/synapse/storage/schema/delta/53/users_in_public_rooms.sql +++ b/synapse/storage/schema/delta/53/users_in_public_rooms.sql @@ -16,13 +16,20 @@ -- We don't need the old version of this table. DROP TABLE IF EXISTS users_in_public_rooms; +-- Old version of users_in_public_rooms +DROP TABLE IF EXISTS users_who_share_public_rooms; + -- Track what users are in public rooms. CREATE TABLE IF NOT EXISTS users_in_public_rooms ( + user_id TEXT NOT NULL, + room_id TEXT NOT NULL +); + +CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id, room_id); + +-- Track what users are publicly visible +CREATE TABLE IF NOT EXISTS publicly_visible_users ( user_id TEXT NOT NULL ); -CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id); - --- Fill the table. -INSERT INTO background_updates (update_name, progress_json) VALUES - ('users_in_public_rooms_initial', '{}'); +CREATE UNIQUE INDEX publicly_visible_users_u_idx ON publicly_visible_users(user_id); diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 4de552c1bb..af4260bc61 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -21,57 +21,15 @@ from six import iteritems from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules -from synapse.storage.background_updates import BackgroundUpdateStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id -from synapse.util.caches.descriptors import cached, cachedInlineCallbacks +from synapse.util.caches.descriptors import cached logger = logging.getLogger(__name__) -class UserDirectoryStore(BackgroundUpdateStore): - def __init__(self, dbconn, hs): - super(UserDirectoryStore, self).__init__(dbconn, hs) - - self.register_background_update_handler( - "users_in_public_rooms_initial", self._populate_users_in_public_rooms - ) - - @defer.inlineCallbacks - def _populate_users_in_public_rooms(self, progress, batch_size): - """ - Populate the users_in_public_rooms table with the contents of the - users_who_share_public_rooms table. - """ - - def _fetch(txn): - sql = "SELECT DISTINCT other_user_id FROM users_who_share_public_rooms" - txn.execute(sql) - return txn.fetchall() - - users = yield self.runInteraction( - "populate_users_in_public_rooms_fetch", _fetch - ) - - if users: - def _fill(txn): - self._simple_upsert_many_txn( - txn, - table="users_in_public_rooms", - key_names=["user_id"], - key_values=users, - value_names=(), - value_values=None, - ) - - users = yield self.runInteraction( - "populate_users_in_public_rooms_fill", _fill - ) - - yield self._end_background_update("users_in_public_rooms_initial") - defer.returnValue(1) - +class UserDirectoryStore(object): @defer.inlineCallbacks def is_room_world_readable_or_publicly_joinable(self, room_id): """Check if the room is either world_readable or publically joinable @@ -281,19 +239,12 @@ class UserDirectoryStore(BackgroundUpdateStore): self._simple_delete_txn( txn, table="user_directory_search", keyvalues={"user_id": user_id} ) + self._simple_delete_txn( + txn, table="publicly_visible_users", keyvalues={"user_id": user_id} + ) self._simple_delete_txn( txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} ) - self._simple_delete_txn( - txn, - table="users_who_share_public_rooms", - keyvalues={"user_id": user_id}, - ) - self._simple_delete_txn( - txn, - table="users_who_share_public_rooms", - keyvalues={"other_user_id": user_id}, - ) self._simple_delete_txn( txn, table="users_who_share_private_rooms", @@ -314,9 +265,9 @@ class UserDirectoryStore(BackgroundUpdateStore): in the given room_id """ user_ids_share_pub = yield self._simple_select_onecol( - table="users_who_share_public_rooms", + table="publicly_visible_users", keyvalues={"room_id": room_id}, - retcol="other_user_id", + retcol="user_id", desc="get_users_in_dir_due_to_room", ) @@ -354,26 +305,19 @@ class UserDirectoryStore(BackgroundUpdateStore): rows = yield self._execute("get_all_local_users", None, sql) defer.returnValue([name for name, in rows]) - def add_users_who_share_room(self, room_id, share_private, user_id_tuples): - """Insert entries into the users_who_share_*_rooms table. The first + def add_users_who_share_private_room(self, room_id, user_id_tuples): + """Insert entries into the users_who_share_private_rooms table. The first user should be a local user. Args: room_id (str) - share_private (bool): Is the room private user_id_tuples([(str, str)]): iterable of 2-tuple of user IDs. """ def _add_users_who_share_room_txn(txn): - - if share_private: - tbl = "users_who_share_private_rooms" - else: - tbl = "users_who_share_public_rooms" - self._simple_upsert_many_txn( txn, - table=tbl, + table="users_who_share_private_rooms", key_names=["user_id", "other_user_id", "room_id"], key_values=[ (user_id, other_user_id, room_id) @@ -383,26 +327,44 @@ class UserDirectoryStore(BackgroundUpdateStore): value_values=None, ) - # If it's a public room, also update them in users_in_public_rooms. + return self.runInteraction( + "add_users_who_share_room", _add_users_who_share_room_txn + ) + + def add_users_in_public_rooms(self, room_id, user_ids): + """Insert entries into the users_who_share_private_rooms table. The first + user should be a local user. + + Args: + room_id (str) + user_ids (list[str]) + """ + + def _add_users_in_public_rooms_txn(txn): + + self._simple_upsert_many_txn( + txn, + table="users_in_public_rooms", + key_names=["user_id", "room_id"], + key_values=[(user_id, room_id) for user_id in user_ids], + value_names=(), + value_values=None, + ) + + # If it's a public room, also update them in publicly_visible_users. # We don't look before they're in the table before we do it, as it's # more efficient to simply have Postgres do that (one UPSERT vs one # SELECT and maybe one INSERT). - if not share_private: - for user_id in set([x[1] for x in user_id_tuples]): - self._simple_upsert_txn( - txn, - "users_in_public_rooms", - keyvalues={"user_id": user_id}, - values={}, - ) - - for user_id, other_user_id in user_id_tuples: - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) + for user_id in user_ids: + self._simple_upsert_txn( + txn, + "publicly_visible_users", + keyvalues={"user_id": user_id}, + values={}, ) return self.runInteraction( - "add_users_who_share_room", _add_users_who_share_room_txn + "add_users_in_public_rooms", _add_users_in_public_rooms_txn ) def remove_user_who_share_room(self, user_id, room_id): @@ -428,40 +390,32 @@ class UserDirectoryStore(BackgroundUpdateStore): ) self._simple_delete_txn( txn, - table="users_who_share_public_rooms", + table="users_in_public_rooms", keyvalues={"user_id": user_id, "room_id": room_id}, ) - self._simple_delete_txn( - txn, - table="users_who_share_public_rooms", - keyvalues={"other_user_id": user_id, "room_id": room_id}, - ) # Are the users still in a public room after we deleted them from this one? still_in_public = self._simple_select_one_onecol_txn( txn, - "users_who_share_public_rooms", - keyvalues={"other_user_id": user_id}, - retcol="other_user_id", + "users_in_public_rooms", + keyvalues={"user_id": user_id}, + retcol="user_id", allow_none=True, ) if still_in_public is None: self._simple_delete_txn( - txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} + txn, table="publicly_visible_users", keyvalues={"user_id": user_id} ) - txn.call_after( - self.get_users_who_share_room_from_dir.invalidate, (user_id,) - ) - return self.runInteraction( "remove_user_who_share_room", _remove_user_who_share_room_txn ) - @cachedInlineCallbacks(max_entries=500000, iterable=True) - def get_users_who_share_room_from_dir(self, user_id): - """Returns the set of users who share a room with `user_id` + @defer.inlineCallbacks + def get_rooms_user_is_in(self, user_id): + """ + Returns the rooms that a user is in. Args: user_id(str): Must be a local user @@ -472,23 +426,19 @@ class UserDirectoryStore(BackgroundUpdateStore): rows = yield self._simple_select_onecol( table="users_who_share_private_rooms", keyvalues={"user_id": user_id}, - retcol="other_user_id", - desc="get_users_who_share_room_with_user", + retcol="room_id", + desc="get_rooms_user_is_in", ) pub_rows = yield self._simple_select_onecol( - table="users_who_share_public_rooms", + table="users_in_public_rooms", keyvalues={"user_id": user_id}, - retcol="other_user_id", - desc="get_users_who_share_room_with_user", + retcol="room_id", + desc="get_rooms_user_is_in", ) users = set(pub_rows) users.update(rows) - - # Remove the user themselves from this list. - users.discard(user_id) - defer.returnValue(list(users)) @defer.inlineCallbacks @@ -525,10 +475,9 @@ class UserDirectoryStore(BackgroundUpdateStore): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") txn.execute("DELETE FROM users_in_public_rooms") - txn.execute("DELETE FROM users_who_share_public_rooms") + txn.execute("DELETE FROM publicly_visible_users") txn.execute("DELETE FROM users_who_share_private_rooms") txn.call_after(self.get_user_in_directory.invalidate_all) - txn.call_after(self.get_users_who_share_room_from_dir.invalidate_all) return self.runInteraction( "delete_all_from_user_dir", _delete_all_from_user_dir_txn @@ -641,7 +590,7 @@ class UserDirectoryStore(BackgroundUpdateStore): where_clause = "1=1" else: join_clause = """ - LEFT JOIN users_in_public_rooms AS p USING (user_id) + LEFT JOIN publicly_visible_users AS p USING (user_id) LEFT JOIN ( SELECT other_user_id AS user_id FROM users_who_share_private_rooms WHERE user_id = ? diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index 7a78451a6d..d8248def3f 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -114,11 +114,11 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.helper.join(room, user=u2, tok=u2_token) # Check we have populated the database correctly. - shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() + visible_users = self.get_publicly_visible_users() - self.assertEqual(shares_public, []) + self.assertEqual(visible_users, []) self.assertEqual( self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) ) @@ -140,11 +140,11 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.helper.leave(room, user=u2, tok=u2_token) # Check we have removed the values. - shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() + visible_users = self.get_publicly_visible_users() - self.assertEqual(shares_public, []) + self.assertEqual(visible_users, []) self.assertEqual(self._compress_shared(shares_private), set()) self.assertEqual(public_users, []) @@ -165,20 +165,24 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): return r def get_users_in_public_rooms(self): - return self.get_success( - self.store._simple_select_onecol( + r = self.get_success( + self.store._simple_select_list( "users_in_public_rooms", None, - "user_id", + ("user_id", "room_id"), ) ) + retval = [] + for i in r: + retval.append((i["user_id"], i["room_id"])) + return retval - def get_users_who_share_public_rooms(self): + def get_publicly_visible_users(self): return self.get_success( - self.store._simple_select_list( - "users_who_share_public_rooms", + self.store._simple_select_onecol( + "publicly_visible_users", None, - ["user_id", "other_user_id", "room_id"], + "user_id", ) ) @@ -213,13 +217,13 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.get_success(self.store.update_user_directory_stream_pos(None)) self.get_success(self.store.delete_all_from_user_dir()) - shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() + visible_users = self.get_publicly_visible_users() # Nothing updated yet self.assertEqual(shares_private, []) - self.assertEqual(shares_public, []) + self.assertEqual(visible_users, []) self.assertEqual(public_users, []) # Reset the handled users caches @@ -235,13 +239,13 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): self.get_success(d) - shares_public = self.get_users_who_share_public_rooms() shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() + visible_users = self.get_publicly_visible_users() # User 1 and User 2 share public rooms self.assertEqual( - self._compress_shared(shares_public), set([(u1, u2, room), (u2, u1, room)]) + set(public_users), set([(u1, room), (u2, room)]) ) # User 1 and User 3 share private rooms @@ -251,7 +255,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): ) # User 1 and 2 are in public rooms - self.assertEqual(set(public_users), set([u1, u2])) + self.assertEqual(set(visible_users), set([u1, u2])) def test_search_all_users(self): """ From 6f5890b2fae4fad92b9448dfaf3ca6c37afc5720 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 22:27:56 +1100 Subject: [PATCH 11/15] fixup --- synapse/storage/user_directory.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index af4260bc61..b848d9db00 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -21,6 +21,7 @@ from six import iteritems from twisted.internet import defer from synapse.api.constants import EventTypes, JoinRules +from synapse.storage._base import SQLBaseStore from synapse.storage.engines import PostgresEngine, Sqlite3Engine from synapse.storage.state import StateFilter from synapse.types import get_domain_from_id, get_localpart_from_id @@ -29,7 +30,7 @@ from synapse.util.caches.descriptors import cached logger = logging.getLogger(__name__) -class UserDirectoryStore(object): +class UserDirectoryStore(SQLBaseStore): @defer.inlineCallbacks def is_room_world_readable_or_publicly_joinable(self, room_id): """Check if the room is either world_readable or publically joinable From 81d9d1bee6bd67ebf7440d6b885210fe67dbe3d1 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 22:28:48 +1100 Subject: [PATCH 12/15] fixup --- synapse/storage/user_directory.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index b848d9db00..0e6619222c 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -266,7 +266,7 @@ class UserDirectoryStore(SQLBaseStore): in the given room_id """ user_ids_share_pub = yield self._simple_select_onecol( - table="publicly_visible_users", + table="users_in_public_rooms", keyvalues={"room_id": room_id}, retcol="user_id", desc="get_users_in_dir_due_to_room", From d306bd1b26586067dcd3119f4c7c24009d377b69 Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Tue, 12 Mar 2019 22:38:01 +1100 Subject: [PATCH 13/15] fixup --- tests/storage/test_user_directory.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/storage/test_user_directory.py b/tests/storage/test_user_directory.py index a2a652a235..512d76e7a3 100644 --- a/tests/storage/test_user_directory.py +++ b/tests/storage/test_user_directory.py @@ -41,8 +41,8 @@ class UserDirectoryStoreTestCase(unittest.TestCase): BOBBY: ProfileInfo(None, "bobby"), }, ) - yield self.store.add_users_who_share_room( - "!room:id", False, ((ALICE, BOB), (BOB, ALICE)) + yield self.store.add_users_in_public_rooms( + "!room:id", (ALICE, BOB) ) @defer.inlineCallbacks From 797b6a63fc5f8cb70d15ca0b98e871a57e712f0c Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 13 Mar 2019 01:17:51 +1100 Subject: [PATCH 14/15] fixup --- synapse/server.py | 5 ++ .../schema/delta/53/users_in_public_rooms.sql | 7 --- synapse/storage/user_directory.py | 59 ++++--------------- tests/handlers/test_user_directory.py | 21 +------ 4 files changed, 17 insertions(+), 75 deletions(-) diff --git a/synapse/server.py b/synapse/server.py index c992bbaa5f..b9549dd042 100644 --- a/synapse/server.py +++ b/synapse/server.py @@ -226,6 +226,11 @@ class HomeServer(object): logger.info("Finished setting up.") def setup_master(self): + """ + Some handlers have side effects on instantiation (like registering + background updates). This function causes them to be fetched, and + therefore instantiated, to run those side effects. + """ for i in self.REQUIRED_ON_MASTER_STARTUP: getattr(self, "get_" + i)() diff --git a/synapse/storage/schema/delta/53/users_in_public_rooms.sql b/synapse/storage/schema/delta/53/users_in_public_rooms.sql index 40adc98387..f7827ca6d2 100644 --- a/synapse/storage/schema/delta/53/users_in_public_rooms.sql +++ b/synapse/storage/schema/delta/53/users_in_public_rooms.sql @@ -26,10 +26,3 @@ CREATE TABLE IF NOT EXISTS users_in_public_rooms ( ); CREATE UNIQUE INDEX users_in_public_rooms_u_idx ON users_in_public_rooms(user_id, room_id); - --- Track what users are publicly visible -CREATE TABLE IF NOT EXISTS publicly_visible_users ( - user_id TEXT NOT NULL -); - -CREATE UNIQUE INDEX publicly_visible_users_u_idx ON publicly_visible_users(user_id); diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 0e6619222c..8fd4fd50da 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -240,9 +240,6 @@ class UserDirectoryStore(SQLBaseStore): self._simple_delete_txn( txn, table="user_directory_search", keyvalues={"user_id": user_id} ) - self._simple_delete_txn( - txn, table="publicly_visible_users", keyvalues={"user_id": user_id} - ) self._simple_delete_txn( txn, table="users_in_public_rooms", keyvalues={"user_id": user_id} ) @@ -352,18 +349,6 @@ class UserDirectoryStore(SQLBaseStore): value_values=None, ) - # If it's a public room, also update them in publicly_visible_users. - # We don't look before they're in the table before we do it, as it's - # more efficient to simply have Postgres do that (one UPSERT vs one - # SELECT and maybe one INSERT). - for user_id in user_ids: - self._simple_upsert_txn( - txn, - "publicly_visible_users", - keyvalues={"user_id": user_id}, - values={}, - ) - return self.runInteraction( "add_users_in_public_rooms", _add_users_in_public_rooms_txn ) @@ -395,20 +380,6 @@ class UserDirectoryStore(SQLBaseStore): keyvalues={"user_id": user_id, "room_id": room_id}, ) - # Are the users still in a public room after we deleted them from this one? - still_in_public = self._simple_select_one_onecol_txn( - txn, - "users_in_public_rooms", - keyvalues={"user_id": user_id}, - retcol="user_id", - allow_none=True, - ) - - if still_in_public is None: - self._simple_delete_txn( - txn, table="publicly_visible_users", keyvalues={"user_id": user_id} - ) - return self.runInteraction( "remove_user_who_share_room", _remove_user_who_share_room_txn ) @@ -476,7 +447,6 @@ class UserDirectoryStore(SQLBaseStore): txn.execute("DELETE FROM user_directory") txn.execute("DELETE FROM user_directory_search") txn.execute("DELETE FROM users_in_public_rooms") - txn.execute("DELETE FROM publicly_visible_users") txn.execute("DELETE FROM users_who_share_private_rooms") txn.call_after(self.get_user_in_directory.invalidate_all) @@ -583,22 +553,19 @@ class UserDirectoryStore(SQLBaseStore): """ if self.hs.config.user_directory_search_all_users: - # make s.user_id null to keep the ordering algorithm happy - join_clause = """ - CROSS JOIN (SELECT NULL as user_id) AS s - """ join_args = () where_clause = "1=1" else: - join_clause = """ - LEFT JOIN publicly_visible_users AS p USING (user_id) - LEFT JOIN ( - SELECT other_user_id AS user_id FROM users_who_share_private_rooms - WHERE user_id = ? - ) AS s USING (user_id) - """ join_args = (user_id,) - where_clause = "(s.user_id IS NOT NULL OR p.user_id IS NOT NULL)" + where_clause = """ + ( + EXISTS (select 1 from users_in_public_rooms WHERE user_id = t.user_id) + OR EXISTS ( + SELECT 1 FROM users_who_share_private_rooms + WHERE user_id = ? AND other_user_id = t.user_id + ) + ) + """ if isinstance(self.database_engine, PostgresEngine): full_query, exact_query, prefix_query = _parse_query_postgres(search_term) @@ -610,9 +577,8 @@ class UserDirectoryStore(SQLBaseStore): # search: (domain, _, display name, localpart) sql = """ SELECT d.user_id AS user_id, display_name, avatar_url - FROM user_directory_search + FROM user_directory_search as t INNER JOIN user_directory AS d USING (user_id) - %s WHERE %s AND vector @@ to_tsquery('english', ?) @@ -639,7 +605,6 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ % ( - join_clause, where_clause, ) args = join_args + (full_query, exact_query, prefix_query, limit + 1) @@ -648,9 +613,8 @@ class UserDirectoryStore(SQLBaseStore): sql = """ SELECT d.user_id AS user_id, display_name, avatar_url - FROM user_directory_search + FROM user_directory_search as t INNER JOIN user_directory AS d USING (user_id) - %s WHERE %s AND value MATCH ? @@ -660,7 +624,6 @@ class UserDirectoryStore(SQLBaseStore): avatar_url IS NULL LIMIT ? """ % ( - join_clause, where_clause, ) args = join_args + (search_query, limit + 1) diff --git a/tests/handlers/test_user_directory.py b/tests/handlers/test_user_directory.py index d8248def3f..114807efc1 100644 --- a/tests/handlers/test_user_directory.py +++ b/tests/handlers/test_user_directory.py @@ -116,9 +116,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): # Check we have populated the database correctly. shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() - visible_users = self.get_publicly_visible_users() - self.assertEqual(visible_users, []) self.assertEqual( self._compress_shared(shares_private), set([(u1, u2, room), (u2, u1, room)]) ) @@ -142,9 +140,7 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): # Check we have removed the values. shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() - visible_users = self.get_publicly_visible_users() - self.assertEqual(visible_users, []) self.assertEqual(self._compress_shared(shares_private), set()) self.assertEqual(public_users, []) @@ -177,15 +173,6 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): retval.append((i["user_id"], i["room_id"])) return retval - def get_publicly_visible_users(self): - return self.get_success( - self.store._simple_select_onecol( - "publicly_visible_users", - None, - "user_id", - ) - ) - def get_users_who_share_private_rooms(self): return self.get_success( self.store._simple_select_list( @@ -219,11 +206,9 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() - visible_users = self.get_publicly_visible_users() # Nothing updated yet self.assertEqual(shares_private, []) - self.assertEqual(visible_users, []) self.assertEqual(public_users, []) # Reset the handled users caches @@ -241,9 +226,8 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): shares_private = self.get_users_who_share_private_rooms() public_users = self.get_users_in_public_rooms() - visible_users = self.get_publicly_visible_users() - # User 1 and User 2 share public rooms + # User 1 and User 2 are in the same public room self.assertEqual( set(public_users), set([(u1, room), (u2, room)]) ) @@ -254,9 +238,6 @@ class UserDirectoryTestCase(unittest.HomeserverTestCase): set([(u1, u3, private_room), (u3, u1, private_room)]), ) - # User 1 and 2 are in public rooms - self.assertEqual(set(visible_users), set([u1, u2])) - def test_search_all_users(self): """ Search all users = True means that a user does not have to share a From c0332d095f6116c1e8af2738bcc8f1fbe5b4432c Mon Sep 17 00:00:00 2001 From: Amber Brown Date: Wed, 13 Mar 2019 01:30:54 +1100 Subject: [PATCH 15/15] fixup --- synapse/handlers/user_directory.py | 2 +- synapse/storage/user_directory.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/user_directory.py b/synapse/handlers/user_directory.py index f9f7b8abd0..d92f8c529c 100644 --- a/synapse/handlers/user_directory.py +++ b/synapse/handlers/user_directory.py @@ -517,7 +517,7 @@ class UserDirectoryHandler(object): yield self.store.remove_user_who_share_room(user_id, room_id) # Are they still in any rooms? If not, remove them entirely. - rooms_user_is_in = yield self.store.get_rooms_user_is_in(user_id) + rooms_user_is_in = yield self.store.get_user_dir_rooms_user_is_in(user_id) if len(rooms_user_is_in) == 0: yield self.store.remove_from_user_dir(user_id) diff --git a/synapse/storage/user_directory.py b/synapse/storage/user_directory.py index 8fd4fd50da..1c00b956e5 100644 --- a/synapse/storage/user_directory.py +++ b/synapse/storage/user_directory.py @@ -385,7 +385,7 @@ class UserDirectoryStore(SQLBaseStore): ) @defer.inlineCallbacks - def get_rooms_user_is_in(self, user_id): + def get_user_dir_rooms_user_is_in(self, user_id): """ Returns the rooms that a user is in.