# Copyright 2017 Vector Creations Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import logging
import re
from typing import (
    TYPE_CHECKING,
    Dict,
    Iterable,
    List,
    Optional,
    Sequence,
    Set,
    Tuple,
    cast,
)

from synapse.api.errors import StoreError

if TYPE_CHECKING:
    from synapse.server import HomeServer

from synapse.api.constants import EventTypes, HistoryVisibility, JoinRules
from synapse.storage.database import DatabasePool, LoggingTransaction
from synapse.storage.databases.main.state import StateFilter
from synapse.storage.databases.main.state_deltas import StateDeltasStore
from synapse.storage.engines import PostgresEngine, Sqlite3Engine
from synapse.storage.types import Connection
from synapse.types import JsonDict, get_domain_from_id, get_localpart_from_id
from synapse.util.caches.descriptors import cached

logger = logging.getLogger(__name__)

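# Prefix shared by the temporary staging tables (_rooms, _users and _position)
# that the populate_user_directory background updates below create and drop.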
TEMP_TABLE = "_temp_populate_user_directory"


class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
    # How many records do we batch up before sending them to
    # add_users_who_share_private_rooms?
    SHARE_PRIVATE_WORKING_SET = 500

    def __init__(
        self,
        database: DatabasePool,
        db_conn: Connection,
        hs: "HomeServer",
    ):
        super().__init__(database, db_conn, hs)

        self.server_name = hs.hostname

        self.db_pool.updates.register_background_update_handler(
            "populate_user_directory_createtables",
            self._populate_user_directory_createtables,
        )
        self.db_pool.updates.register_background_update_handler(
            "populate_user_directory_process_rooms",
            self._populate_user_directory_process_rooms,
        )
        self.db_pool.updates.register_background_update_handler(
            "populate_user_directory_process_users",
            self._populate_user_directory_process_users,
        )
        self.db_pool.updates.register_background_update_handler(
            "populate_user_directory_cleanup", self._populate_user_directory_cleanup
        )

    async def _populate_user_directory_createtables(
        self, progress: JsonDict, batch_size: int
    ) -> int:
        """Create the temporary staging tables used to build the user directory."""

        # Get all the rooms that we want to process.
        def _make_staging_area(txn: LoggingTransaction) -> None:
            sql = (
                "CREATE TABLE IF NOT EXISTS "
                + TEMP_TABLE
                + "_rooms(room_id TEXT NOT NULL, events BIGINT NOT NULL)"
            )
            txn.execute(sql)

            sql = (
                "CREATE TABLE IF NOT EXISTS "
                + TEMP_TABLE
                + "_position(position TEXT NOT NULL)"
            )
            txn.execute(sql)

            # Get rooms we want to process from the database
            sql = """
                SELECT room_id, count(*) FROM current_state_events
                GROUP BY room_id
            """
            txn.execute(sql)
            rooms = [{"room_id": x[0], "events": x[1]} for x in txn.fetchall()]
            self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_rooms", rooms)
            del rooms

            sql = (
                "CREATE TABLE IF NOT EXISTS "
                + TEMP_TABLE
                + "_users(user_id TEXT NOT NULL)"
            )
            txn.execute(sql)

            txn.execute("SELECT name FROM users")
            users = [{"user_id": x[0]} for x in txn.fetchall()]

            self.db_pool.simple_insert_many_txn(txn, TEMP_TABLE + "_users", users)

        new_pos = await self.get_max_stream_id_in_current_state_deltas()
        await self.db_pool.runInteraction(
            "populate_user_directory_temp_build", _make_staging_area
        )
        await self.db_pool.simple_insert(
            TEMP_TABLE + "_position", {"position": new_pos}
        )

        await self.db_pool.updates._end_background_update(
            "populate_user_directory_createtables"
        )
        return 1

    async def _populate_user_directory_cleanup(
        self,
        progress: JsonDict,
        batch_size: int,
    ) -> int:
        """
        Update the user directory stream position, then clean up the old tables.
        """
        position = await self.db_pool.simple_select_one_onecol(
            TEMP_TABLE + "_position", {}, "position"
        )
        await self.update_user_directory_stream_pos(position)

        def _delete_staging_area(txn: LoggingTransaction) -> None:
            txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_rooms")
            txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_users")
            txn.execute("DROP TABLE IF EXISTS " + TEMP_TABLE + "_position")

        await self.db_pool.runInteraction(
            "populate_user_directory_cleanup", _delete_staging_area
        )

        await self.db_pool.updates._end_background_update(
            "populate_user_directory_cleanup"
        )
        return 1

    async def _populate_user_directory_process_rooms(
        self, progress: JsonDict, batch_size: int
    ) -> int:
        """
        Rescan the state of all rooms so we can track

        - who's in a public room;
        - which local users share a private room with other users (local
          and remote); and
        - who should be in the user_directory.

        Args:
            progress (dict)
            batch_size (int): Maximum number of state events to process
                per cycle.

        Returns:
            number of events processed.
        """
        # If we don't have a progress field, delete everything.
        if not progress:
            await self.delete_all_from_user_dir()

        def _get_next_batch(
            txn: LoggingTransaction,
        ) -> Optional[Sequence[Tuple[str, int]]]:
            # Only fetch 250 rooms, so we don't fetch too many at once, even
            # if those 250 rooms have fewer than batch_size state events.
            sql = """
                SELECT room_id, events FROM %s
                ORDER BY events DESC
                LIMIT 250
            """ % (
                TEMP_TABLE + "_rooms",
            )
            txn.execute(sql)
            rooms_to_work_on = cast(List[Tuple[str, int]], txn.fetchall())

            if not rooms_to_work_on:
                return None

            # Get how many are left to process, so we can give status on how
            # far we are in processing
            txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms")
            result = txn.fetchone()
            assert result is not None
            progress["remaining"] = result[0]

            return rooms_to_work_on

        rooms_to_work_on = await self.db_pool.runInteraction(
            "populate_user_directory_temp_read", _get_next_batch
        )

        # No more rooms -- complete the transaction.
        if not rooms_to_work_on:
            await self.db_pool.updates._end_background_update(
                "populate_user_directory_process_rooms"
            )
            return 1

        logger.debug(
            "Processing the next %d rooms of %d remaining"
            % (len(rooms_to_work_on), progress["remaining"])
        )

        processed_event_count = 0

        for room_id, event_count in rooms_to_work_on:
            is_in_room = await self.is_host_joined(room_id, self.server_name)

            if is_in_room:
                users_with_profile = await self.get_users_in_room_with_profiles(room_id)
                # Throw away users excluded from the directory.
                users_with_profile = {
                    user_id: profile
                    for user_id, profile in users_with_profile.items()
                    if not self.hs.is_mine_id(user_id)
                    or await self.should_include_local_user_in_dir(user_id)
                }

                # Upsert a user_directory record for each remote user we see.
                for user_id, profile in users_with_profile.items():
                    # Local users are processed separately in
                    # `_populate_user_directory_process_users`; there we can read
                    # from the `profiles` table to ensure we don't leak their
                    # per-room profiles. It also means we write local users to this
                    # table exactly once, rather than once for every room they're in.
                    if self.hs.is_mine_id(user_id):
                        continue
                    # TODO `users_with_profile` above reads from the `user_directory`
                    #   table, meaning that `profile` is bespoke to this room,
                    #   and this leaks remote users' per-room profiles to the user directory.
                    await self.update_profile_in_user_dir(
                        user_id, profile.display_name, profile.avatar_url
                    )

                # Now update the room sharing tables to include this room.
                is_public = await self.is_room_world_readable_or_publicly_joinable(
                    room_id
                )
                if is_public:
                    if users_with_profile:
                        await self.add_users_in_public_rooms(
                            room_id, users_with_profile.keys()
                        )
                else:
                    to_insert = set()
                    for user_id in users_with_profile:
                        # We want the set of pairs (L, M) where L and M are
                        # in `users_with_profile` and L is local.
                        # Do so by looking for the local user L first.
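                        # Illustrative example (names are hypothetical): if the
                        # room members are @a:local, @b:local and @c:remote, we
                        # want the pairs (a, b), (a, c), (b, a) and (b, c).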
                        if not self.hs.is_mine_id(user_id):
                            continue

                        for other_user_id in users_with_profile:
                            if user_id == other_user_id:
                                continue

                            user_set = (user_id, other_user_id)
                            to_insert.add(user_set)

                            # If it gets too big, stop and write to the database
                            # to prevent storing too much in RAM.
                            if len(to_insert) >= self.SHARE_PRIVATE_WORKING_SET:
                                await self.add_users_who_share_private_room(
                                    room_id, to_insert
                                )
                                to_insert.clear()

                    if to_insert:
                        await self.add_users_who_share_private_room(room_id, to_insert)
                        to_insert.clear()

            # We've finished a room. Delete it from the table.
            await self.db_pool.simple_delete_one(
                TEMP_TABLE + "_rooms", {"room_id": room_id}
            )
            # Update the remaining counter.
            progress["remaining"] -= 1
            await self.db_pool.runInteraction(
                "populate_user_directory",
                self.db_pool.updates._background_update_progress_txn,
                "populate_user_directory_process_rooms",
                progress,
            )

            processed_event_count += event_count

            if processed_event_count > batch_size:
                # Don't process any more rooms, we've hit our batch size.
                return processed_event_count

        return processed_event_count

    async def _populate_user_directory_process_users(
        self, progress: JsonDict, batch_size: int
    ) -> int:
        """
        Add all local users to the user directory.
        """

        def _get_next_batch(txn: LoggingTransaction) -> Optional[List[str]]:
            sql = "SELECT user_id FROM %s LIMIT %s" % (
                TEMP_TABLE + "_users",
                str(batch_size),
            )
            txn.execute(sql)
            user_result = cast(List[Tuple[str]], txn.fetchall())

            if not user_result:
                return None

            users_to_work_on = [x[0] for x in user_result]

            # Get how many are left to process, so we can give status on how
            # far we are in processing
            sql = "SELECT COUNT(*) FROM " + TEMP_TABLE + "_users"
            txn.execute(sql)
            count_result = txn.fetchone()
            assert count_result is not None
            progress["remaining"] = count_result[0]

            return users_to_work_on

        users_to_work_on = await self.db_pool.runInteraction(
            "populate_user_directory_temp_read", _get_next_batch
        )

        # No more users -- complete the transaction.
        if not users_to_work_on:
            await self.db_pool.updates._end_background_update(
                "populate_user_directory_process_users"
            )
            return 1

        logger.debug(
            "Processing the next %d users of %d remaining"
            % (len(users_to_work_on), progress["remaining"])
        )

        for user_id in users_to_work_on:
            if await self.should_include_local_user_in_dir(user_id):
                profile = await self.get_profileinfo(get_localpart_from_id(user_id))
                await self.update_profile_in_user_dir(
                    user_id, profile.display_name, profile.avatar_url
                )

            # We've finished processing a user. Delete it from the table.
            await self.db_pool.simple_delete_one(
                TEMP_TABLE + "_users", {"user_id": user_id}
            )
            # Update the remaining counter.
            progress["remaining"] -= 1
            await self.db_pool.runInteraction(
                "populate_user_directory",
                self.db_pool.updates._background_update_progress_txn,
                "populate_user_directory_process_users",
                progress,
            )

        return len(users_to_work_on)

    async def should_include_local_user_in_dir(self, user: str) -> bool:
        """Certain classes of local user are omitted from the user directory.
        Is this user one of them?
        """
        # We're opting to exclude the appservice sender (user defined by the
        # `sender_localpart` in the appservice registration) even though
        # technically it could be DM-able. In the future, this could potentially
        # be configurable per-appservice whether the appservice sender can be
        # contacted.
        if self.get_app_service_by_user_id(user) is not None:
            return False

        # We're opting to exclude appservice users (anyone matching the user
        # namespace regex in the appservice registration) even though technically
        # they could be DM-able. In the future, this could potentially
        # be configurable per-appservice whether the appservice users can be
        # contacted.
        if self.get_if_app_services_interested_in_user(user):
            # TODO we might want to make this configurable for each app service
            return False

        # Support users are for diagnostics and should not appear in the user directory.
        if await self.is_support_user(user):
            return False

        # Deactivated users aren't contactable, so should not appear in the user directory.
        try:
            if await self.get_user_deactivated_status(user):
                return False
        except StoreError:
            # No such user in the users table. No need to do this when calling
            # is_support_user---that returns False if the user is missing.
            return False

        return True

    async def is_room_world_readable_or_publicly_joinable(self, room_id: str) -> bool:
        """Check if the room is either world_readable or publicly joinable."""

        # Create a state filter that only queries the join rules and history
        # visibility state event types.
        types_to_filter = (
            (EventTypes.JoinRules, ""),
            (EventTypes.RoomHistoryVisibility, ""),
        )

        current_state_ids = await self.get_filtered_current_state_ids(
            room_id, StateFilter.from_types(types_to_filter)
        )

        join_rules_id = current_state_ids.get((EventTypes.JoinRules, ""))
        if join_rules_id:
            join_rule_ev = await self.get_event(join_rules_id, allow_none=True)
            if join_rule_ev:
                if join_rule_ev.content.get("join_rule") == JoinRules.PUBLIC:
                    return True

        hist_vis_id = current_state_ids.get((EventTypes.RoomHistoryVisibility, ""))
        if hist_vis_id:
            hist_vis_ev = await self.get_event(hist_vis_id, allow_none=True)
            if hist_vis_ev:
                if (
                    hist_vis_ev.content.get("history_visibility")
                    == HistoryVisibility.WORLD_READABLE
                ):
                    return True

        return False

    async def update_profile_in_user_dir(
        self, user_id: str, display_name: Optional[str], avatar_url: Optional[str]
    ) -> None:
        """
        Update or add a user's profile in the user directory.
        """
        # If the display name or avatar URL are unexpected types, overwrite them.
        if not isinstance(display_name, str):
            display_name = None
        if not isinstance(avatar_url, str):
            avatar_url = None

        def _update_profile_in_user_dir_txn(txn: LoggingTransaction) -> None:
            self.db_pool.simple_upsert_txn(
                txn,
                table="user_directory",
                keyvalues={"user_id": user_id},
                values={"display_name": display_name, "avatar_url": avatar_url},
                lock=False,  # We're the only inserter
            )

            if isinstance(self.database_engine, PostgresEngine):
                # We weight the localpart most highly, then display name and finally
                # server name
                sql = """
                        INSERT INTO user_directory_search(user_id, vector)
                        VALUES (?,
                            setweight(to_tsvector('simple', ?), 'A')
                            || setweight(to_tsvector('simple', ?), 'D')
                            || setweight(to_tsvector('simple', COALESCE(?, '')), 'B')
                        ) ON CONFLICT (user_id) DO UPDATE SET vector=EXCLUDED.vector
                    """
                txn.execute(
                    sql,
                    (
                        user_id,
                        get_localpart_from_id(user_id),
                        get_domain_from_id(user_id),
                        display_name,
                    ),
                )
            elif isinstance(self.database_engine, Sqlite3Engine):
                value = "%s %s" % (user_id, display_name) if display_name else user_id
                self.db_pool.simple_upsert_txn(
                    txn,
                    table="user_directory_search",
                    keyvalues={"user_id": user_id},
                    values={"value": value},
                    lock=False,  # We're the only inserter
                )
            else:
                # This should be unreachable.
                raise Exception("Unrecognized database engine")

            txn.call_after(self.get_user_in_directory.invalidate, (user_id,))

        await self.db_pool.runInteraction(
            "update_profile_in_user_dir", _update_profile_in_user_dir_txn
        )

    async def add_users_who_share_private_room(
        self, room_id: str, user_id_tuples: Iterable[Tuple[str, str]]
    ) -> None:
        """Insert entries into the users_who_share_private_rooms table. The first
        user should be a local user.

        Args:
            room_id
            user_id_tuples: iterable of 2-tuples of user IDs.
        """

        await self.db_pool.simple_upsert_many(
            table="users_who_share_private_rooms",
            key_names=["user_id", "other_user_id", "room_id"],
            key_values=[
                (user_id, other_user_id, room_id)
                for user_id, other_user_id in user_id_tuples
            ],
            value_names=(),
            value_values=(),
            desc="add_users_who_share_room",
        )

    async def add_users_in_public_rooms(
        self, room_id: str, user_ids: Iterable[str]
    ) -> None:
        """Insert entries into the users_in_public_rooms table.

        Args:
            room_id
            user_ids
        """

        await self.db_pool.simple_upsert_many(
            table="users_in_public_rooms",
            key_names=["user_id", "room_id"],
            key_values=[(user_id, room_id) for user_id in user_ids],
            value_names=(),
            value_values=(),
            desc="add_users_in_public_rooms",
        )

    async def delete_all_from_user_dir(self) -> None:
        """Delete the entire user directory."""

        def _delete_all_from_user_dir_txn(txn: LoggingTransaction) -> None:
            txn.execute("DELETE FROM user_directory")
            txn.execute("DELETE FROM user_directory_search")
            txn.execute("DELETE FROM users_in_public_rooms")
            txn.execute("DELETE FROM users_who_share_private_rooms")
            txn.call_after(self.get_user_in_directory.invalidate_all)

        await self.db_pool.runInteraction(
            "delete_all_from_user_dir", _delete_all_from_user_dir_txn
        )

    @cached()
    async def get_user_in_directory(self, user_id: str) -> Optional[Dict[str, str]]:
        return await self.db_pool.simple_select_one(
            table="user_directory",
            keyvalues={"user_id": user_id},
            retcols=("display_name", "avatar_url"),
            allow_none=True,
            desc="get_user_in_directory",
        )

    async def update_user_directory_stream_pos(self, stream_id: Optional[int]) -> None:
        await self.db_pool.simple_update_one(
            table="user_directory_stream_pos",
            keyvalues={},
            updatevalues={"stream_id": stream_id},
            desc="update_user_directory_stream_pos",
        )


class UserDirectoryStore(UserDirectoryBackgroundUpdateStore):
    # How many records do we batch up before sending them to
    # add_users_who_share_private_rooms?
    SHARE_PRIVATE_WORKING_SET = 500

    def __init__(
        self,
        database: DatabasePool,
        db_conn: Connection,
        hs: "HomeServer",
    ) -> None:
        super().__init__(database, db_conn, hs)

        self._prefer_local_users_in_search = (
            hs.config.userdirectory.user_directory_search_prefer_local_users
        )
        self._server_name = hs.config.server.server_name

    async def remove_from_user_dir(self, user_id: str) -> None:
        """Remove the given user from all user directory tables."""

        def _remove_from_user_dir_txn(txn: LoggingTransaction) -> None:
            self.db_pool.simple_delete_txn(
                txn, table="user_directory", keyvalues={"user_id": user_id}
            )
            self.db_pool.simple_delete_txn(
                txn, table="user_directory_search", keyvalues={"user_id": user_id}
            )
            self.db_pool.simple_delete_txn(
                txn, table="users_in_public_rooms", keyvalues={"user_id": user_id}
            )
            self.db_pool.simple_delete_txn(
                txn,
                table="users_who_share_private_rooms",
                keyvalues={"user_id": user_id},
            )
            self.db_pool.simple_delete_txn(
                txn,
                table="users_who_share_private_rooms",
                keyvalues={"other_user_id": user_id},
            )
            txn.call_after(self.get_user_in_directory.invalidate, (user_id,))

        await self.db_pool.runInteraction(
            "remove_from_user_dir", _remove_from_user_dir_txn
        )

    async def get_users_in_dir_due_to_room(self, room_id: str) -> Set[str]:
        """Get all user_ids that are in the user directory because they're
        in the given room_id.
        """
        user_ids_share_pub = await self.db_pool.simple_select_onecol(
            table="users_in_public_rooms",
            keyvalues={"room_id": room_id},
            retcol="user_id",
            desc="get_users_in_dir_due_to_room",
        )

        user_ids_share_priv = await self.db_pool.simple_select_onecol(
            table="users_who_share_private_rooms",
            keyvalues={"room_id": room_id},
            retcol="other_user_id",
            desc="get_users_in_dir_due_to_room",
        )

        user_ids = set(user_ids_share_pub)
        user_ids.update(user_ids_share_priv)

        return user_ids

    async def remove_user_who_share_room(self, user_id: str, room_id: str) -> None:
        """
        Deletes entries from the user-sharing tables (users_who_share_private_rooms
        and users_in_public_rooms) for the given user and room. The user should
        be a local user.

        Args:
            user_id
            room_id
        """

        def _remove_user_who_share_room_txn(txn: LoggingTransaction) -> None:
            self.db_pool.simple_delete_txn(
                txn,
                table="users_who_share_private_rooms",
                keyvalues={"user_id": user_id, "room_id": room_id},
            )
            self.db_pool.simple_delete_txn(
                txn,
                table="users_who_share_private_rooms",
                keyvalues={"other_user_id": user_id, "room_id": room_id},
            )
            self.db_pool.simple_delete_txn(
                txn,
                table="users_in_public_rooms",
                keyvalues={"user_id": user_id, "room_id": room_id},
            )

        await self.db_pool.runInteraction(
            "remove_user_who_share_room", _remove_user_who_share_room_txn
        )

    async def get_user_dir_rooms_user_is_in(self, user_id: str) -> List[str]:
        """
        Returns the rooms that a user is in, as known to the user directory tables.

        Args:
            user_id: Must be a local user.

        Returns:
            A list of room IDs.
        """
        rows = await self.db_pool.simple_select_onecol(
            table="users_who_share_private_rooms",
            keyvalues={"user_id": user_id},
            retcol="room_id",
            desc="get_rooms_user_is_in",
        )

        pub_rows = await self.db_pool.simple_select_onecol(
            table="users_in_public_rooms",
            keyvalues={"user_id": user_id},
            retcol="room_id",
            desc="get_rooms_user_is_in",
        )

        room_ids = set(pub_rows)
        room_ids.update(rows)
        return list(room_ids)

    async def get_shared_rooms_for_users(
        self, user_id: str, other_user_id: str
    ) -> Set[str]:
        """
        Returns the rooms that a local user shares with another local or remote user.

        Args:
            user_id: The MXID of a local user
            other_user_id: The MXID of the other user

        Returns:
            A set of room IDs that the users share.
        """

        def _get_shared_rooms_for_users_txn(
            txn: LoggingTransaction,
        ) -> List[Dict[str, str]]:
            # A room is shared if both users are known to be in the same public
            # room, or if the pair appears in users_who_share_private_rooms.
            txn.execute(
                """
                SELECT p1.room_id
                FROM users_in_public_rooms as p1
                INNER JOIN users_in_public_rooms as p2
                    ON p1.room_id = p2.room_id
                    AND p1.user_id = ?
                    AND p2.user_id = ?
                UNION
                SELECT room_id
                FROM users_who_share_private_rooms
                WHERE
                    user_id = ?
                    AND other_user_id = ?
                """,
                (user_id, other_user_id, user_id, other_user_id),
            )
            rows = self.db_pool.cursor_to_dict(txn)
            return rows

        rows = await self.db_pool.runInteraction(
            "get_shared_rooms_for_users", _get_shared_rooms_for_users_txn
        )

        return {row["room_id"] for row in rows}

    async def get_user_directory_stream_pos(self) -> Optional[int]:
        """
        Get the stream ID of the user directory stream.

        Returns:
            The stream token or None if the initial background update hasn't happened yet.
        """
        return await self.db_pool.simple_select_one_onecol(
            table="user_directory_stream_pos",
            keyvalues={},
            retcol="stream_id",
            desc="get_user_directory_stream_pos",
        )

    async def search_user_dir(
        self, user_id: str, search_term: str, limit: int
    ) -> JsonDict:
        """Searches for users in the directory.

        Returns:
            dict of the form::

                {
                    "limited": <bool>,  # whether there were more results or not
                    "results": [  # Ordered by best match first
                        {
                            "user_id": <user_id>,
                            "display_name": <display_name>,
                            "avatar_url": <avatar_url>
                        }
                    ]
                }
        """

        if self.hs.config.userdirectory.user_directory_search_all_users:
            join_args = (user_id,)
            where_clause = "user_id != ?"
        else:
            join_args = (user_id,)
            where_clause = """
                (
                    EXISTS (select 1 from users_in_public_rooms WHERE user_id = t.user_id)
                    OR EXISTS (
                        SELECT 1 FROM users_who_share_private_rooms
                        WHERE user_id = ? AND other_user_id = t.user_id
                    )
                )
            """

        # We allow manipulating the ranking algorithm by injecting statements
        # based on config options.
        additional_ordering_statements = []
        ordering_arguments: Tuple[str, ...] = ()

        if isinstance(self.database_engine, PostgresEngine):
            full_query, exact_query, prefix_query = _parse_query_postgres(search_term)

            # If enabled, this config option will rank local users higher than those on
            # remote instances.
            if self._prefer_local_users_in_search:
                # This statement checks whether a given user's user ID contains a server name
                # that matches the local server
                statement = "* (CASE WHEN user_id LIKE ? THEN 2.0 ELSE 1.0 END)"
                additional_ordering_statements.append(statement)

                ordering_arguments += ("%:" + self._server_name,)
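                # Illustrative example: if the local server name is
                # "example.com", the pattern built above is "%:example.com",
                # which matches user IDs such as "@alice:example.com".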

            # We order by rank and then by whether they have profile info.
            # The ranking algorithm is hand tweaked for "best" results. Broadly
            # the idea is we give a higher weight to exact matches.
            # The array of numbers gives the weights for the various parts of the
            # search: (domain, _, display name, localpart)
            sql = """
                SELECT d.user_id AS user_id, display_name, avatar_url
                FROM user_directory_search as t
                INNER JOIN user_directory AS d USING (user_id)
                WHERE
                    %(where_clause)s
                    AND vector @@ to_tsquery('simple', ?)
                ORDER BY
                    (CASE WHEN d.user_id IS NOT NULL THEN 4.0 ELSE 1.0 END)
                    * (CASE WHEN display_name IS NOT NULL THEN 1.2 ELSE 1.0 END)
                    * (CASE WHEN avatar_url IS NOT NULL THEN 1.2 ELSE 1.0 END)
                    * (
                        3 * ts_rank_cd(
                            '{0.1, 0.1, 0.9, 1.0}',
                            vector,
                            to_tsquery('simple', ?),
                            8
                        )
                        + ts_rank_cd(
                            '{0.1, 0.1, 0.9, 1.0}',
                            vector,
                            to_tsquery('simple', ?),
                            8
                        )
                    )
                    %(order_case_statements)s
                    DESC,
                    display_name IS NULL,
                    avatar_url IS NULL
                LIMIT ?
            """ % {
                "where_clause": where_clause,
                "order_case_statements": " ".join(additional_ordering_statements),
            }
            args = (
                join_args
                + (full_query, exact_query, prefix_query)
                + ordering_arguments
                + (limit + 1,)
            )
        elif isinstance(self.database_engine, Sqlite3Engine):
            search_query = _parse_query_sqlite(search_term)

            # If enabled, this config option will rank local users higher than those on
            # remote instances.
            if self._prefer_local_users_in_search:
                # This statement checks whether a given user's user ID contains a server name
                # that matches the local server
                #
                # Note that we need to include a comma at the end for valid SQL
                statement = "user_id LIKE ? DESC,"
                additional_ordering_statements.append(statement)

                ordering_arguments += ("%:" + self._server_name,)

            sql = """
                SELECT d.user_id AS user_id, display_name, avatar_url
                FROM user_directory_search as t
                INNER JOIN user_directory AS d USING (user_id)
                WHERE
                    %(where_clause)s
                    AND value MATCH ?
                ORDER BY
                    rank(matchinfo(user_directory_search)) DESC,
                    %(order_statements)s
                    display_name IS NULL,
                    avatar_url IS NULL
                LIMIT ?
            """ % {
                "where_clause": where_clause,
                "order_statements": " ".join(additional_ordering_statements),
            }
            args = join_args + (search_query,) + ordering_arguments + (limit + 1,)
        else:
            # This should be unreachable.
            raise Exception("Unrecognized database engine")

        results = await self.db_pool.execute(
            "search_user_dir", self.db_pool.cursor_to_dict, sql, *args
        )

        limited = len(results) > limit

        return {"limited": limited, "results": results}


def _parse_query_sqlite(search_term: str) -> str:
    """Takes a plain unicode string from the user and converts it into a form
    that can be passed to the database.
    We use this so that we can add prefix matching, which isn't something
    that is supported by default.

    We specifically add both a prefix and a non-prefix matching term so that
    exact matches get ranked higher.
    """

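    # Illustrative example: a search term of "alice bob" becomes
    # "(alice* OR alice) & (bob* OR bob)", so each word can match either as a
    # prefix or as an exact token.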
    # Pull out the individual words, discarding any non-word characters.
    results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)
    return " & ".join("(%s* OR %s)" % (result, result) for result in results)


def _parse_query_postgres(search_term: str) -> Tuple[str, str, str]:
    """Takes a plain unicode string from the user and converts it into a form
    that can be passed to the database.
    We use this so that we can add prefix matching, which isn't something
    that is supported by default.
    """

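    # Illustrative example: a search term of "alice bob" yields
    #   both   = "(alice:* | alice) & (bob:* | bob)"
    #   exact  = "alice & bob"
    #   prefix = "alice:* & bob:*"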
    # Pull out the individual words, discarding any non-word characters.
    results = re.findall(r"([\w\-]+)", search_term, re.UNICODE)

    both = " & ".join("(%s:* | %s)" % (result, result) for result in results)
    exact = " & ".join("%s" % (result,) for result in results)
    prefix = " & ".join("%s:*" % (result,) for result in results)

    return both, exact, prefix