User directory background update speedup (#15435)
c.f. #15264 The two changes are: 1. Add indexes so that the select / deletes don't do sequential scans 2. Don't repeatedly call `SELECT count(*)` each iteration, as that's slowpull/15441/head
parent
dabbb94faf
commit
b5192355f6
|
@ -0,0 +1 @@
|
||||||
|
Speed up the user directory background update.
|
|
@ -102,44 +102,34 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
|
||||||
) -> int:
|
) -> int:
|
||||||
# Get all the rooms that we want to process.
|
# Get all the rooms that we want to process.
|
||||||
def _make_staging_area(txn: LoggingTransaction) -> None:
|
def _make_staging_area(txn: LoggingTransaction) -> None:
|
||||||
sql = (
|
sql = f"""
|
||||||
"CREATE TABLE IF NOT EXISTS "
|
CREATE TABLE IF NOT EXISTS {TEMP_TABLE}_rooms AS
|
||||||
+ TEMP_TABLE
|
SELECT room_id, count(*) AS events
|
||||||
+ "_rooms(room_id TEXT NOT NULL, events BIGINT NOT NULL)"
|
FROM current_state_events
|
||||||
)
|
|
||||||
txn.execute(sql)
|
|
||||||
|
|
||||||
sql = (
|
|
||||||
"CREATE TABLE IF NOT EXISTS "
|
|
||||||
+ TEMP_TABLE
|
|
||||||
+ "_position(position TEXT NOT NULL)"
|
|
||||||
)
|
|
||||||
txn.execute(sql)
|
|
||||||
|
|
||||||
# Get rooms we want to process from the database
|
|
||||||
sql = """
|
|
||||||
SELECT room_id, count(*) FROM current_state_events
|
|
||||||
GROUP BY room_id
|
GROUP BY room_id
|
||||||
"""
|
"""
|
||||||
txn.execute(sql)
|
txn.execute(sql)
|
||||||
rooms = list(txn.fetchall())
|
txn.execute(
|
||||||
self.db_pool.simple_insert_many_txn(
|
f"CREATE INDEX IF NOT EXISTS {TEMP_TABLE}_rooms_rm ON {TEMP_TABLE}_rooms (room_id)"
|
||||||
txn, TEMP_TABLE + "_rooms", keys=("room_id", "events"), values=rooms
|
)
|
||||||
|
txn.execute(
|
||||||
|
f"CREATE INDEX IF NOT EXISTS {TEMP_TABLE}_rooms_evs ON {TEMP_TABLE}_rooms (events)"
|
||||||
)
|
)
|
||||||
del rooms
|
|
||||||
|
|
||||||
sql = (
|
sql = f"""
|
||||||
"CREATE TABLE IF NOT EXISTS "
|
CREATE TABLE IF NOT EXISTS {TEMP_TABLE}_position (
|
||||||
+ TEMP_TABLE
|
position TEXT NOT NULL
|
||||||
+ "_users(user_id TEXT NOT NULL)"
|
|
||||||
)
|
)
|
||||||
|
"""
|
||||||
txn.execute(sql)
|
txn.execute(sql)
|
||||||
|
|
||||||
txn.execute("SELECT name FROM users")
|
sql = f"""
|
||||||
users = list(txn.fetchall())
|
CREATE TABLE IF NOT EXISTS {TEMP_TABLE}_users AS
|
||||||
|
SELECT name AS user_id FROM users
|
||||||
self.db_pool.simple_insert_many_txn(
|
"""
|
||||||
txn, TEMP_TABLE + "_users", keys=("user_id",), values=users
|
txn.execute(sql)
|
||||||
|
txn.execute(
|
||||||
|
f"CREATE INDEX IF NOT EXISTS {TEMP_TABLE}_users_idx ON {TEMP_TABLE}_users (user_id)"
|
||||||
)
|
)
|
||||||
|
|
||||||
new_pos = await self.get_max_stream_id_in_current_state_deltas()
|
new_pos = await self.get_max_stream_id_in_current_state_deltas()
|
||||||
|
@ -222,6 +212,7 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
|
||||||
if not rooms_to_work_on:
|
if not rooms_to_work_on:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
if "remaining" not in progress:
|
||||||
# Get how many are left to process, so we can give status on how
|
# Get how many are left to process, so we can give status on how
|
||||||
# far we are in processing
|
# far we are in processing
|
||||||
txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms")
|
txn.execute("SELECT COUNT(*) FROM " + TEMP_TABLE + "_rooms")
|
||||||
|
@ -332,7 +323,14 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
|
||||||
|
|
||||||
if processed_event_count > batch_size:
|
if processed_event_count > batch_size:
|
||||||
# Don't process any more rooms, we've hit our batch size.
|
# Don't process any more rooms, we've hit our batch size.
|
||||||
return processed_event_count
|
break
|
||||||
|
|
||||||
|
await self.db_pool.runInteraction(
|
||||||
|
"populate_user_directory",
|
||||||
|
self.db_pool.updates._background_update_progress_txn,
|
||||||
|
"populate_user_directory_process_rooms",
|
||||||
|
progress,
|
||||||
|
)
|
||||||
|
|
||||||
return processed_event_count
|
return processed_event_count
|
||||||
|
|
||||||
|
@ -356,6 +354,7 @@ class UserDirectoryBackgroundUpdateStore(StateDeltasStore):
|
||||||
|
|
||||||
users_to_work_on = [x[0] for x in user_result]
|
users_to_work_on = [x[0] for x in user_result]
|
||||||
|
|
||||||
|
if "remaining" not in progress:
|
||||||
# Get how many are left to process, so we can give status on how
|
# Get how many are left to process, so we can give status on how
|
||||||
# far we are in processing
|
# far we are in processing
|
||||||
sql = "SELECT COUNT(*) FROM " + TEMP_TABLE + "_users"
|
sql = "SELECT COUNT(*) FROM " + TEMP_TABLE + "_users"
|
||||||
|
|
Loading…
Reference in New Issue