Simplify reap_monthly_active_users (#7558)

We can use `make_in_list_sql_clause` rather than doing our own half-baked
equivalent, which has the benefit of working just fine with empty lists.

(This has quite a lot of tests, so I think it's pretty safe.)
pull/7575/head
Richard van der Hoff 2020-05-23 01:20:10 +01:00 committed by GitHub
parent f4269694ce
commit d14c4d6b6d
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 40 additions and 57 deletions

1
changelog.d/7558.misc Normal file
View File

@ -0,0 +1 @@
Simplify `reap_monthly_active_users`.

View File

@ -17,7 +17,7 @@ import logging
from twisted.internet import defer from twisted.internet import defer
from synapse.storage._base import SQLBaseStore from synapse.storage._base import SQLBaseStore
from synapse.storage.database import Database from synapse.storage.database import Database, make_in_list_sql_clause
from synapse.util.caches.descriptors import cached from synapse.util.caches.descriptors import cached
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -187,75 +187,57 @@ class MonthlyActiveUsersStore(MonthlyActiveUsersWorkerStore):
""" """
thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30) thirty_days_ago = int(self._clock.time_msec()) - (1000 * 60 * 60 * 24 * 30)
query_args = [thirty_days_ago]
base_sql = "DELETE FROM monthly_active_users WHERE timestamp < ?"
# Need if/else since 'AND user_id NOT IN ({})' fails on Postgres in_clause, in_clause_args = make_in_list_sql_clause(
# when len(reserved_users) == 0. Works fine on sqlite. self.database_engine, "user_id", reserved_users
if len(reserved_users) > 0: )
# questionmarks is a hack to overcome sqlite not supporting
# tuples in 'WHERE IN %s'
question_marks = ",".join("?" * len(reserved_users))
query_args.extend(reserved_users) txn.execute(
sql = base_sql + " AND user_id NOT IN ({})".format(question_marks) "DELETE FROM monthly_active_users WHERE timestamp < ? AND NOT %s"
else: % (in_clause,),
sql = base_sql [thirty_days_ago] + in_clause_args,
)
txn.execute(sql, query_args)
if self._limit_usage_by_mau: if self._limit_usage_by_mau:
# If MAU user count still exceeds the MAU threshold, then delete on # If MAU user count still exceeds the MAU threshold, then delete on
# a least recently active basis. # a least recently active basis.
# Note it is not possible to write this query using OFFSET due to # Note it is not possible to write this query using OFFSET due to
# incompatibilities in how sqlite and postgres support the feature. # incompatibilities in how sqlite and postgres support the feature.
# sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present # Sqlite requires 'LIMIT -1 OFFSET ?', the LIMIT must be present,
# While Postgres does not require 'LIMIT', but also does not support # while Postgres does not require 'LIMIT', but also does not support
# negative LIMIT values. So there is no way to write it that both can # negative LIMIT values. So there is no way to write it that both can
# support # support
if len(reserved_users) == 0:
sql = """ # Limit must be >= 0 for postgres
DELETE FROM monthly_active_users num_of_non_reserved_users_to_remove = max(
WHERE user_id NOT IN ( self._max_mau_value - len(reserved_users), 0
SELECT user_id FROM monthly_active_users )
ORDER BY timestamp DESC
LIMIT ? # It is important to filter reserved users twice to guard
) # against the case where the reserved user is present in the
""" # SELECT, meaning that a legitimate mau is deleted.
txn.execute(sql, ((self._max_mau_value),)) sql = """
# Need if/else since 'AND user_id NOT IN ({})' fails on Postgres DELETE FROM monthly_active_users
# when len(reserved_users) == 0. Works fine on sqlite. WHERE user_id NOT IN (
else: SELECT user_id FROM monthly_active_users
# Must be >= 0 for postgres WHERE NOT %s
num_of_non_reserved_users_to_remove = max( ORDER BY timestamp DESC
self._max_mau_value - len(reserved_users), 0 LIMIT ?
) )
AND NOT %s
""" % (
in_clause,
in_clause,
)
# It is important to filter reserved users twice to guard query_args = (
# against the case where the reserved user is present in the in_clause_args
# SELECT, meaning that a legitmate mau is deleted. + [num_of_non_reserved_users_to_remove]
sql = """ + in_clause_args
DELETE FROM monthly_active_users )
WHERE user_id NOT IN ( txn.execute(sql, query_args)
SELECT user_id FROM monthly_active_users
WHERE user_id NOT IN ({})
ORDER BY timestamp DESC
LIMIT ?
)
AND user_id NOT IN ({})
""".format(
question_marks, question_marks
)
query_args = [ # It seems poor to invalidate the whole cache. Postgres supports
*reserved_users,
num_of_non_reserved_users_to_remove,
*reserved_users,
]
txn.execute(sql, query_args)
# It seems poor to invalidate the whole cache, Postgres supports
# 'Returning' which would allow me to invalidate only the # 'Returning' which would allow me to invalidate only the
# specific users, but sqlite has no way to do this and instead # specific users, but sqlite has no way to do this and instead
# I would need to SELECT and then DELETE which without locking