From 15b2a50817348c8e25a8e5f420b3dfd20451c6d6 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Fri, 11 Oct 2019 09:15:56 +0100 Subject: [PATCH] Add some randomness to the high-cpu backoff hack --- synapse/federation/sender/__init__.py | 4 ++-- synapse/federation/sender/per_destination_queue.py | 12 ++++++++++-- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/synapse/federation/sender/__init__.py b/synapse/federation/sender/__init__.py index f23bbf0e1f..788b26446d 100644 --- a/synapse/federation/sender/__init__.py +++ b/synapse/federation/sender/__init__.py @@ -165,8 +165,8 @@ class FederationSender(object): and not self._transaction_manager.deprioritise_transmission ): logger.warning( - "Event processing loop is getting behind: deprioritising " - "transaction transmission" + "Event queue is getting behind: deprioritising transaction " + "transmission" ) self._transaction_manager.deprioritise_transmission = True diff --git a/synapse/federation/sender/per_destination_queue.py b/synapse/federation/sender/per_destination_queue.py index b890aaf840..69a6f47b7b 100644 --- a/synapse/federation/sender/per_destination_queue.py +++ b/synapse/federation/sender/per_destination_queue.py @@ -15,6 +15,7 @@ # limitations under the License. import datetime import logging +import random from prometheus_client import Counter @@ -36,6 +37,8 @@ from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter # This is defined in the Matrix spec and enforced by the receiver. MAX_EDUS_PER_TRANSACTION = 100 +DEPRIORITISE_SLEEP_TIME = 10 + logger = logging.getLogger(__name__) @@ -191,8 +194,13 @@ class PerDestinationQueue(object): while True: if self._transaction_manager.deprioritise_transmission: # if the event-processing loop has got behind, sleep to give it - # a chance to catch up - yield self._clock.sleep(2) + # a chance to catch up. Add some randomness so that the transmitters + # don't all wake up in sync. + sleeptime = random.uniform( + DEPRIORITISE_SLEEP_TIME, DEPRIORITISE_SLEEP_TIME * 2 + ) + logger.info("TX [%s]: sleeping for %f seconds", sleeptime) + yield self._clock.sleep(sleeptime) # We have to keep 2 free slots for presence and rr_edus limit = MAX_EDUS_PER_TRANSACTION - 2