Add some randomness to the high-cpu backoff hack

michaelkaye/matrix_org_hotfixes_increase_replication_timeout
Richard van der Hoff 2019-10-11 09:15:56 +01:00
parent b852a8247d
commit 15b2a50817
2 changed files with 12 additions and 4 deletions

View File

@ -165,8 +165,8 @@ class FederationSender(object):
and not self._transaction_manager.deprioritise_transmission and not self._transaction_manager.deprioritise_transmission
): ):
logger.warning( logger.warning(
"Event processing loop is getting behind: deprioritising " "Event queue is getting behind: deprioritising transaction "
"transaction transmission" "transmission"
) )
self._transaction_manager.deprioritise_transmission = True self._transaction_manager.deprioritise_transmission = True

View File

@ -15,6 +15,7 @@
# limitations under the License. # limitations under the License.
import datetime import datetime
import logging import logging
import random
from prometheus_client import Counter from prometheus_client import Counter
@ -36,6 +37,8 @@ from synapse.util.retryutils import NotRetryingDestination, get_retry_limiter
# This is defined in the Matrix spec and enforced by the receiver. # This is defined in the Matrix spec and enforced by the receiver.
MAX_EDUS_PER_TRANSACTION = 100 MAX_EDUS_PER_TRANSACTION = 100
DEPRIORITISE_SLEEP_TIME = 10
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
@ -191,8 +194,13 @@ class PerDestinationQueue(object):
while True: while True:
if self._transaction_manager.deprioritise_transmission: if self._transaction_manager.deprioritise_transmission:
# if the event-processing loop has got behind, sleep to give it # if the event-processing loop has got behind, sleep to give it
# a chance to catch up # a chance to catch up. Add some randomness so that the transmitters
yield self._clock.sleep(2) # don't all wake up in sync.
sleeptime = random.uniform(
DEPRIORITISE_SLEEP_TIME, DEPRIORITISE_SLEEP_TIME * 2
)
logger.info("TX [%s]: sleeping for %f seconds", sleeptime)
yield self._clock.sleep(sleeptime)
# We have to keep 2 free slots for presence and rr_edus # We have to keep 2 free slots for presence and rr_edus
limit = MAX_EDUS_PER_TRANSACTION - 2 limit = MAX_EDUS_PER_TRANSACTION - 2