Merge pull request #3875 from matrix-org/erikj/extra_timeouts
Add an awful secondary timeout to fix wedged requests
pull/3215/merge
commit
c30cfff572
|
@ -0,0 +1 @@
|
||||||
|
Mitigate outbound federation randomly becoming wedged
|
|
@ -42,6 +42,7 @@ from synapse.api.errors import (
|
||||||
)
|
)
|
||||||
from synapse.http.endpoint import matrix_federation_endpoint
|
from synapse.http.endpoint import matrix_federation_endpoint
|
||||||
from synapse.util import logcontext
|
from synapse.util import logcontext
|
||||||
|
from synapse.util.async_helpers import timeout_no_seriously
|
||||||
from synapse.util.logcontext import make_deferred_yieldable
|
from synapse.util.logcontext import make_deferred_yieldable
|
||||||
from synapse.util.metrics import Measure
|
from synapse.util.metrics import Measure
|
||||||
|
|
||||||
|
@ -228,6 +229,16 @@ class MatrixFederationHttpClient(object):
|
||||||
)
|
)
|
||||||
request_deferred.addTimeout(_sec_timeout, self.hs.get_reactor())
|
request_deferred.addTimeout(_sec_timeout, self.hs.get_reactor())
|
||||||
|
|
||||||
|
# Sometimes the timeout above doesn't work, so lets hack yet
|
||||||
|
# another layer of timeouts in in the vain hope that at some
|
||||||
|
# point the world made sense and this really really really
|
||||||
|
# should work.
|
||||||
|
request_deferred = timeout_no_seriously(
|
||||||
|
request_deferred,
|
||||||
|
timeout=_sec_timeout * 2,
|
||||||
|
reactor=self.hs.get_reactor(),
|
||||||
|
)
|
||||||
|
|
||||||
with Measure(self.clock, "outbound_request"):
|
with Measure(self.clock, "outbound_request"):
|
||||||
response = yield make_deferred_yieldable(
|
response = yield make_deferred_yieldable(
|
||||||
request_deferred,
|
request_deferred,
|
||||||
|
|
|
@ -438,3 +438,54 @@ def _cancelled_to_timed_out_error(value, timeout):
|
||||||
value.trap(CancelledError)
|
value.trap(CancelledError)
|
||||||
raise DeferredTimeoutError(timeout, "Deferred")
|
raise DeferredTimeoutError(timeout, "Deferred")
|
||||||
return value
|
return value
|
||||||
|
|
||||||
|
|
||||||
|
def timeout_no_seriously(deferred, timeout, reactor):
    """Wrap ``deferred`` in a second, independent timeout.

    The built-in Twisted ``Deferred.addTimeout`` (and the method above)
    completely fail to time things out under some unknown circumstances.
    This tries a different way of timing things out: rather than relying on
    the original deferred reacting to its own cancellation, a *new* deferred
    is returned and is errbacked directly when the timer fires.

    TODO: Kill this with fire.

    Args:
        deferred: the deferred to time out. It is cancelled when the timer
            fires, and has extra callbacks added to it.
        timeout: delay handed to ``reactor.callLater`` before the timeout
            fires; also passed to ``DeferredTimeoutError``.
        reactor: object providing ``callLater(delay, callable)``.

    Returns:
        A new deferred which resolves with the result (or failure) of
        ``deferred``, or fails with ``DeferredTimeoutError`` if the timer
        fires first.
    """
    # Imported locally so that this function does not depend on a
    # module-level logger being defined.
    import logging

    new_d = defer.Deferred()

    # A one-element list so that the nested functions can mutate the flag.
    timed_out = [False]

    def time_it_out():
        timed_out[0] = True

        try:
            deferred.cancel()
        except Exception:
            # If the canceller throws and we let the exception escape, new_d
            # would never be errbacked and the caller would stay wedged
            # forever, which is exactly what this function exists to prevent.
            logging.getLogger(__name__).exception(
                "Canceller failed during timeout"
            )

        # Cancelling normally sets off the callback chain below, which
        # resolves new_d; if it did not, fail new_d ourselves.
        if not new_d.called:
            new_d.errback(DeferredTimeoutError(timeout, "Deferred"))

    delayed_call = reactor.callLater(timeout, time_it_out)

    def convert_cancelled(value):
        # Only rewrite the result if it was our timer that fired; anything
        # that completed before the timeout is passed through untouched.
        if timed_out[0]:
            return _cancelled_to_timed_out_error(value, timeout)
        return value

    deferred.addBoth(convert_cancelled)

    def cancel_timeout(result):
        # stop the pending call to cancel the deferred if it's been fired
        if delayed_call.active():
            delayed_call.cancel()
        return result

    deferred.addBoth(cancel_timeout)

    # Forward the outcome to new_d, unless the timeout path has already
    # resolved it (a deferred may only be fired once).
    def success_cb(val):
        if not new_d.called:
            new_d.callback(val)

    def failure_cb(val):
        if not new_d.called:
            new_d.errback(val)

    deferred.addCallbacks(success_cb, failure_cb)

    return new_d
|
||||||
|
|
Loading…
Reference in New Issue