Clean up exception handling in the startup code (#9059)
Factor out the exception handling in the startup code to a utility function, and fix the some logging and exit code stuff.pull/9069/head
parent
4e04435bda
commit
671138f658
|
@ -0,0 +1 @@
|
|||
Fix incorrect exit code when there is an error at startup.
|
|
@ -1,5 +1,6 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright 2017 New Vector Ltd
|
||||
# Copyright 2019-2021 The Matrix.org Foundation C.I.C
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
|
@ -19,7 +20,7 @@ import signal
|
|||
import socket
|
||||
import sys
|
||||
import traceback
|
||||
from typing import Iterable
|
||||
from typing import Awaitable, Callable, Iterable
|
||||
|
||||
from typing_extensions import NoReturn
|
||||
|
||||
|
@ -143,6 +144,45 @@ def quit_with_error(error_string: str) -> NoReturn:
|
|||
sys.exit(1)
|
||||
|
||||
|
||||
def register_start(cb: Callable[..., Awaitable], *args, **kwargs) -> None:
|
||||
"""Register a callback with the reactor, to be called once it is running
|
||||
|
||||
This can be used to initialise parts of the system which require an asynchronous
|
||||
setup.
|
||||
|
||||
Any exception raised by the callback will be printed and logged, and the process
|
||||
will exit.
|
||||
"""
|
||||
|
||||
async def wrapper():
|
||||
try:
|
||||
await cb(*args, **kwargs)
|
||||
except Exception:
|
||||
# previously, we used Failure().printTraceback() here, in the hope that
|
||||
# would give better tracebacks than traceback.print_exc(). However, that
|
||||
# doesn't handle chained exceptions (with a __cause__ or __context__) well,
|
||||
# and I *think* the need for Failure() is reduced now that we mostly use
|
||||
# async/await.
|
||||
|
||||
# Write the exception to both the logs *and* the unredirected stderr,
|
||||
# because people tend to get confused if it only goes to one or the other.
|
||||
#
|
||||
# One problem with this is that if people are using a logging config that
|
||||
# logs to the console (as is common eg under docker), they will get two
|
||||
# copies of the exception. We could maybe try to detect that, but it's
|
||||
# probably a cost we can bear.
|
||||
logger.fatal("Error during startup", exc_info=True)
|
||||
print("Error during startup:", file=sys.__stderr__)
|
||||
traceback.print_exc(file=sys.__stderr__)
|
||||
|
||||
# it's no use calling sys.exit here, since that just raises a SystemExit
|
||||
# exception which is then caught by the reactor, and everything carries
|
||||
# on as normal.
|
||||
os._exit(1)
|
||||
|
||||
reactor.callWhenRunning(lambda: defer.ensureDeferred(wrapper()))
|
||||
|
||||
|
||||
def listen_metrics(bind_addresses, port):
|
||||
"""
|
||||
Start Prometheus metrics server.
|
||||
|
@ -227,7 +267,7 @@ def refresh_certificate(hs):
|
|||
logger.info("Context factories updated.")
|
||||
|
||||
|
||||
def start(hs: "synapse.server.HomeServer", listeners: Iterable[ListenerConfig]):
|
||||
async def start(hs: "synapse.server.HomeServer", listeners: Iterable[ListenerConfig]):
|
||||
"""
|
||||
Start a Synapse server or worker.
|
||||
|
||||
|
@ -241,10 +281,8 @@ def start(hs: "synapse.server.HomeServer", listeners: Iterable[ListenerConfig]):
|
|||
hs: homeserver instance
|
||||
listeners: Listener configuration ('listeners' in homeserver.yaml)
|
||||
"""
|
||||
try:
|
||||
# Set up the SIGHUP machinery.
|
||||
if hasattr(signal, "SIGHUP"):
|
||||
|
||||
reactor = hs.get_reactor()
|
||||
|
||||
@wrap_as_background_process("sighup")
|
||||
|
@ -304,12 +342,6 @@ def start(hs: "synapse.server.HomeServer", listeners: Iterable[ListenerConfig]):
|
|||
if sys.version_info >= (3, 7):
|
||||
gc.collect()
|
||||
gc.freeze()
|
||||
except Exception:
|
||||
traceback.print_exc(file=sys.stderr)
|
||||
reactor = hs.get_reactor()
|
||||
if reactor.running:
|
||||
reactor.stop()
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def setup_sentry(hs):
|
||||
|
|
|
@ -21,7 +21,7 @@ from typing import Dict, Iterable, Optional, Set
|
|||
|
||||
from typing_extensions import ContextManager
|
||||
|
||||
from twisted.internet import address, reactor
|
||||
from twisted.internet import address
|
||||
|
||||
import synapse
|
||||
import synapse.events
|
||||
|
@ -34,6 +34,7 @@ from synapse.api.urls import (
|
|||
SERVER_KEY_V2_PREFIX,
|
||||
)
|
||||
from synapse.app import _base
|
||||
from synapse.app._base import register_start
|
||||
from synapse.config._base import ConfigError
|
||||
from synapse.config.homeserver import HomeServerConfig
|
||||
from synapse.config.logger import setup_logging
|
||||
|
@ -960,9 +961,7 @@ def start(config_options):
|
|||
# streams. Will no-op if no streams can be written to by this worker.
|
||||
hs.get_replication_streamer()
|
||||
|
||||
reactor.addSystemEventTrigger(
|
||||
"before", "startup", _base.start, hs, config.worker_listeners
|
||||
)
|
||||
register_start(_base.start, hs, config.worker_listeners)
|
||||
|
||||
_base.start_worker_reactor("synapse-generic-worker", config)
|
||||
|
||||
|
|
|
@ -20,8 +20,7 @@ import os
|
|||
import sys
|
||||
from typing import Iterable, Iterator
|
||||
|
||||
from twisted.internet import defer, reactor
|
||||
from twisted.python.failure import Failure
|
||||
from twisted.internet import reactor
|
||||
from twisted.web.resource import EncodingResourceWrapper, IResource
|
||||
from twisted.web.server import GzipEncoderFactory
|
||||
from twisted.web.static import File
|
||||
|
@ -38,7 +37,7 @@ from synapse.api.urls import (
|
|||
WEB_CLIENT_PREFIX,
|
||||
)
|
||||
from synapse.app import _base
|
||||
from synapse.app._base import listen_ssl, listen_tcp, quit_with_error
|
||||
from synapse.app._base import listen_ssl, listen_tcp, quit_with_error, register_start
|
||||
from synapse.config._base import ConfigError
|
||||
from synapse.config.emailconfig import ThreepidBehaviour
|
||||
from synapse.config.homeserver import HomeServerConfig
|
||||
|
@ -414,7 +413,6 @@ def setup(config_options):
|
|||
_base.refresh_certificate(hs)
|
||||
|
||||
async def start():
|
||||
try:
|
||||
# Run the ACME provisioning code, if it's enabled.
|
||||
if hs.config.acme_enabled:
|
||||
acme = hs.get_acme_handler()
|
||||
|
@ -433,21 +431,11 @@ def setup(config_options):
|
|||
await oidc.load_metadata()
|
||||
await oidc.load_jwks()
|
||||
|
||||
_base.start(hs, config.listeners)
|
||||
await _base.start(hs, config.listeners)
|
||||
|
||||
hs.get_datastore().db_pool.updates.start_doing_background_updates()
|
||||
except Exception:
|
||||
# Print the exception and bail out.
|
||||
print("Error during startup:", file=sys.stderr)
|
||||
|
||||
# this gives better tracebacks than traceback.print_exc()
|
||||
Failure().printTraceback(file=sys.stderr)
|
||||
|
||||
if reactor.running:
|
||||
reactor.stop()
|
||||
sys.exit(1)
|
||||
|
||||
reactor.callWhenRunning(lambda: defer.ensureDeferred(start()))
|
||||
register_start(start)
|
||||
|
||||
return hs
|
||||
|
||||
|
|
Loading…
Reference in New Issue