Reload cache factors from disk on SIGHUP (#12673)
parent
a559c8b0d9
commit
d38d242411
|
@ -0,0 +1 @@
|
|||
Synapse will now reload [cache config](https://matrix-org.github.io/synapse/latest/usage/configuration/config_documentation.html#caching) when it receives a [SIGHUP](https://en.wikipedia.org/wiki/SIGHUP) signal.
|
|
@ -730,6 +730,12 @@ retention:
|
|||
# A cache 'factor' is a multiplier that can be applied to each of
|
||||
# Synapse's caches in order to increase or decrease the maximum
|
||||
# number of entries that can be stored.
|
||||
#
|
||||
# The configuration for cache factors (caches.global_factor and
|
||||
# caches.per_cache_factors) can be reloaded while the application is running,
|
||||
# by sending a SIGHUP signal to the Synapse process. Changes to other parts of
|
||||
# the caching config will NOT be applied after a SIGHUP is received; a restart
|
||||
# is necessary.
|
||||
|
||||
# The number of events to cache in memory. Not affected by
|
||||
# caches.global_factor.
|
||||
|
|
|
@ -1130,6 +1130,23 @@ caches:
|
|||
expire_caches: false
|
||||
sync_response_cache_duration: 2m
|
||||
```
|
||||
|
||||
### Reloading cache factors
|
||||
|
||||
The cache factors (i.e. `caches.global_factor` and `caches.per_cache_factors`) may be reloaded at any time by sending a
|
||||
[`SIGHUP`](https://en.wikipedia.org/wiki/SIGHUP) signal to Synapse using e.g.
|
||||
|
||||
```commandline
|
||||
kill -HUP [PID_OF_SYNAPSE_PROCESS]
|
||||
```
|
||||
|
||||
If you are running multiple workers, you must individually update the worker
|
||||
config file and send this signal to each worker process.
|
||||
|
||||
If you're using the [example systemd service](https://github.com/matrix-org/synapse/blob/develop/contrib/systemd/matrix-synapse.service)
|
||||
file in Synapse's `contrib` directory, you can send a `SIGHUP` signal by using
|
||||
`systemctl reload matrix-synapse`.
|
||||
|
||||
---
|
||||
## Database ##
|
||||
Config options related to database settings.
|
||||
|
|
|
@ -49,9 +49,12 @@ from twisted.logger import LoggingFile, LogLevel
|
|||
from twisted.protocols.tls import TLSMemoryBIOFactory
|
||||
from twisted.python.threadpool import ThreadPool
|
||||
|
||||
import synapse.util.caches
|
||||
from synapse.api.constants import MAX_PDU_SIZE
|
||||
from synapse.app import check_bind_error
|
||||
from synapse.app.phone_stats_home import start_phone_stats_home
|
||||
from synapse.config import ConfigError
|
||||
from synapse.config._base import format_config_error
|
||||
from synapse.config.homeserver import HomeServerConfig
|
||||
from synapse.config.server import ManholeConfig
|
||||
from synapse.crypto import context_factory
|
||||
|
@ -432,6 +435,10 @@ async def start(hs: "HomeServer") -> None:
|
|||
signal.signal(signal.SIGHUP, run_sighup)
|
||||
|
||||
register_sighup(refresh_certificate, hs)
|
||||
register_sighup(reload_cache_config, hs.config)
|
||||
|
||||
# Apply the cache config.
|
||||
hs.config.caches.resize_all_caches()
|
||||
|
||||
# Load the certificate from disk.
|
||||
refresh_certificate(hs)
|
||||
|
@ -486,6 +493,43 @@ async def start(hs: "HomeServer") -> None:
|
|||
atexit.register(gc.freeze)
|
||||
|
||||
|
||||
def reload_cache_config(config: HomeServerConfig) -> None:
|
||||
"""Reload cache config from disk and immediately apply it.resize caches accordingly.
|
||||
|
||||
If the config is invalid, a `ConfigError` is logged and no changes are made.
|
||||
|
||||
Otherwise, this:
|
||||
- replaces the `caches` section on the given `config` object,
|
||||
- resizes all caches according to the new cache factors, and
|
||||
|
||||
Note that the following cache config keys are read, but not applied:
|
||||
- event_cache_size: used to set a max_size and _original_max_size on
|
||||
EventsWorkerStore._get_event_cache when it is created. We'd have to update
|
||||
the _original_max_size (and maybe
|
||||
- sync_response_cache_duration: would have to update the timeout_sec attribute on
|
||||
HomeServer -> SyncHandler -> ResponseCache.
|
||||
- track_memory_usage. This affects synapse.util.caches.TRACK_MEMORY_USAGE which
|
||||
influences Synapse's self-reported metrics.
|
||||
|
||||
Also, the HTTPConnectionPool in SimpleHTTPClient sets its maxPersistentPerHost
|
||||
parameter based on the global_factor. This won't be applied on a config reload.
|
||||
"""
|
||||
try:
|
||||
previous_cache_config = config.reload_config_section("caches")
|
||||
except ConfigError as e:
|
||||
logger.warning("Failed to reload cache config")
|
||||
for f in format_config_error(e):
|
||||
logger.warning(f)
|
||||
else:
|
||||
logger.debug(
|
||||
"New cache config. Was:\n %s\nNow:\n",
|
||||
previous_cache_config.__dict__,
|
||||
config.caches.__dict__,
|
||||
)
|
||||
synapse.util.caches.TRACK_MEMORY_USAGE = config.caches.track_memory_usage
|
||||
config.caches.resize_all_caches()
|
||||
|
||||
|
||||
def setup_sentry(hs: "HomeServer") -> None:
|
||||
"""Enable sentry integration, if enabled in configuration"""
|
||||
|
||||
|
|
|
@ -16,7 +16,7 @@
|
|||
import logging
|
||||
import os
|
||||
import sys
|
||||
from typing import Dict, Iterable, Iterator, List
|
||||
from typing import Dict, Iterable, List
|
||||
|
||||
from matrix_common.versionstring import get_distribution_version_string
|
||||
|
||||
|
@ -45,7 +45,7 @@ from synapse.app._base import (
|
|||
redirect_stdio_to_logs,
|
||||
register_start,
|
||||
)
|
||||
from synapse.config._base import ConfigError
|
||||
from synapse.config._base import ConfigError, format_config_error
|
||||
from synapse.config.emailconfig import ThreepidBehaviour
|
||||
from synapse.config.homeserver import HomeServerConfig
|
||||
from synapse.config.server import ListenerConfig
|
||||
|
@ -399,38 +399,6 @@ def setup(config_options: List[str]) -> SynapseHomeServer:
|
|||
return hs
|
||||
|
||||
|
||||
def format_config_error(e: ConfigError) -> Iterator[str]:
|
||||
"""
|
||||
Formats a config error neatly
|
||||
|
||||
The idea is to format the immediate error, plus the "causes" of those errors,
|
||||
hopefully in a way that makes sense to the user. For example:
|
||||
|
||||
Error in configuration at 'oidc_config.user_mapping_provider.config.display_name_template':
|
||||
Failed to parse config for module 'JinjaOidcMappingProvider':
|
||||
invalid jinja template:
|
||||
unexpected end of template, expected 'end of print statement'.
|
||||
|
||||
Args:
|
||||
e: the error to be formatted
|
||||
|
||||
Returns: An iterator which yields string fragments to be formatted
|
||||
"""
|
||||
yield "Error in configuration"
|
||||
|
||||
if e.path:
|
||||
yield " at '%s'" % (".".join(e.path),)
|
||||
|
||||
yield ":\n %s" % (e.msg,)
|
||||
|
||||
parent_e = e.__cause__
|
||||
indent = 1
|
||||
while parent_e:
|
||||
indent += 1
|
||||
yield ":\n%s%s" % (" " * indent, str(parent_e))
|
||||
parent_e = parent_e.__cause__
|
||||
|
||||
|
||||
def run(hs: HomeServer) -> None:
|
||||
_base.start_reactor(
|
||||
"synapse-homeserver",
|
||||
|
|
|
@ -16,14 +16,18 @@
|
|||
|
||||
import argparse
|
||||
import errno
|
||||
import logging
|
||||
import os
|
||||
from collections import OrderedDict
|
||||
from hashlib import sha256
|
||||
from textwrap import dedent
|
||||
from typing import (
|
||||
Any,
|
||||
ClassVar,
|
||||
Collection,
|
||||
Dict,
|
||||
Iterable,
|
||||
Iterator,
|
||||
List,
|
||||
MutableMapping,
|
||||
Optional,
|
||||
|
@ -40,6 +44,8 @@ import yaml
|
|||
|
||||
from synapse.util.templates import _create_mxc_to_http_filter, _format_ts_filter
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
|
||||
class ConfigError(Exception):
|
||||
"""Represents a problem parsing the configuration
|
||||
|
@ -55,6 +61,38 @@ class ConfigError(Exception):
|
|||
self.path = path
|
||||
|
||||
|
||||
def format_config_error(e: ConfigError) -> Iterator[str]:
|
||||
"""
|
||||
Formats a config error neatly
|
||||
|
||||
The idea is to format the immediate error, plus the "causes" of those errors,
|
||||
hopefully in a way that makes sense to the user. For example:
|
||||
|
||||
Error in configuration at 'oidc_config.user_mapping_provider.config.display_name_template':
|
||||
Failed to parse config for module 'JinjaOidcMappingProvider':
|
||||
invalid jinja template:
|
||||
unexpected end of template, expected 'end of print statement'.
|
||||
|
||||
Args:
|
||||
e: the error to be formatted
|
||||
|
||||
Returns: An iterator which yields string fragments to be formatted
|
||||
"""
|
||||
yield "Error in configuration"
|
||||
|
||||
if e.path:
|
||||
yield " at '%s'" % (".".join(e.path),)
|
||||
|
||||
yield ":\n %s" % (e.msg,)
|
||||
|
||||
parent_e = e.__cause__
|
||||
indent = 1
|
||||
while parent_e:
|
||||
indent += 1
|
||||
yield ":\n%s%s" % (" " * indent, str(parent_e))
|
||||
parent_e = parent_e.__cause__
|
||||
|
||||
|
||||
# We split these messages out to allow packages to override with package
|
||||
# specific instructions.
|
||||
MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS = """\
|
||||
|
@ -119,7 +157,7 @@ class Config:
|
|||
defined in subclasses.
|
||||
"""
|
||||
|
||||
section: str
|
||||
section: ClassVar[str]
|
||||
|
||||
def __init__(self, root_config: "RootConfig" = None):
|
||||
self.root = root_config
|
||||
|
@ -309,9 +347,12 @@ class RootConfig:
|
|||
class, lower-cased and with "Config" removed.
|
||||
"""
|
||||
|
||||
config_classes = []
|
||||
config_classes: List[Type[Config]] = []
|
||||
|
||||
def __init__(self, config_files: Collection[str] = ()):
|
||||
# Capture absolute paths here, so we can reload config after we daemonize.
|
||||
self.config_files = [os.path.abspath(path) for path in config_files]
|
||||
|
||||
def __init__(self):
|
||||
for config_class in self.config_classes:
|
||||
if config_class.section is None:
|
||||
raise ValueError("%r requires a section name" % (config_class,))
|
||||
|
@ -512,12 +553,10 @@ class RootConfig:
|
|||
object from parser.parse_args(..)`
|
||||
"""
|
||||
|
||||
obj = cls()
|
||||
|
||||
config_args = parser.parse_args(argv)
|
||||
|
||||
config_files = find_config_files(search_paths=config_args.config_path)
|
||||
|
||||
obj = cls(config_files)
|
||||
if not config_files:
|
||||
parser.error("Must supply a config file.")
|
||||
|
||||
|
@ -627,7 +666,7 @@ class RootConfig:
|
|||
|
||||
generate_missing_configs = config_args.generate_missing_configs
|
||||
|
||||
obj = cls()
|
||||
obj = cls(config_files)
|
||||
|
||||
if config_args.generate_config:
|
||||
if config_args.report_stats is None:
|
||||
|
@ -727,6 +766,34 @@ class RootConfig:
|
|||
) -> None:
|
||||
self.invoke_all("generate_files", config_dict, config_dir_path)
|
||||
|
||||
def reload_config_section(self, section_name: str) -> Config:
|
||||
"""Reconstruct the given config section, leaving all others unchanged.
|
||||
|
||||
This works in three steps:
|
||||
|
||||
1. Create a new instance of the relevant `Config` subclass.
|
||||
2. Call `read_config` on that instance to parse the new config.
|
||||
3. Replace the existing config instance with the new one.
|
||||
|
||||
:raises ValueError: if the given `section` does not exist.
|
||||
:raises ConfigError: for any other problems reloading config.
|
||||
|
||||
:returns: the previous config object, which no longer has a reference to this
|
||||
RootConfig.
|
||||
"""
|
||||
existing_config: Optional[Config] = getattr(self, section_name, None)
|
||||
if existing_config is None:
|
||||
raise ValueError(f"Unknown config section '{section_name}'")
|
||||
logger.info("Reloading config section '%s'", section_name)
|
||||
|
||||
new_config_data = read_config_files(self.config_files)
|
||||
new_config = type(existing_config)(self)
|
||||
new_config.read_config(new_config_data)
|
||||
setattr(self, section_name, new_config)
|
||||
|
||||
existing_config.root = None
|
||||
return existing_config
|
||||
|
||||
|
||||
def read_config_files(config_files: Iterable[str]) -> Dict[str, Any]:
|
||||
"""Read the config files into a dict
|
||||
|
|
|
@ -1,15 +1,19 @@
|
|||
import argparse
|
||||
from typing import (
|
||||
Any,
|
||||
Collection,
|
||||
Dict,
|
||||
Iterable,
|
||||
Iterator,
|
||||
List,
|
||||
Literal,
|
||||
MutableMapping,
|
||||
Optional,
|
||||
Tuple,
|
||||
Type,
|
||||
TypeVar,
|
||||
Union,
|
||||
overload,
|
||||
)
|
||||
|
||||
import jinja2
|
||||
|
@ -64,6 +68,8 @@ class ConfigError(Exception):
|
|||
self.msg = msg
|
||||
self.path = path
|
||||
|
||||
def format_config_error(e: ConfigError) -> Iterator[str]: ...
|
||||
|
||||
MISSING_REPORT_STATS_CONFIG_INSTRUCTIONS: str
|
||||
MISSING_REPORT_STATS_SPIEL: str
|
||||
MISSING_SERVER_NAME: str
|
||||
|
@ -117,7 +123,8 @@ class RootConfig:
|
|||
background_updates: background_updates.BackgroundUpdateConfig
|
||||
|
||||
config_classes: List[Type["Config"]] = ...
|
||||
def __init__(self) -> None: ...
|
||||
config_files: List[str]
|
||||
def __init__(self, config_files: Collection[str] = ...) -> None: ...
|
||||
def invoke_all(
|
||||
self, func_name: str, *args: Any, **kwargs: Any
|
||||
) -> MutableMapping[str, Any]: ...
|
||||
|
@ -157,6 +164,12 @@ class RootConfig:
|
|||
def generate_missing_files(
|
||||
self, config_dict: dict, config_dir_path: str
|
||||
) -> None: ...
|
||||
@overload
|
||||
def reload_config_section(
|
||||
self, section_name: Literal["caches"]
|
||||
) -> cache.CacheConfig: ...
|
||||
@overload
|
||||
def reload_config_section(self, section_name: str) -> Config: ...
|
||||
|
||||
class Config:
|
||||
root: RootConfig
|
||||
|
|
|
@ -69,11 +69,11 @@ def _canonicalise_cache_name(cache_name: str) -> str:
|
|||
def add_resizable_cache(
|
||||
cache_name: str, cache_resize_callback: Callable[[float], None]
|
||||
) -> None:
|
||||
"""Register a cache that's size can dynamically change
|
||||
"""Register a cache whose size can dynamically change
|
||||
|
||||
Args:
|
||||
cache_name: A reference to the cache
|
||||
cache_resize_callback: A callback function that will be ran whenever
|
||||
cache_resize_callback: A callback function that will run whenever
|
||||
the cache needs to be resized
|
||||
"""
|
||||
# Some caches have '*' in them which we strip out.
|
||||
|
@ -96,6 +96,13 @@ class CacheConfig(Config):
|
|||
section = "caches"
|
||||
_environ = os.environ
|
||||
|
||||
event_cache_size: int
|
||||
cache_factors: Dict[str, float]
|
||||
global_factor: float
|
||||
track_memory_usage: bool
|
||||
expiry_time_msec: Optional[int]
|
||||
sync_response_cache_duration: int
|
||||
|
||||
@staticmethod
|
||||
def reset() -> None:
|
||||
"""Resets the caches to their defaults. Used for tests."""
|
||||
|
@ -115,6 +122,12 @@ class CacheConfig(Config):
|
|||
# A cache 'factor' is a multiplier that can be applied to each of
|
||||
# Synapse's caches in order to increase or decrease the maximum
|
||||
# number of entries that can be stored.
|
||||
#
|
||||
# The configuration for cache factors (caches.global_factor and
|
||||
# caches.per_cache_factors) can be reloaded while the application is running,
|
||||
# by sending a SIGHUP signal to the Synapse process. Changes to other parts of
|
||||
# the caching config will NOT be applied after a SIGHUP is received; a restart
|
||||
# is necessary.
|
||||
|
||||
# The number of events to cache in memory. Not affected by
|
||||
# caches.global_factor.
|
||||
|
@ -174,21 +187,21 @@ class CacheConfig(Config):
|
|||
"""
|
||||
|
||||
def read_config(self, config: JsonDict, **kwargs: Any) -> None:
|
||||
"""Populate this config object with values from `config`.
|
||||
|
||||
This method does NOT resize existing or future caches: use `resize_all_caches`.
|
||||
We use two separate methods so that we can reject bad config before applying it.
|
||||
"""
|
||||
self.event_cache_size = self.parse_size(
|
||||
config.get("event_cache_size", _DEFAULT_EVENT_CACHE_SIZE)
|
||||
)
|
||||
self.cache_factors: Dict[str, float] = {}
|
||||
self.cache_factors = {}
|
||||
|
||||
cache_config = config.get("caches") or {}
|
||||
self.global_factor = cache_config.get(
|
||||
"global_factor", properties.default_factor_size
|
||||
)
|
||||
self.global_factor = cache_config.get("global_factor", _DEFAULT_FACTOR_SIZE)
|
||||
if not isinstance(self.global_factor, (int, float)):
|
||||
raise ConfigError("caches.global_factor must be a number.")
|
||||
|
||||
# Set the global one so that it's reflected in new caches
|
||||
properties.default_factor_size = self.global_factor
|
||||
|
||||
# Load cache factors from the config
|
||||
individual_factors = cache_config.get("per_cache_factors") or {}
|
||||
if not isinstance(individual_factors, dict):
|
||||
|
@ -230,7 +243,7 @@ class CacheConfig(Config):
|
|||
cache_entry_ttl = cache_config.get("cache_entry_ttl", "30m")
|
||||
|
||||
if expire_caches:
|
||||
self.expiry_time_msec: Optional[int] = self.parse_duration(cache_entry_ttl)
|
||||
self.expiry_time_msec = self.parse_duration(cache_entry_ttl)
|
||||
else:
|
||||
self.expiry_time_msec = None
|
||||
|
||||
|
@ -254,19 +267,19 @@ class CacheConfig(Config):
|
|||
cache_config.get("sync_response_cache_duration", 0)
|
||||
)
|
||||
|
||||
# Resize all caches (if necessary) with the new factors we've loaded
|
||||
self.resize_all_caches()
|
||||
|
||||
# Store this function so that it can be called from other classes without
|
||||
# needing an instance of Config
|
||||
properties.resize_all_caches_func = self.resize_all_caches
|
||||
|
||||
def resize_all_caches(self) -> None:
|
||||
"""Ensure all cache sizes are up to date
|
||||
"""Ensure all cache sizes are up-to-date.
|
||||
|
||||
For each cache, run the mapped callback function with either
|
||||
a specific cache factor or the default, global one.
|
||||
"""
|
||||
# Set the global factor size, so that new caches are appropriately sized.
|
||||
properties.default_factor_size = self.global_factor
|
||||
|
||||
# Store this function so that it can be called from other classes without
|
||||
# needing an instance of CacheConfig
|
||||
properties.resize_all_caches_func = self.resize_all_caches
|
||||
|
||||
# block other threads from modifying _CACHES while we iterate it.
|
||||
with _CACHES_LOCK:
|
||||
for cache_name, callback in _CACHES.items():
|
||||
|
|
|
@ -348,7 +348,7 @@ class SimpleHttpClient:
|
|||
# XXX: The justification for using the cache factor here is that larger instances
|
||||
# will need both more cache and more connections.
|
||||
# Still, this should probably be a separate dial
|
||||
pool.maxPersistentPerHost = max((100 * hs.config.caches.global_factor, 5))
|
||||
pool.maxPersistentPerHost = max(int(100 * hs.config.caches.global_factor), 5)
|
||||
pool.cachedConnectionTimeout = 2 * 60
|
||||
|
||||
self.agent: IAgent = ProxyAgent(
|
||||
|
|
|
@ -38,6 +38,7 @@ class CacheConfigTests(TestCase):
|
|||
"SYNAPSE_NOT_CACHE": "BLAH",
|
||||
}
|
||||
self.config.read_config(config, config_dir_path="", data_dir_path="")
|
||||
self.config.resize_all_caches()
|
||||
|
||||
self.assertEqual(dict(self.config.cache_factors), {"something_or_other": 2.0})
|
||||
|
||||
|
@ -52,6 +53,7 @@ class CacheConfigTests(TestCase):
|
|||
"SYNAPSE_CACHE_FACTOR_FOO": 1,
|
||||
}
|
||||
self.config.read_config(config, config_dir_path="", data_dir_path="")
|
||||
self.config.resize_all_caches()
|
||||
|
||||
self.assertEqual(
|
||||
dict(self.config.cache_factors),
|
||||
|
@ -71,6 +73,7 @@ class CacheConfigTests(TestCase):
|
|||
|
||||
config = {"caches": {"per_cache_factors": {"foo": 3}}}
|
||||
self.config.read_config(config)
|
||||
self.config.resize_all_caches()
|
||||
|
||||
self.assertEqual(cache.max_size, 300)
|
||||
|
||||
|
@ -82,6 +85,7 @@ class CacheConfigTests(TestCase):
|
|||
"""
|
||||
config = {"caches": {"per_cache_factors": {"foo": 2}}}
|
||||
self.config.read_config(config, config_dir_path="", data_dir_path="")
|
||||
self.config.resize_all_caches()
|
||||
|
||||
cache = LruCache(100)
|
||||
add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
|
||||
|
@ -99,6 +103,7 @@ class CacheConfigTests(TestCase):
|
|||
|
||||
config = {"caches": {"global_factor": 4}}
|
||||
self.config.read_config(config, config_dir_path="", data_dir_path="")
|
||||
self.config.resize_all_caches()
|
||||
|
||||
self.assertEqual(cache.max_size, 400)
|
||||
|
||||
|
@ -110,6 +115,7 @@ class CacheConfigTests(TestCase):
|
|||
"""
|
||||
config = {"caches": {"global_factor": 1.5}}
|
||||
self.config.read_config(config, config_dir_path="", data_dir_path="")
|
||||
self.config.resize_all_caches()
|
||||
|
||||
cache = LruCache(100)
|
||||
add_resizable_cache("foo", cache_resize_callback=cache.set_cache_factor)
|
||||
|
@ -128,6 +134,7 @@ class CacheConfigTests(TestCase):
|
|||
"SYNAPSE_CACHE_FACTOR_CACHE_B": 3,
|
||||
}
|
||||
self.config.read_config(config, config_dir_path="", data_dir_path="")
|
||||
self.config.resize_all_caches()
|
||||
|
||||
cache_a = LruCache(100)
|
||||
add_resizable_cache("*cache_a*", cache_resize_callback=cache_a.set_cache_factor)
|
||||
|
@ -148,6 +155,7 @@ class CacheConfigTests(TestCase):
|
|||
|
||||
config = {"caches": {"event_cache_size": "10k"}}
|
||||
self.config.read_config(config, config_dir_path="", data_dir_path="")
|
||||
self.config.resize_all_caches()
|
||||
|
||||
cache = LruCache(
|
||||
max_size=self.config.event_cache_size,
|
||||
|
|
|
@ -749,6 +749,7 @@ def setup_test_homeserver(
|
|||
if config is None:
|
||||
config = default_config(name, parse=True)
|
||||
|
||||
config.caches.resize_all_caches()
|
||||
config.ldap_enabled = False
|
||||
|
||||
if "clock" not in kwargs:
|
||||
|
|
Loading…
Reference in New Issue