Additional functionality for declaring worker types in Complement (#14921)
Co-authored-by: Olivier Wilkinson (reivilibre) <oliverw@matrix.org>pull/15302/head
parent
e7b559d2ca
commit
003a25ae5c
|
@ -0,0 +1 @@
|
|||
Add additional functionality to declaring worker types when starting Complement in worker mode.
|
|
@ -51,8 +51,7 @@ if [[ -n "$SYNAPSE_COMPLEMENT_USE_WORKERS" ]]; then
|
|||
# -z True if the length of string is zero.
|
||||
if [[ -z "$SYNAPSE_WORKER_TYPES" ]]; then
|
||||
export SYNAPSE_WORKER_TYPES="\
|
||||
event_persister, \
|
||||
event_persister, \
|
||||
event_persister:2, \
|
||||
background_worker, \
|
||||
frontend_proxy, \
|
||||
event_creator, \
|
||||
|
@ -64,7 +63,8 @@ if [[ -n "$SYNAPSE_COMPLEMENT_USE_WORKERS" ]]; then
|
|||
synchrotron, \
|
||||
client_reader, \
|
||||
appservice, \
|
||||
pusher"
|
||||
pusher, \
|
||||
stream_writers=account_data+presence+receipts+to_device+typing"
|
||||
|
||||
fi
|
||||
log "Workers requested: $SYNAPSE_WORKER_TYPES"
|
||||
|
|
|
@ -19,8 +19,15 @@
|
|||
# The environment variables it reads are:
|
||||
# * SYNAPSE_SERVER_NAME: The desired server_name of the homeserver.
|
||||
# * SYNAPSE_REPORT_STATS: Whether to report stats.
|
||||
# * SYNAPSE_WORKER_TYPES: A comma separated list of worker names as specified in WORKER_CONFIG
|
||||
# below. Leave empty for no workers.
|
||||
# * SYNAPSE_WORKER_TYPES: A comma separated list of worker names as specified in WORKERS_CONFIG
|
||||
# below. Leave empty for no workers. Add a ':' and a number at the end to
|
||||
# multiply that worker. Append multiple worker types with '+' to merge the
|
||||
# worker types into a single worker. Add a name and a '=' to the front of a
|
||||
# worker type to give this instance a name in logs and nginx.
|
||||
# Examples:
|
||||
# SYNAPSE_WORKER_TYPES='event_persister, federation_sender, client_reader'
|
||||
# SYNAPSE_WORKER_TYPES='event_persister:2, federation_sender:2, client_reader'
|
||||
# SYNAPSE_WORKER_TYPES='stream_writers=account_data+presence+typing'
|
||||
# * SYNAPSE_AS_REGISTRATION_DIR: If specified, a directory in which .yaml and .yml files
|
||||
# will be treated as Application Service registration files.
|
||||
# * SYNAPSE_TLS_CERT: Path to a TLS certificate in PEM format.
|
||||
|
@ -40,16 +47,33 @@
|
|||
|
||||
import os
|
||||
import platform
|
||||
import re
|
||||
import subprocess
|
||||
import sys
|
||||
from collections import defaultdict
|
||||
from itertools import chain
|
||||
from pathlib import Path
|
||||
from typing import Any, Dict, List, Mapping, MutableMapping, NoReturn, Optional, Set
|
||||
from typing import (
|
||||
Any,
|
||||
Dict,
|
||||
List,
|
||||
Mapping,
|
||||
MutableMapping,
|
||||
NoReturn,
|
||||
Optional,
|
||||
Set,
|
||||
SupportsIndex,
|
||||
)
|
||||
|
||||
import yaml
|
||||
from jinja2 import Environment, FileSystemLoader
|
||||
|
||||
MAIN_PROCESS_HTTP_LISTENER_PORT = 8080
|
||||
|
||||
# A simple name used as a placeholder in the WORKERS_CONFIG below. This will be replaced
|
||||
# during processing with the name of the worker.
|
||||
WORKER_PLACEHOLDER_NAME = "placeholder_name"
|
||||
|
||||
# Workers with exposed endpoints needs either "client", "federation", or "media" listener_resources
|
||||
# Watching /_matrix/client needs a "client" listener
|
||||
# Watching /_matrix/federation needs a "federation" listener
|
||||
|
@ -70,11 +94,13 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
|
|||
"endpoint_patterns": [
|
||||
"^/_matrix/client/(api/v1|r0|v3|unstable)/user_directory/search$"
|
||||
],
|
||||
"shared_extra_conf": {"update_user_directory_from_worker": "user_dir1"},
|
||||
"shared_extra_conf": {
|
||||
"update_user_directory_from_worker": WORKER_PLACEHOLDER_NAME
|
||||
},
|
||||
"worker_extra_conf": "",
|
||||
},
|
||||
"media_repository": {
|
||||
"app": "synapse.app.media_repository",
|
||||
"app": "synapse.app.generic_worker",
|
||||
"listener_resources": ["media"],
|
||||
"endpoint_patterns": [
|
||||
"^/_matrix/media/",
|
||||
|
@ -87,7 +113,7 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
|
|||
# The first configured media worker will run the media background jobs
|
||||
"shared_extra_conf": {
|
||||
"enable_media_repo": False,
|
||||
"media_instance_running_background_jobs": "media_repository1",
|
||||
"media_instance_running_background_jobs": WORKER_PLACEHOLDER_NAME,
|
||||
},
|
||||
"worker_extra_conf": "enable_media_repo: true",
|
||||
},
|
||||
|
@ -95,7 +121,9 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
|
|||
"app": "synapse.app.generic_worker",
|
||||
"listener_resources": [],
|
||||
"endpoint_patterns": [],
|
||||
"shared_extra_conf": {"notify_appservices_from_worker": "appservice1"},
|
||||
"shared_extra_conf": {
|
||||
"notify_appservices_from_worker": WORKER_PLACEHOLDER_NAME
|
||||
},
|
||||
"worker_extra_conf": "",
|
||||
},
|
||||
"federation_sender": {
|
||||
|
@ -192,9 +220,9 @@ WORKERS_CONFIG: Dict[str, Dict[str, Any]] = {
|
|||
"app": "synapse.app.generic_worker",
|
||||
"listener_resources": [],
|
||||
"endpoint_patterns": [],
|
||||
# This worker cannot be sharded. Therefore there should only ever be one background
|
||||
# worker, and it should be named background_worker1
|
||||
"shared_extra_conf": {"run_background_tasks_on": "background_worker1"},
|
||||
# This worker cannot be sharded. Therefore, there should only ever be one
|
||||
# background worker. This is enforced for the safety of your database.
|
||||
"shared_extra_conf": {"run_background_tasks_on": WORKER_PLACEHOLDER_NAME},
|
||||
"worker_extra_conf": "",
|
||||
},
|
||||
"event_creator": {
|
||||
|
@ -275,7 +303,7 @@ NGINX_LOCATION_CONFIG_BLOCK = """
|
|||
"""
|
||||
|
||||
NGINX_UPSTREAM_CONFIG_BLOCK = """
|
||||
upstream {upstream_worker_type} {{
|
||||
upstream {upstream_worker_base_name} {{
|
||||
{body}
|
||||
}}
|
||||
"""
|
||||
|
@ -326,7 +354,7 @@ def convert(src: str, dst: str, **template_vars: object) -> None:
|
|||
|
||||
def add_worker_roles_to_shared_config(
|
||||
shared_config: dict,
|
||||
worker_type: str,
|
||||
worker_types_set: Set[str],
|
||||
worker_name: str,
|
||||
worker_port: int,
|
||||
) -> None:
|
||||
|
@ -334,22 +362,36 @@ def add_worker_roles_to_shared_config(
|
|||
append appropriate worker information to it for the current worker_type instance.
|
||||
|
||||
Args:
|
||||
shared_config: The config dict that all worker instances share (after being converted to YAML)
|
||||
worker_type: The type of worker (one of those defined in WORKERS_CONFIG).
|
||||
shared_config: The config dict that all worker instances share (after being
|
||||
converted to YAML)
|
||||
worker_types_set: The type of worker (one of those defined in WORKERS_CONFIG).
|
||||
This list can be a single worker type or multiple.
|
||||
worker_name: The name of the worker instance.
|
||||
worker_port: The HTTP replication port that the worker instance is listening on.
|
||||
"""
|
||||
# The instance_map config field marks the workers that write to various replication streams
|
||||
# The instance_map config field marks the workers that write to various replication
|
||||
# streams
|
||||
instance_map = shared_config.setdefault("instance_map", {})
|
||||
|
||||
# Worker-type specific sharding config
|
||||
if worker_type == "pusher":
|
||||
# This is a list of the stream_writers that there can be only one of. Events can be
|
||||
# sharded, and therefore doesn't belong here.
|
||||
singular_stream_writers = [
|
||||
"account_data",
|
||||
"presence",
|
||||
"receipts",
|
||||
"to_device",
|
||||
"typing",
|
||||
]
|
||||
|
||||
# Worker-type specific sharding config. Now a single worker can fulfill multiple
|
||||
# roles, check each.
|
||||
if "pusher" in worker_types_set:
|
||||
shared_config.setdefault("pusher_instances", []).append(worker_name)
|
||||
|
||||
elif worker_type == "federation_sender":
|
||||
if "federation_sender" in worker_types_set:
|
||||
shared_config.setdefault("federation_sender_instances", []).append(worker_name)
|
||||
|
||||
elif worker_type == "event_persister":
|
||||
if "event_persister" in worker_types_set:
|
||||
# Event persisters write to the events stream, so we need to update
|
||||
# the list of event stream writers
|
||||
shared_config.setdefault("stream_writers", {}).setdefault("events", []).append(
|
||||
|
@ -362,19 +404,154 @@ def add_worker_roles_to_shared_config(
|
|||
"port": worker_port,
|
||||
}
|
||||
|
||||
elif worker_type in ["account_data", "presence", "receipts", "to_device", "typing"]:
|
||||
# Update the list of stream writers
|
||||
# It's convenient that the name of the worker type is the same as the stream to write
|
||||
shared_config.setdefault("stream_writers", {}).setdefault(
|
||||
worker_type, []
|
||||
).append(worker_name)
|
||||
# Update the list of stream writers. It's convenient that the name of the worker
|
||||
# type is the same as the stream to write. Iterate over the whole list in case there
|
||||
# is more than one.
|
||||
for worker in worker_types_set:
|
||||
if worker in singular_stream_writers:
|
||||
shared_config.setdefault("stream_writers", {}).setdefault(
|
||||
worker, []
|
||||
).append(worker_name)
|
||||
|
||||
# Map of stream writer instance names to host/ports combos
|
||||
# For now, all stream writers need http replication ports
|
||||
instance_map[worker_name] = {
|
||||
"host": "localhost",
|
||||
"port": worker_port,
|
||||
}
|
||||
# Map of stream writer instance names to host/ports combos
|
||||
# For now, all stream writers need http replication ports
|
||||
instance_map[worker_name] = {
|
||||
"host": "localhost",
|
||||
"port": worker_port,
|
||||
}
|
||||
|
||||
|
||||
def merge_worker_template_configs(
|
||||
existing_dict: Dict[str, Any] | None,
|
||||
to_be_merged_dict: Dict[str, Any],
|
||||
) -> Dict[str, Any]:
|
||||
"""When given an existing dict of worker template configuration consisting with both
|
||||
dicts and lists, merge new template data from WORKERS_CONFIG(or create) and
|
||||
return new dict.
|
||||
|
||||
Args:
|
||||
existing_dict: Either an existing worker template or a fresh blank one.
|
||||
to_be_merged_dict: The template from WORKERS_CONFIGS to be merged into
|
||||
existing_dict.
|
||||
Returns: The newly merged together dict values.
|
||||
"""
|
||||
new_dict: Dict[str, Any] = {}
|
||||
if not existing_dict:
|
||||
# It doesn't exist yet, just use the new dict(but take a copy not a reference)
|
||||
new_dict = to_be_merged_dict.copy()
|
||||
else:
|
||||
for i in to_be_merged_dict.keys():
|
||||
if (i == "endpoint_patterns") or (i == "listener_resources"):
|
||||
# merge the two lists, remove duplicates
|
||||
new_dict[i] = list(set(existing_dict[i] + to_be_merged_dict[i]))
|
||||
elif i == "shared_extra_conf":
|
||||
# merge dictionary's, the worker name will be replaced later
|
||||
new_dict[i] = {**existing_dict[i], **to_be_merged_dict[i]}
|
||||
elif i == "worker_extra_conf":
|
||||
# There is only one worker type that has a 'worker_extra_conf' and it is
|
||||
# the media_repo. Since duplicate worker types on the same worker don't
|
||||
# work, this is fine.
|
||||
new_dict[i] = existing_dict[i] + to_be_merged_dict[i]
|
||||
else:
|
||||
# Everything else should be identical, like "app", which only works
|
||||
# because all apps are now generic_workers.
|
||||
new_dict[i] = to_be_merged_dict[i]
|
||||
return new_dict
|
||||
|
||||
|
||||
def insert_worker_name_for_worker_config(
|
||||
existing_dict: Dict[str, Any], worker_name: str
|
||||
) -> Dict[str, Any]:
|
||||
"""Insert a given worker name into the worker's configuration dict.
|
||||
|
||||
Args:
|
||||
existing_dict: The worker_config dict that is imported into shared_config.
|
||||
worker_name: The name of the worker to insert.
|
||||
Returns: Copy of the dict with newly inserted worker name
|
||||
"""
|
||||
dict_to_edit = existing_dict.copy()
|
||||
for k, v in dict_to_edit["shared_extra_conf"].items():
|
||||
# Only proceed if it's the placeholder name string
|
||||
if v == WORKER_PLACEHOLDER_NAME:
|
||||
dict_to_edit["shared_extra_conf"][k] = worker_name
|
||||
return dict_to_edit
|
||||
|
||||
|
||||
def apply_requested_multiplier_for_worker(worker_types: List[str]) -> List[str]:
|
||||
"""
|
||||
Apply multiplier(if found) by returning a new expanded list with some basic error
|
||||
checking.
|
||||
|
||||
Args:
|
||||
worker_types: The unprocessed List of requested workers
|
||||
Returns:
|
||||
A new list with all requested workers expanded.
|
||||
"""
|
||||
# Checking performed:
|
||||
# 1. if worker:2 or more is declared, it will create additional workers up to number
|
||||
# 2. if worker:1, it will create a single copy of this worker as if no number was
|
||||
# given
|
||||
# 3. if worker:0 is declared, this worker will be ignored. This is to allow for
|
||||
# scripting and automated expansion and is intended behaviour.
|
||||
# 4. if worker:NaN or is a negative number, it will error and log it.
|
||||
new_worker_types = []
|
||||
for worker_type in worker_types:
|
||||
if ":" in worker_type:
|
||||
worker_type_components = split_and_strip_string(worker_type, ":", 1)
|
||||
worker_count = 0
|
||||
# Should only be 2 components, a type of worker(s) and an integer as a
|
||||
# string. Cast the number as an int then it can be used as a counter.
|
||||
try:
|
||||
worker_count = int(worker_type_components[1])
|
||||
except ValueError:
|
||||
error(
|
||||
f"Bad number in worker count for '{worker_type}': "
|
||||
f"'{worker_type_components[1]}' is not an integer"
|
||||
)
|
||||
|
||||
# As long as there are more than 0, we add one to the list to make below.
|
||||
for _ in range(worker_count):
|
||||
new_worker_types.append(worker_type_components[0])
|
||||
|
||||
else:
|
||||
# If it's not a real worker_type, it will error out later.
|
||||
new_worker_types.append(worker_type)
|
||||
return new_worker_types
|
||||
|
||||
|
||||
def is_sharding_allowed_for_worker_type(worker_type: str) -> bool:
|
||||
"""Helper to check to make sure worker types that cannot have multiples do not.
|
||||
|
||||
Args:
|
||||
worker_type: The type of worker to check against.
|
||||
Returns: True if allowed, False if not
|
||||
"""
|
||||
return worker_type not in [
|
||||
"background_worker",
|
||||
"account_data",
|
||||
"presence",
|
||||
"receipts",
|
||||
"typing",
|
||||
"to_device",
|
||||
]
|
||||
|
||||
|
||||
def split_and_strip_string(
|
||||
given_string: str, split_char: str, max_split: SupportsIndex = -1
|
||||
) -> List[str]:
|
||||
"""
|
||||
Helper to split a string on split_char and strip whitespace from each end of each
|
||||
element.
|
||||
Args:
|
||||
given_string: The string to split
|
||||
split_char: The character to split the string on
|
||||
max_split: kwarg for split() to limit how many times the split() happens
|
||||
Returns:
|
||||
A List of strings
|
||||
"""
|
||||
# Removes whitespace from ends of result strings before adding to list. Allow for
|
||||
# overriding 'maxsplit' kwarg, default being -1 to signify no maximum.
|
||||
return [x.strip() for x in given_string.split(split_char, maxsplit=max_split)]
|
||||
|
||||
|
||||
def generate_base_homeserver_config() -> None:
|
||||
|
@ -389,29 +566,153 @@ def generate_base_homeserver_config() -> None:
|
|||
subprocess.run(["/usr/local/bin/python", "/start.py", "migrate_config"], check=True)
|
||||
|
||||
|
||||
def parse_worker_types(
|
||||
requested_worker_types: List[str],
|
||||
) -> Dict[str, Set[str]]:
|
||||
"""Read the desired list of requested workers and prepare the data for use in
|
||||
generating worker config files while also checking for potential gotchas.
|
||||
|
||||
Args:
|
||||
requested_worker_types: The list formed from the split environment variable
|
||||
containing the unprocessed requests for workers.
|
||||
|
||||
Returns: A dict of worker names to set of worker types. Format:
|
||||
{'worker_name':
|
||||
{'worker_type', 'worker_type2'}
|
||||
}
|
||||
"""
|
||||
# A counter of worker_base_name -> int. Used for determining the name for a given
|
||||
# worker when generating its config file, as each worker's name is just
|
||||
# worker_base_name followed by instance number
|
||||
worker_base_name_counter: Dict[str, int] = defaultdict(int)
|
||||
|
||||
# Similar to above, but more finely grained. This is used to determine we don't have
|
||||
# more than a single worker for cases where multiples would be bad(e.g. presence).
|
||||
worker_type_shard_counter: Dict[str, int] = defaultdict(int)
|
||||
|
||||
# The final result of all this processing
|
||||
dict_to_return: Dict[str, Set[str]] = {}
|
||||
|
||||
# Handle any multipliers requested for given workers.
|
||||
multiple_processed_worker_types = apply_requested_multiplier_for_worker(
|
||||
requested_worker_types
|
||||
)
|
||||
|
||||
# Process each worker_type_string
|
||||
# Examples of expected formats:
|
||||
# - requested_name=type1+type2+type3
|
||||
# - synchrotron
|
||||
# - event_creator+event_persister
|
||||
for worker_type_string in multiple_processed_worker_types:
|
||||
# First, if a name is requested, use that — otherwise generate one.
|
||||
worker_base_name: str = ""
|
||||
if "=" in worker_type_string:
|
||||
# Split on "=", remove extra whitespace from ends then make list
|
||||
worker_type_split = split_and_strip_string(worker_type_string, "=")
|
||||
if len(worker_type_split) > 2:
|
||||
error(
|
||||
"There should only be one '=' in the worker type string. "
|
||||
f"Please fix: {worker_type_string}"
|
||||
)
|
||||
|
||||
# Assign the name
|
||||
worker_base_name = worker_type_split[0]
|
||||
|
||||
if not re.match(r"^[a-zA-Z0-9_+-]*[a-zA-Z_+-]$", worker_base_name):
|
||||
# Apply a fairly narrow regex to the worker names. Some characters
|
||||
# aren't safe for use in file paths or nginx configurations.
|
||||
# Don't allow to end with a number because we'll add a number
|
||||
# ourselves in a moment.
|
||||
error(
|
||||
"Invalid worker name; please choose a name consisting of "
|
||||
"alphanumeric letters, _ + -, but not ending with a digit: "
|
||||
f"{worker_base_name!r}"
|
||||
)
|
||||
|
||||
# Continue processing the remainder of the worker_type string
|
||||
# with the name override removed.
|
||||
worker_type_string = worker_type_split[1]
|
||||
|
||||
# Split the worker_type_string on "+", remove whitespace from ends then make
|
||||
# the list a set so it's deduplicated.
|
||||
worker_types_set: Set[str] = set(
|
||||
split_and_strip_string(worker_type_string, "+")
|
||||
)
|
||||
|
||||
if not worker_base_name:
|
||||
# No base name specified: generate one deterministically from set of
|
||||
# types
|
||||
worker_base_name = "+".join(sorted(worker_types_set))
|
||||
|
||||
# At this point, we have:
|
||||
# worker_base_name which is the name for the worker, without counter.
|
||||
# worker_types_set which is the set of worker types for this worker.
|
||||
|
||||
# Validate worker_type and make sure we don't allow sharding for a worker type
|
||||
# that doesn't support it. Will error and stop if it is a problem,
|
||||
# e.g. 'background_worker'.
|
||||
for worker_type in worker_types_set:
|
||||
# Verify this is a real defined worker type. If it's not, stop everything so
|
||||
# it can be fixed.
|
||||
if worker_type not in WORKERS_CONFIG:
|
||||
error(
|
||||
f"{worker_type} is an unknown worker type! Was found in "
|
||||
f"'{worker_type_string}'. Please fix!"
|
||||
)
|
||||
|
||||
if worker_type in worker_type_shard_counter:
|
||||
if not is_sharding_allowed_for_worker_type(worker_type):
|
||||
error(
|
||||
f"There can be only a single worker with {worker_type} "
|
||||
"type. Please recount and remove."
|
||||
)
|
||||
# Not in shard counter, must not have seen it yet, add it.
|
||||
worker_type_shard_counter[worker_type] += 1
|
||||
|
||||
# Generate the number for the worker using incrementing counter
|
||||
worker_base_name_counter[worker_base_name] += 1
|
||||
worker_number = worker_base_name_counter[worker_base_name]
|
||||
worker_name = f"{worker_base_name}{worker_number}"
|
||||
|
||||
if worker_number > 1:
|
||||
# If this isn't the first worker, check that we don't have a confusing
|
||||
# mixture of worker types with the same base name.
|
||||
first_worker_with_base_name = dict_to_return[f"{worker_base_name}1"]
|
||||
if first_worker_with_base_name != worker_types_set:
|
||||
error(
|
||||
f"Can not use worker_name: '{worker_name}' for worker_type(s): "
|
||||
f"{worker_types_set!r}. It is already in use by "
|
||||
f"worker_type(s): {first_worker_with_base_name!r}"
|
||||
)
|
||||
|
||||
dict_to_return[worker_name] = worker_types_set
|
||||
|
||||
return dict_to_return
|
||||
|
||||
|
||||
def generate_worker_files(
|
||||
environ: Mapping[str, str], config_path: str, data_dir: str
|
||||
environ: Mapping[str, str],
|
||||
config_path: str,
|
||||
data_dir: str,
|
||||
requested_worker_types: Dict[str, Set[str]],
|
||||
) -> None:
|
||||
"""Read the desired list of workers from environment variables and generate
|
||||
shared homeserver, nginx and supervisord configs.
|
||||
"""Read the desired workers(if any) that is passed in and generate shared
|
||||
homeserver, nginx and supervisord configs.
|
||||
|
||||
Args:
|
||||
environ: os.environ instance.
|
||||
config_path: The location of the generated Synapse main worker config file.
|
||||
data_dir: The location of the synapse data directory. Where log and
|
||||
user-facing config files live.
|
||||
requested_worker_types: A Dict containing requested workers in the format of
|
||||
{'worker_name1': {'worker_type', ...}}
|
||||
"""
|
||||
# Note that yaml cares about indentation, so care should be taken to insert lines
|
||||
# into files at the correct indentation below.
|
||||
|
||||
# shared_config is the contents of a Synapse config file that will be shared amongst
|
||||
# the main Synapse process as well as all workers.
|
||||
# It is intended mainly for disabling functionality when certain workers are spun up,
|
||||
# and adding a replication listener.
|
||||
|
||||
# First read the original config file and extract the listeners block. Then we'll add
|
||||
# another listener for replication. Later we'll write out the result to the shared
|
||||
# config file.
|
||||
# First read the original config file and extract the listeners block. Then we'll
|
||||
# add another listener for replication. Later we'll write out the result to the
|
||||
# shared config file.
|
||||
listeners = [
|
||||
{
|
||||
"port": 9093,
|
||||
|
@ -427,9 +728,9 @@ def generate_worker_files(
|
|||
listeners += original_listeners
|
||||
|
||||
# The shared homeserver config. The contents of which will be inserted into the
|
||||
# base shared worker jinja2 template.
|
||||
#
|
||||
# This config file will be passed to all workers, included Synapse's main process.
|
||||
# base shared worker jinja2 template. This config file will be passed to all
|
||||
# workers, included Synapse's main process. It is intended mainly for disabling
|
||||
# functionality when certain workers are spun up, and adding a replication listener.
|
||||
shared_config: Dict[str, Any] = {"listeners": listeners}
|
||||
|
||||
# List of dicts that describe workers.
|
||||
|
@ -437,31 +738,20 @@ def generate_worker_files(
|
|||
# program blocks.
|
||||
worker_descriptors: List[Dict[str, Any]] = []
|
||||
|
||||
# Upstreams for load-balancing purposes. This dict takes the form of a worker type to the
|
||||
# ports of each worker. For example:
|
||||
# Upstreams for load-balancing purposes. This dict takes the form of the worker
|
||||
# type to the ports of each worker. For example:
|
||||
# {
|
||||
# worker_type: {1234, 1235, ...}}
|
||||
# }
|
||||
# and will be used to construct 'upstream' nginx directives.
|
||||
nginx_upstreams: Dict[str, Set[int]] = {}
|
||||
|
||||
# A map of: {"endpoint": "upstream"}, where "upstream" is a str representing what will be
|
||||
# placed after the proxy_pass directive. The main benefit to representing this data as a
|
||||
# dict over a str is that we can easily deduplicate endpoints across multiple instances
|
||||
# of the same worker.
|
||||
#
|
||||
# An nginx site config that will be amended to depending on the workers that are
|
||||
# spun up. To be placed in /etc/nginx/conf.d.
|
||||
nginx_locations = {}
|
||||
|
||||
# Read the desired worker configuration from the environment
|
||||
worker_types_env = environ.get("SYNAPSE_WORKER_TYPES", "").strip()
|
||||
if not worker_types_env:
|
||||
# No workers, just the main process
|
||||
worker_types = []
|
||||
else:
|
||||
# Split type names by comma, ignoring whitespace.
|
||||
worker_types = [x.strip() for x in worker_types_env.split(",")]
|
||||
# A map of: {"endpoint": "upstream"}, where "upstream" is a str representing what
|
||||
# will be placed after the proxy_pass directive. The main benefit to representing
|
||||
# this data as a dict over a str is that we can easily deduplicate endpoints
|
||||
# across multiple instances of the same worker. The final rendering will be combined
|
||||
# with nginx_upstreams and placed in /etc/nginx/conf.d.
|
||||
nginx_locations: Dict[str, str] = {}
|
||||
|
||||
# Create the worker configuration directory if it doesn't already exist
|
||||
os.makedirs("/conf/workers", exist_ok=True)
|
||||
|
@ -469,66 +759,57 @@ def generate_worker_files(
|
|||
# Start worker ports from this arbitrary port
|
||||
worker_port = 18009
|
||||
|
||||
# A counter of worker_type -> int. Used for determining the name for a given
|
||||
# worker type when generating its config file, as each worker's name is just
|
||||
# worker_type + instance #
|
||||
worker_type_counter: Dict[str, int] = {}
|
||||
|
||||
# A list of internal endpoints to healthcheck, starting with the main process
|
||||
# which exists even if no workers do.
|
||||
healthcheck_urls = ["http://localhost:8080/health"]
|
||||
|
||||
# For each worker type specified by the user, create config values
|
||||
for worker_type in worker_types:
|
||||
worker_config = WORKERS_CONFIG.get(worker_type)
|
||||
if worker_config:
|
||||
worker_config = worker_config.copy()
|
||||
else:
|
||||
error(worker_type + " is an unknown worker type! Please fix!")
|
||||
# Get the set of all worker types that we have configured
|
||||
all_worker_types_in_use = set(chain(*requested_worker_types.values()))
|
||||
# Map locations to upstreams (corresponding to worker types) in Nginx
|
||||
# but only if we use the appropriate worker type
|
||||
for worker_type in all_worker_types_in_use:
|
||||
for endpoint_pattern in WORKERS_CONFIG[worker_type]["endpoint_patterns"]:
|
||||
nginx_locations[endpoint_pattern] = f"http://{worker_type}"
|
||||
|
||||
new_worker_count = worker_type_counter.setdefault(worker_type, 0) + 1
|
||||
worker_type_counter[worker_type] = new_worker_count
|
||||
# For each worker type specified by the user, create config values and write it's
|
||||
# yaml config file
|
||||
for worker_name, worker_types_set in requested_worker_types.items():
|
||||
# The collected and processed data will live here.
|
||||
worker_config: Dict[str, Any] = {}
|
||||
|
||||
# Merge all worker config templates for this worker into a single config
|
||||
for worker_type in worker_types_set:
|
||||
copy_of_template_config = WORKERS_CONFIG[worker_type].copy()
|
||||
|
||||
# Merge worker type template configuration data. It's a combination of lists
|
||||
# and dicts, so use this helper.
|
||||
worker_config = merge_worker_template_configs(
|
||||
worker_config, copy_of_template_config
|
||||
)
|
||||
|
||||
# Replace placeholder names in the config template with the actual worker name.
|
||||
worker_config = insert_worker_name_for_worker_config(worker_config, worker_name)
|
||||
|
||||
# Name workers by their type concatenated with an incrementing number
|
||||
# e.g. federation_reader1
|
||||
worker_name = worker_type + str(new_worker_count)
|
||||
worker_config.update(
|
||||
{"name": worker_name, "port": str(worker_port), "config_path": config_path}
|
||||
)
|
||||
|
||||
# Update the shared config with any worker-type specific options
|
||||
shared_config.update(worker_config["shared_extra_conf"])
|
||||
# Update the shared config with any worker_type specific options. The first of a
|
||||
# given worker_type needs to stay assigned and not be replaced.
|
||||
worker_config["shared_extra_conf"].update(shared_config)
|
||||
shared_config = worker_config["shared_extra_conf"]
|
||||
|
||||
healthcheck_urls.append("http://localhost:%d/health" % (worker_port,))
|
||||
|
||||
# Check if more than one instance of this worker type has been specified
|
||||
worker_type_total_count = worker_types.count(worker_type)
|
||||
|
||||
# Update the shared config with sharding-related options if necessary
|
||||
add_worker_roles_to_shared_config(
|
||||
shared_config, worker_type, worker_name, worker_port
|
||||
shared_config, worker_types_set, worker_name, worker_port
|
||||
)
|
||||
|
||||
# Enable the worker in supervisord
|
||||
worker_descriptors.append(worker_config)
|
||||
|
||||
# Add nginx location blocks for this worker's endpoints (if any are defined)
|
||||
for pattern in worker_config["endpoint_patterns"]:
|
||||
# Determine whether we need to load-balance this worker
|
||||
if worker_type_total_count > 1:
|
||||
# Create or add to a load-balanced upstream for this worker
|
||||
nginx_upstreams.setdefault(worker_type, set()).add(worker_port)
|
||||
|
||||
# Upstreams are named after the worker_type
|
||||
upstream = "http://" + worker_type
|
||||
else:
|
||||
upstream = "http://localhost:%d" % (worker_port,)
|
||||
|
||||
# Note that this endpoint should proxy to this upstream
|
||||
nginx_locations[pattern] = upstream
|
||||
|
||||
# Write out the worker's logging config file
|
||||
|
||||
log_config_filepath = generate_worker_log_config(environ, worker_name, data_dir)
|
||||
|
||||
# Then a worker config file
|
||||
|
@ -539,6 +820,10 @@ def generate_worker_files(
|
|||
worker_log_config_filepath=log_config_filepath,
|
||||
)
|
||||
|
||||
# Save this worker's port number to the correct nginx upstreams
|
||||
for worker_type in worker_types_set:
|
||||
nginx_upstreams.setdefault(worker_type, set()).add(worker_port)
|
||||
|
||||
worker_port += 1
|
||||
|
||||
# Build the nginx location config blocks
|
||||
|
@ -551,15 +836,14 @@ def generate_worker_files(
|
|||
|
||||
# Determine the load-balancing upstreams to configure
|
||||
nginx_upstream_config = ""
|
||||
|
||||
for upstream_worker_type, upstream_worker_ports in nginx_upstreams.items():
|
||||
for upstream_worker_base_name, upstream_worker_ports in nginx_upstreams.items():
|
||||
body = ""
|
||||
for port in upstream_worker_ports:
|
||||
body += " server localhost:%d;\n" % (port,)
|
||||
body += f" server localhost:{port};\n"
|
||||
|
||||
# Add to the list of configured upstreams
|
||||
nginx_upstream_config += NGINX_UPSTREAM_CONFIG_BLOCK.format(
|
||||
upstream_worker_type=upstream_worker_type,
|
||||
upstream_worker_base_name=upstream_worker_base_name,
|
||||
body=body,
|
||||
)
|
||||
|
||||
|
@ -580,7 +864,7 @@ def generate_worker_files(
|
|||
if reg_path.suffix.lower() in (".yaml", ".yml")
|
||||
]
|
||||
|
||||
workers_in_use = len(worker_types) > 0
|
||||
workers_in_use = len(requested_worker_types) > 0
|
||||
|
||||
# Shared homeserver config
|
||||
convert(
|
||||
|
@ -678,13 +962,26 @@ def main(args: List[str], environ: MutableMapping[str, str]) -> None:
|
|||
generate_base_homeserver_config()
|
||||
else:
|
||||
log("Base homeserver config exists—not regenerating")
|
||||
# This script may be run multiple times (mostly by Complement, see note at top of file).
|
||||
# Don't re-configure workers in this instance.
|
||||
# This script may be run multiple times (mostly by Complement, see note at top of
|
||||
# file). Don't re-configure workers in this instance.
|
||||
mark_filepath = "/conf/workers_have_been_configured"
|
||||
if not os.path.exists(mark_filepath):
|
||||
# Collect and validate worker_type requests
|
||||
# Read the desired worker configuration from the environment
|
||||
worker_types_env = environ.get("SYNAPSE_WORKER_TYPES", "").strip()
|
||||
# Only process worker_types if they exist
|
||||
if not worker_types_env:
|
||||
# No workers, just the main process
|
||||
worker_types = []
|
||||
requested_worker_types: Dict[str, Any] = {}
|
||||
else:
|
||||
# Split type names by comma, ignoring whitespace.
|
||||
worker_types = split_and_strip_string(worker_types_env, ",")
|
||||
requested_worker_types = parse_worker_types(worker_types)
|
||||
|
||||
# Always regenerate all other config files
|
||||
log("Generating worker config files")
|
||||
generate_worker_files(environ, config_path, data_dir)
|
||||
generate_worker_files(environ, config_path, data_dir, requested_worker_types)
|
||||
|
||||
# Mark workers as being configured
|
||||
with open(mark_filepath, "w") as f:
|
||||
|
|
Loading…
Reference in New Issue