Strip number suffix from instance name to consolidate services that traces are spread over (#13729)
The problem with many services is that it makes it hard to find which service has the trace you want, see https://github.com/jaegertracing/jaeger-ui/issues/985 Previously, we split traces out into services based on their instance name like `matrix.org client_reader-1`, etc but there are many worker instances of the same `client_reader` so there is a lot to click through. With this PR, all of the traces are just collected under the worker type like `client_reader`, `event_persister` 😇 Note: A Synapse worker instance name is an opaque string with the number convention only being our own thing for the `matrix.org` deployment. But seems pretty sensible to group things this way.pull/13733/merge
parent
3d9f82efcb
commit
f694bb71b7
|
@ -0,0 +1 @@
|
||||||
|
Strip number suffix from instance name to consolidate services that traces are spread over.
|
|
@ -203,6 +203,9 @@ if TYPE_CHECKING:
|
||||||
|
|
||||||
# Helper class
|
# Helper class
|
||||||
|
|
||||||
|
# Matches the number suffix in an instance name like "matrix.org client_reader-8"
|
||||||
|
STRIP_INSTANCE_NUMBER_SUFFIX_REGEX = re.compile(r"[_-]?\d+$")
|
||||||
|
|
||||||
|
|
||||||
class _DummyTagNames:
|
class _DummyTagNames:
|
||||||
"""wrapper of opentracings tags. We need to have them if we
|
"""wrapper of opentracings tags. We need to have them if we
|
||||||
|
@ -441,9 +444,17 @@ def init_tracer(hs: "HomeServer") -> None:
|
||||||
|
|
||||||
from jaeger_client.metrics.prometheus import PrometheusMetricsFactory
|
from jaeger_client.metrics.prometheus import PrometheusMetricsFactory
|
||||||
|
|
||||||
|
# Instance names are opaque strings but by stripping off the number suffix,
|
||||||
|
# we can get something that looks like a "worker type", e.g.
|
||||||
|
# "client_reader-1" -> "client_reader" so we don't spread the traces across
|
||||||
|
# so many services.
|
||||||
|
instance_name_by_type = re.sub(
|
||||||
|
STRIP_INSTANCE_NUMBER_SUFFIX_REGEX, "", hs.get_instance_name()
|
||||||
|
)
|
||||||
|
|
||||||
config = JaegerConfig(
|
config = JaegerConfig(
|
||||||
config=hs.config.tracing.jaeger_config,
|
config=hs.config.tracing.jaeger_config,
|
||||||
service_name=f"{hs.config.server.server_name} {hs.get_instance_name()}",
|
service_name=f"{hs.config.server.server_name} {instance_name_by_type}",
|
||||||
scope_manager=LogContextScopeManager(),
|
scope_manager=LogContextScopeManager(),
|
||||||
metrics_factory=PrometheusMetricsFactory(),
|
metrics_factory=PrometheusMetricsFactory(),
|
||||||
)
|
)
|
||||||
|
|
Loading…
Reference in New Issue