Make opentracing trace into event persistence (#10134)
* Trace event persistence When we persist a batch of events, set the parent opentracing span to the that from the request, so that we can trace all the way in. * changelog * When we force tracing, set a baggage item ... so that we can check again later. * Link in both directions between persist_events spanspull/10183/head
							parent
							
								
									d09e24a52d
								
							
						
					
					
						commit
						9e405034e5
					
				|  | @ -0,0 +1 @@ | |||
| Improve OpenTracing for event persistence. | ||||
|  | @ -207,7 +207,7 @@ class Auth: | |||
| 
 | ||||
|                 request.requester = user_id | ||||
|                 if user_id in self._force_tracing_for_users: | ||||
|                     opentracing.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1) | ||||
|                     opentracing.force_tracing() | ||||
|                 opentracing.set_tag("authenticated_entity", user_id) | ||||
|                 opentracing.set_tag("user_id", user_id) | ||||
|                 opentracing.set_tag("appservice_id", app_service.id) | ||||
|  | @ -260,7 +260,7 @@ class Auth: | |||
| 
 | ||||
|             request.requester = requester | ||||
|             if user_info.token_owner in self._force_tracing_for_users: | ||||
|                 opentracing.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1) | ||||
|                 opentracing.force_tracing() | ||||
|             opentracing.set_tag("authenticated_entity", user_info.token_owner) | ||||
|             opentracing.set_tag("user_id", user_info.user_id) | ||||
|             if device_id: | ||||
|  |  | |||
|  | @ -168,7 +168,7 @@ import inspect | |||
| import logging | ||||
| import re | ||||
| from functools import wraps | ||||
| from typing import TYPE_CHECKING, Dict, List, Optional, Pattern, Type | ||||
| from typing import TYPE_CHECKING, Collection, Dict, List, Optional, Pattern, Type | ||||
| 
 | ||||
| import attr | ||||
| 
 | ||||
|  | @ -278,6 +278,10 @@ class SynapseTags: | |||
|     DB_TXN_ID = "db.txn_id" | ||||
| 
 | ||||
| 
 | ||||
| class SynapseBaggage: | ||||
|     FORCE_TRACING = "synapse-force-tracing" | ||||
| 
 | ||||
| 
 | ||||
| # Block everything by default | ||||
| # A regex which matches the server_names to expose traces for. | ||||
| # None means 'block everything'. | ||||
|  | @ -285,6 +289,8 @@ _homeserver_whitelist = None  # type: Optional[Pattern[str]] | |||
| 
 | ||||
| # Util methods | ||||
| 
 | ||||
| Sentinel = object() | ||||
| 
 | ||||
| 
 | ||||
| def only_if_tracing(func): | ||||
|     """Executes the function only if we're tracing. Otherwise returns None.""" | ||||
|  | @ -447,12 +453,28 @@ def start_active_span( | |||
|     ) | ||||
| 
 | ||||
| 
 | ||||
| def start_active_span_follows_from(operation_name, contexts): | ||||
| def start_active_span_follows_from( | ||||
|     operation_name: str, contexts: Collection, inherit_force_tracing=False | ||||
| ): | ||||
|     """Starts an active opentracing span, with additional references to previous spans | ||||
| 
 | ||||
|     Args: | ||||
|         operation_name: name of the operation represented by the new span | ||||
|         contexts: the previous spans to inherit from | ||||
|         inherit_force_tracing: if set, and any of the previous contexts have had tracing | ||||
|            forced, the new span will also have tracing forced. | ||||
|     """ | ||||
|     if opentracing is None: | ||||
|         return noop_context_manager() | ||||
| 
 | ||||
|     references = [opentracing.follows_from(context) for context in contexts] | ||||
|     scope = start_active_span(operation_name, references=references) | ||||
| 
 | ||||
|     if inherit_force_tracing and any( | ||||
|         is_context_forced_tracing(ctx) for ctx in contexts | ||||
|     ): | ||||
|         force_tracing(scope.span) | ||||
| 
 | ||||
|     return scope | ||||
| 
 | ||||
| 
 | ||||
|  | @ -551,6 +573,10 @@ def start_active_span_from_edu( | |||
| 
 | ||||
| 
 | ||||
| # Opentracing setters for tags, logs, etc | ||||
| @only_if_tracing | ||||
| def active_span(): | ||||
|     """Get the currently active span, if any""" | ||||
|     return opentracing.tracer.active_span | ||||
| 
 | ||||
| 
 | ||||
| @ensure_active_span("set a tag") | ||||
|  | @ -571,6 +597,33 @@ def set_operation_name(operation_name): | |||
|     opentracing.tracer.active_span.set_operation_name(operation_name) | ||||
| 
 | ||||
| 
 | ||||
| @only_if_tracing | ||||
| def force_tracing(span=Sentinel) -> None: | ||||
|     """Force sampling for the active/given span and its children. | ||||
| 
 | ||||
|     Args: | ||||
|         span: span to force tracing for. By default, the active span. | ||||
|     """ | ||||
|     if span is Sentinel: | ||||
|         span = opentracing.tracer.active_span | ||||
|     if span is None: | ||||
|         logger.error("No active span in force_tracing") | ||||
|         return | ||||
| 
 | ||||
|     span.set_tag(opentracing.tags.SAMPLING_PRIORITY, 1) | ||||
| 
 | ||||
|     # also set a bit of baggage, so that we have a way of figuring out if | ||||
|     # it is enabled later | ||||
|     span.set_baggage_item(SynapseBaggage.FORCE_TRACING, "1") | ||||
| 
 | ||||
| 
 | ||||
| def is_context_forced_tracing(span_context) -> bool: | ||||
|     """Check if sampling has been force for the given span context.""" | ||||
|     if span_context is None: | ||||
|         return False | ||||
|     return span_context.baggage.get(SynapseBaggage.FORCE_TRACING) is not None | ||||
| 
 | ||||
| 
 | ||||
| # Injection and extraction | ||||
| 
 | ||||
| 
 | ||||
|  |  | |||
|  | @ -18,6 +18,7 @@ import itertools | |||
| import logging | ||||
| from collections import deque | ||||
| from typing import ( | ||||
|     Any, | ||||
|     Awaitable, | ||||
|     Callable, | ||||
|     Collection, | ||||
|  | @ -40,6 +41,7 @@ from twisted.internet import defer | |||
| from synapse.api.constants import EventTypes, Membership | ||||
| from synapse.events import EventBase | ||||
| from synapse.events.snapshot import EventContext | ||||
| from synapse.logging import opentracing | ||||
| from synapse.logging.context import PreserveLoggingContext, make_deferred_yieldable | ||||
| from synapse.metrics.background_process_metrics import run_as_background_process | ||||
| from synapse.storage.databases import Databases | ||||
|  | @ -103,12 +105,18 @@ times_pruned_extremities = Counter( | |||
| ) | ||||
| 
 | ||||
| 
 | ||||
| @attr.s(auto_attribs=True, frozen=True, slots=True) | ||||
| @attr.s(auto_attribs=True, slots=True) | ||||
| class _EventPersistQueueItem: | ||||
|     events_and_contexts: List[Tuple[EventBase, EventContext]] | ||||
|     backfilled: bool | ||||
|     deferred: ObservableDeferred | ||||
| 
 | ||||
|     parent_opentracing_span_contexts: List = [] | ||||
|     """A list of opentracing spans waiting for this batch""" | ||||
| 
 | ||||
|     opentracing_span_context: Any = None | ||||
|     """The opentracing span under which the persistence actually happened""" | ||||
| 
 | ||||
| 
 | ||||
| _PersistResult = TypeVar("_PersistResult") | ||||
| 
 | ||||
|  | @ -171,9 +179,27 @@ class _EventPeristenceQueue(Generic[_PersistResult]): | |||
|             ) | ||||
|             queue.append(end_item) | ||||
| 
 | ||||
|         # add our events to the queue item | ||||
|         end_item.events_and_contexts.extend(events_and_contexts) | ||||
| 
 | ||||
|         # also add our active opentracing span to the item so that we get a link back | ||||
|         span = opentracing.active_span() | ||||
|         if span: | ||||
|             end_item.parent_opentracing_span_contexts.append(span.context) | ||||
| 
 | ||||
|         # start a processor for the queue, if there isn't one already | ||||
|         self._handle_queue(room_id) | ||||
|         return await make_deferred_yieldable(end_item.deferred.observe()) | ||||
| 
 | ||||
|         # wait for the queue item to complete | ||||
|         res = await make_deferred_yieldable(end_item.deferred.observe()) | ||||
| 
 | ||||
|         # add another opentracing span which links to the persist trace. | ||||
|         with opentracing.start_active_span_follows_from( | ||||
|             "persist_event_batch_complete", (end_item.opentracing_span_context,) | ||||
|         ): | ||||
|             pass | ||||
| 
 | ||||
|         return res | ||||
| 
 | ||||
|     def _handle_queue(self, room_id): | ||||
|         """Attempts to handle the queue for a room if not already being handled. | ||||
|  | @ -200,9 +226,17 @@ class _EventPeristenceQueue(Generic[_PersistResult]): | |||
|                 queue = self._get_drainining_queue(room_id) | ||||
|                 for item in queue: | ||||
|                     try: | ||||
|                         ret = await self._per_item_callback( | ||||
|                             item.events_and_contexts, item.backfilled | ||||
|                         ) | ||||
|                         with opentracing.start_active_span_follows_from( | ||||
|                             "persist_event_batch", | ||||
|                             item.parent_opentracing_span_contexts, | ||||
|                             inherit_force_tracing=True, | ||||
|                         ) as scope: | ||||
|                             if scope: | ||||
|                                 item.opentracing_span_context = scope.span.context | ||||
| 
 | ||||
|                             ret = await self._per_item_callback( | ||||
|                                 item.events_and_contexts, item.backfilled | ||||
|                             ) | ||||
|                     except Exception: | ||||
|                         with PreserveLoggingContext(): | ||||
|                             item.deferred.errback() | ||||
|  | @ -252,6 +286,7 @@ class EventsPersistenceStorage: | |||
|         self._event_persist_queue = _EventPeristenceQueue(self._persist_event_batch) | ||||
|         self._state_resolution_handler = hs.get_state_resolution_handler() | ||||
| 
 | ||||
|     @opentracing.trace | ||||
|     async def persist_events( | ||||
|         self, | ||||
|         events_and_contexts: Iterable[Tuple[EventBase, EventContext]], | ||||
|  | @ -307,6 +342,7 @@ class EventsPersistenceStorage: | |||
|             self.main_store.get_room_max_token(), | ||||
|         ) | ||||
| 
 | ||||
|     @opentracing.trace | ||||
|     async def persist_event( | ||||
|         self, event: EventBase, context: EventContext, backfilled: bool = False | ||||
|     ) -> Tuple[EventBase, PersistedEventPosition, RoomStreamToken]: | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Richard van der Hoff
						Richard van der Hoff