Add metrics to track how often events are `soft_failed` (#10156)
Spawned from missing messages we were seeing on `matrix.org` from a federated Gitter bridged room, https://gitlab.com/gitterHQ/webapp/-/issues/2770. The underlying issue in Synapse is tracked by https://github.com/matrix-org/synapse/issues/10066, where the message and join event race and the message is `soft_failed` before the `join` event reaches the remote federated server. Fewer `soft_failed` events = better, and usually this should only trigger for events where people are doing bad things and trying to fuzz and fake everything.
pull/10163/head
parent
e21c347332
commit
b31daac01c
|
@ -0,0 +1 @@
|
|||
Add `synapse_federation_soft_failed_events_total` metric to track how often events are soft failed.
|
|
@ -33,6 +33,7 @@ from typing import (
|
|||
)
|
||||
|
||||
import attr
|
||||
from prometheus_client import Counter
|
||||
from signedjson.key import decode_verify_key_bytes
|
||||
from signedjson.sign import verify_signed_json
|
||||
from unpaddedbase64 import decode_base64
|
||||
|
@ -101,6 +102,11 @@ if TYPE_CHECKING:
|
|||
|
||||
logger = logging.getLogger(__name__)
|
||||
|
||||
soft_failed_event_counter = Counter(
|
||||
"synapse_federation_soft_failed_events_total",
|
||||
"Events received over federation that we marked as soft_failed",
|
||||
)
|
||||
|
||||
|
||||
@attr.s(slots=True)
|
||||
class _NewEventInfo:
|
||||
|
@ -2498,6 +2504,7 @@ class FederationHandler(BaseHandler):
|
|||
event_auth.check(room_version_obj, event, auth_events=current_auth_events)
|
||||
except AuthError as e:
|
||||
logger.warning("Soft-failing %r because %s", event, e)
|
||||
soft_failed_event_counter.inc()
|
||||
event.internal_metadata.soft_failed = True
|
||||
|
||||
async def on_get_missing_events(
|
||||
|
|
Loading…
Reference in New Issue