From a8a32d2714705d679584c5e10dfa79d9b4a8a76f Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 Aug 2016 11:23:39 +0100 Subject: [PATCH 1/2] Ensure we only persist an event once at a time --- synapse/storage/events.py | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index c63ca36df6..670aa8f118 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -26,7 +26,7 @@ from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError from canonicaljson import encode_canonical_json -from collections import deque, namedtuple +from collections import deque, namedtuple, OrderedDict import synapse import synapse.metrics @@ -403,6 +403,23 @@ class EventsStore(SQLBaseStore): and the rejections table. Things reading from those table will need to check whether the event was rejected. """ + # Ensure that we don't have the same event twice. + # Pick the earliest non-outlier if there is one, else the earliest one. + new_events_and_contexts = OrderedDict() + for event, context in events_and_contexts: + prev_event_context = new_events_and_contexts.get(event.event_id) + if prev_event_context: + if not event.internal_metadata.is_outlier(): + if prev_event_context[0].internal_metadata.is_outlier(): + # To ensure correct ordering we pop, as OrderedDict is + # ordered by first insertion. + new_events_and_contexts.pop(event.event_id, None) + new_events_and_contexts[event.event_id] = (event, context) + else: + new_events_and_contexts[event.event_id] = (event, context) + + events_and_contexts = new_events_and_contexts.values() + depth_updates = {} for event, context in events_and_contexts: # Remove the any existing cache entries for the event_ids From 80ad7102176a3b24ab589b6eb4aa42b872f054da Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Wed, 3 Aug 2016 13:22:26 +0100 Subject: [PATCH 2/2] Remove other bit of deduplication --- synapse/storage/events.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/synapse/storage/events.py b/synapse/storage/events.py index 670aa8f118..4664cfe6d9 100644 --- a/synapse/storage/events.py +++ b/synapse/storage/events.py @@ -450,8 +450,6 @@ class EventsStore(SQLBaseStore): for event_id, outlier in txn.fetchall() } - # Remove the events that we've seen before. - event_map = {} to_remove = set() for event, context in events_and_contexts: if context.rejected: @@ -462,23 +460,6 @@ class EventsStore(SQLBaseStore): to_remove.add(event) continue - # Handle the case of the list including the same event multiple - # times. The tricky thing here is when they differ by whether - # they are an outlier. - if event.event_id in event_map: - other = event_map[event.event_id] - - if not other.internal_metadata.is_outlier(): - to_remove.add(event) - continue - elif not event.internal_metadata.is_outlier(): - to_remove.add(event) - continue - else: - to_remove.add(other) - - event_map[event.event_id] = event - if event.event_id not in have_persisted: continue