Merge pull request #4699 from matrix-org/erikj/stop_fed_not_in_room

Stop backpaginating when events not visible

commit b050a10871

@@ -0,0 +1 @@
+Fix attempting to paginate in rooms where server cannot see any events, to avoid unnecessarily pulling in lots of redacted events.
@@ -858,6 +858,52 @@ class FederationHandler(BaseHandler):
             logger.debug("Not backfilling as no extremeties found.")
             return
 
+        # We only want to paginate if we can actually see the events we'll get,
+        # as otherwise we'll just spend a lot of resources to get redacted
+        # events.
+        #
+        # We do this by filtering all the backwards extremities and seeing if
+        # any remain. Given we don't have the extremity events themselves, we
+        # need to actually check the events that reference them.
+        #
+        # *Note*: the spec wants us to keep backfilling until we reach the start
+        # of the room in case we are allowed to see some of the history. However
+        # in practice that causes more issues than its worth, as a) its
+        # relatively rare for there to be any visible history and b) even when
+        # there is its often sufficiently long ago that clients would stop
+        # attempting to paginate before backfill reached the visible history.
+        #
+        # TODO: If we do do a backfill then we should filter the backwards
+        # extremities to only include those that point to visible portions of
+        # history.
+        #
+        # TODO: Correctly handle the case where we are allowed to see the
+        # forward event but not the backward extremity, e.g. in the case of
+        # initial join of the server where we are allowed to see the join
+        # event but not anything before it. This would require looking at the
+        # state *before* the event, ignoring the special casing certain event
+        # types have.
+
+        forward_events = yield self.store.get_successor_events(
+            list(extremities),
+        )
+
+        extremities_events = yield self.store.get_events(
+            forward_events,
+            check_redacted=False,
+            get_prev_content=False,
+        )
+
+        # We set `check_history_visibility_only` as we might otherwise get false
+        # positives from users having been erased.
+        filtered_extremities = yield filter_events_for_server(
+            self.store, self.server_name, list(extremities_events.values()),
+            redact=False, check_history_visibility_only=True,
+        )
+
+        if not filtered_extremities:
+            defer.returnValue(False)
+
         # Check if we reached a point where we should start backfilling.
         sorted_extremeties_tuple = sorted(
             extremities.items(),
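Condensed into a standalone method sketch, the guard added above works as follows. The helper name `_should_backfill` is hypothetical; the store methods and the `filter_events_for_server` call are the ones the diff itself introduces:

```python
from twisted.internet import defer

from synapse.visibility import filter_events_for_server


@defer.inlineCallbacks
def _should_backfill(self, extremities):
    # We don't hold the backwards-extremity events themselves, so we
    # check the events that reference them instead.
    forward_events = yield self.store.get_successor_events(
        list(extremities),
    )

    extremities_events = yield self.store.get_events(
        forward_events,
        check_redacted=False,
        get_prev_content=False,
    )

    # redact=False drops invisible events outright, and
    # check_history_visibility_only=True avoids false positives from
    # senders who have been erased.
    filtered = yield filter_events_for_server(
        self.store, self.server_name, list(extremities_events.values()),
        redact=False, check_history_visibility_only=True,
    )

    # Backfill is only worth attempting if some extremity is visible.
    defer.returnValue(bool(filtered))
```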
@@ -442,6 +442,28 @@ class EventFederationWorkerStore(EventsWorkerStore, SignatureWorkerStore,
         event_results.reverse()
         return event_results
 
+    @defer.inlineCallbacks
+    def get_successor_events(self, event_ids):
+        """Fetch all events that have the given events as a prev event
+
+        Args:
+            event_ids (iterable[str])
+
+        Returns:
+            Deferred[list[str]]
+        """
+        rows = yield self._simple_select_many_batch(
+            table="event_edges",
+            column="prev_event_id",
+            iterable=event_ids,
+            retcols=("event_id",),
+            desc="get_successor_events"
+        )
+
+        defer.returnValue([
+            row["event_id"] for row in rows
+        ])
+
 
 class EventFederationStore(EventFederationWorkerStore):
     """ Responsible for storing and serving up the various graphs associated
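The new storage method is a thin batched select over `event_edges`, stepping one hop forwards through the room DAG. A usage sketch from any `inlineCallbacks` context with access to the store (the function name and event ID are illustrative):

```python
from twisted.internet import defer


@defer.inlineCallbacks
def find_children(store, event_id):
    # Returns the IDs of all events whose prev_events include the
    # given ID, i.e. the immediate successors in the room DAG.
    successors = yield store.get_successor_events([event_id])
    defer.returnValue(successors)
```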
@@ -216,28 +216,36 @@ def filter_events_for_client(store, user_id, events, is_peeking=False,
 
 
 @defer.inlineCallbacks
-def filter_events_for_server(store, server_name, events):
-    # Whatever else we do, we need to check for senders which have requested
-    # erasure of their data.
-    erased_senders = yield store.are_users_erased(
-        (e.sender for e in events),
-    )
-
-    def redact_disallowed(event, state):
-        # if the sender has been gdpr17ed, always return a redacted
-        # copy of the event.
-        if erased_senders[event.sender]:
+def filter_events_for_server(store, server_name, events, redact=True,
+                             check_history_visibility_only=False):
+    """Filter a list of events based on whether given server is allowed to
+    see them.
+
+    Args:
+        store (DataStore)
+        server_name (str)
+        events (iterable[FrozenEvent])
+        redact (bool): Whether to return a redacted version of the event, or
+            to filter them out entirely.
+        check_history_visibility_only (bool): Whether to only check the
+            history visibility, rather than things like if the sender has been
+            erased. This is used e.g. during pagination to decide whether to
+            backfill or not.
+
+    Returns
+        Deferred[list[FrozenEvent]]
+    """
+
+    def is_sender_erased(event, erased_senders):
+        if erased_senders and erased_senders[event.sender]:
             logger.info(
                 "Sender of %s has been erased, redacting",
                 event.event_id,
             )
-            return prune_event(event)
-
-        # state will be None if we decided we didn't need to filter by
-        # room membership.
-        if not state:
-            return event
-
+            return True
+        return False
+
+    def check_event_is_visible(event, state):
         history = state.get((EventTypes.RoomHistoryVisibility, ''), None)
         if history:
             visibility = history.content.get("history_visibility", "shared")
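Taken together, the two new keyword arguments select between the old redacting behaviour and the new filter-only mode. A sketch of both call patterns, written as if inside an `inlineCallbacks` function (`store`, `server_name` and `events` stand in for the caller's values):

```python
# Default mode (e.g. federation /backfill responses): events the remote
# server must not see are replaced with pruned (redacted) copies.
filtered = yield filter_events_for_server(store, server_name, events)

# Pagination pre-check mode (see the FederationHandler hunk above):
# invisible events are dropped entirely, and the erasure check is
# skipped so that only history visibility affects the result.
visible = yield filter_events_for_server(
    store, server_name, events,
    redact=False, check_history_visibility_only=True,
)
```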
@@ -259,17 +267,17 @@ def filter_events_for_server(store, server_name, events):
 
             memtype = ev.membership
             if memtype == Membership.JOIN:
-                return event
+                return True
             elif memtype == Membership.INVITE:
                 if visibility == "invited":
-                    return event
+                    return True
         else:
             # server has no users in the room: redact
-            return prune_event(event)
+            return False
 
-        return event
+        return True
 
-    # Next lets check to see if all the events have a history visibility
+    # Lets check to see if all the events have a history visibility
     # of "shared" or "world_readable". If thats the case then we don't
     # need to check membership (as we know the server is in the room).
     event_to_state_ids = yield store.get_state_ids_for_events(
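For rooms with restricted history, the rewritten predicate boils down to the following restatement (plain strings stand in for the `Membership` and `history_visibility` constants, and the per-state-event loop is collapsed into a list of the server's local memberships):

```python
def visible_to_server(visibility, local_memberships):
    # Open visibilities never require a membership check.
    if visibility not in ("invited", "joined"):
        return True
    for membership in local_memberships:
        if membership == "join":
            return True
        if membership == "invite" and visibility == "invited":
            return True
    # No qualifying local users: the event is not visible.
    return False
```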
@@ -296,16 +304,31 @@ def filter_events_for_server(store, server_name, events):
         for e in itervalues(event_map)
     )
 
+    if not check_history_visibility_only:
+        erased_senders = yield store.are_users_erased(
+            (e.sender for e in events),
+        )
+    else:
+        # We don't want to check whether users are erased, which is equivalent
+        # to no users having been erased.
+        erased_senders = {}
+
     if all_open:
         # all the history_visibility state affecting these events is open, so
         # we don't need to filter by membership state. We *do* need to check
         # for user erasure, though.
         if erased_senders:
-            events = [
-                redact_disallowed(e, None)
-                for e in events
-            ]
+            to_return = []
+            for e in events:
+                if not is_sender_erased(e, erased_senders):
+                    to_return.append(e)
+                elif redact:
+                    to_return.append(prune_event(e))
+
+            defer.returnValue(to_return)
 
+        # If there are no erased users then we can just return the given list
+        # of events without having to copy it.
         defer.returnValue(events)
 
     # Ok, so we're dealing with events that have non-trivial visibility
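The empty dict assigned above is a deliberate sentinel: `is_sender_erased` tests the mapping for truthiness before indexing it, so `{}` means "nobody is erased" with no risk of a `KeyError`. A minimal illustration (the sender ID is made up):

```python
def is_sender_erased(event_sender, erased_senders):
    # Mirrors the helper above, with a plain sender string: an empty
    # dict short-circuits before the (KeyError-prone) lookup.
    if erased_senders and erased_senders[event_sender]:
        return True
    return False

assert is_sender_erased("@alice:example.org", {"@alice:example.org": True})
assert not is_sender_erased("@alice:example.org", {})  # check disabled
```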
@@ -361,7 +384,13 @@ def filter_events_for_server(store, server_name, events):
         for e_id, key_to_eid in iteritems(event_to_state_ids)
     }
 
-    defer.returnValue([
-        redact_disallowed(e, event_to_state[e.event_id])
-        for e in events
-    ])
+    to_return = []
+    for e in events:
+        erased = is_sender_erased(e, erased_senders)
+        visible = check_event_is_visible(e, event_to_state[e.event_id])
+        if visible and not erased:
+            to_return.append(e)
+        elif redact:
+            to_return.append(prune_event(e))
+
+    defer.returnValue(to_return)
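Both rewritten return paths share one shape: a per-event boolean decision, then keep, prune, or drop according to `redact`. The pattern in isolation (assuming `prune_event` from `synapse.events.utils`, which this module already imports):

```python
from synapse.events.utils import prune_event


def apply_decisions(events, is_allowed, redact):
    # Keep allowed events; for the rest, either serve a redacted copy
    # (redact=True, the old behaviour) or omit them (redact=False).
    to_return = []
    for e in events:
        if is_allowed(e):
            to_return.append(e)
        elif redact:
            to_return.append(prune_event(e))
    return to_return
```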