Handle outliers in `/federation/v1/event` (#12332)
The intention here is to avoid doing state lookups for outliers in `/_matrix/federation/v1/event`. Unfortunately that's expanded into something of a rewrite of `filter_events_for_server`, which ended up trying to do that operation in a couple of places.pull/12318/merge
parent
11df4ec6c2
commit
6927d87254
|
@ -0,0 +1 @@
|
||||||
|
Avoid trying to calculate the state at outlier events.
|
|
@ -1,4 +1,5 @@
|
||||||
# Copyright 2014 - 2016 OpenMarket Ltd
|
# Copyright 2014 - 2016 OpenMarket Ltd
|
||||||
|
# Copyright (C) The Matrix.org Foundation C.I.C. 2022
|
||||||
#
|
#
|
||||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
# you may not use this file except in compliance with the License.
|
# you may not use this file except in compliance with the License.
|
||||||
|
@ -12,7 +13,9 @@
|
||||||
# See the License for the specific language governing permissions and
|
# See the License for the specific language governing permissions and
|
||||||
# limitations under the License.
|
# limitations under the License.
|
||||||
import logging
|
import logging
|
||||||
from typing import Dict, FrozenSet, List, Optional
|
from typing import Collection, Dict, FrozenSet, List, Optional, Tuple
|
||||||
|
|
||||||
|
from typing_extensions import Final
|
||||||
|
|
||||||
from synapse.api.constants import EventTypes, HistoryVisibility, Membership
|
from synapse.api.constants import EventTypes, HistoryVisibility, Membership
|
||||||
from synapse.events import EventBase
|
from synapse.events import EventBase
|
||||||
|
@ -40,6 +43,8 @@ MEMBERSHIP_PRIORITY = (
|
||||||
Membership.BAN,
|
Membership.BAN,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
_HISTORY_VIS_KEY: Final[Tuple[str, str]] = (EventTypes.RoomHistoryVisibility, "")
|
||||||
|
|
||||||
|
|
||||||
async def filter_events_for_client(
|
async def filter_events_for_client(
|
||||||
storage: Storage,
|
storage: Storage,
|
||||||
|
@ -74,7 +79,7 @@ async def filter_events_for_client(
|
||||||
# to clients.
|
# to clients.
|
||||||
events = [e for e in events if not e.internal_metadata.is_soft_failed()]
|
events = [e for e in events if not e.internal_metadata.is_soft_failed()]
|
||||||
|
|
||||||
types = ((EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, user_id))
|
types = (_HISTORY_VIS_KEY, (EventTypes.Member, user_id))
|
||||||
|
|
||||||
# we exclude outliers at this point, and then handle them separately later
|
# we exclude outliers at this point, and then handle them separately later
|
||||||
event_id_to_state = await storage.state.get_state_for_events(
|
event_id_to_state = await storage.state.get_state_for_events(
|
||||||
|
@ -157,7 +162,7 @@ async def filter_events_for_client(
|
||||||
state = event_id_to_state[event.event_id]
|
state = event_id_to_state[event.event_id]
|
||||||
|
|
||||||
# get the room_visibility at the time of the event.
|
# get the room_visibility at the time of the event.
|
||||||
visibility_event = state.get((EventTypes.RoomHistoryVisibility, ""), None)
|
visibility_event = state.get(_HISTORY_VIS_KEY, None)
|
||||||
if visibility_event:
|
if visibility_event:
|
||||||
visibility = visibility_event.content.get(
|
visibility = visibility_event.content.get(
|
||||||
"history_visibility", HistoryVisibility.SHARED
|
"history_visibility", HistoryVisibility.SHARED
|
||||||
|
@ -293,67 +298,28 @@ async def filter_events_for_server(
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def check_event_is_visible(event: EventBase, state: StateMap[EventBase]) -> bool:
|
def check_event_is_visible(
|
||||||
history = state.get((EventTypes.RoomHistoryVisibility, ""), None)
|
visibility: str, memberships: StateMap[EventBase]
|
||||||
if history:
|
) -> bool:
|
||||||
visibility = history.content.get(
|
if visibility not in (HistoryVisibility.INVITED, HistoryVisibility.JOINED):
|
||||||
"history_visibility", HistoryVisibility.SHARED
|
return True
|
||||||
)
|
|
||||||
if visibility in [HistoryVisibility.INVITED, HistoryVisibility.JOINED]:
|
|
||||||
# We now loop through all state events looking for
|
|
||||||
# membership states for the requesting server to determine
|
|
||||||
# if the server is either in the room or has been invited
|
|
||||||
# into the room.
|
|
||||||
for ev in state.values():
|
|
||||||
if ev.type != EventTypes.Member:
|
|
||||||
continue
|
|
||||||
try:
|
|
||||||
domain = get_domain_from_id(ev.state_key)
|
|
||||||
except Exception:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if domain != server_name:
|
# We now loop through all membership events looking for
|
||||||
continue
|
# membership states for the requesting server to determine
|
||||||
|
# if the server is either in the room or has been invited
|
||||||
|
# into the room.
|
||||||
|
for ev in memberships.values():
|
||||||
|
assert get_domain_from_id(ev.state_key) == server_name
|
||||||
|
|
||||||
memtype = ev.membership
|
memtype = ev.membership
|
||||||
if memtype == Membership.JOIN:
|
if memtype == Membership.JOIN:
|
||||||
return True
|
return True
|
||||||
elif memtype == Membership.INVITE:
|
elif memtype == Membership.INVITE:
|
||||||
if visibility == HistoryVisibility.INVITED:
|
if visibility == HistoryVisibility.INVITED:
|
||||||
return True
|
return True
|
||||||
else:
|
|
||||||
# server has no users in the room: redact
|
|
||||||
return False
|
|
||||||
|
|
||||||
return True
|
# server has no users in the room: redact
|
||||||
|
return False
|
||||||
# Lets check to see if all the events have a history visibility
|
|
||||||
# of "shared" or "world_readable". If that's the case then we don't
|
|
||||||
# need to check membership (as we know the server is in the room).
|
|
||||||
event_to_state_ids = await storage.state.get_state_ids_for_events(
|
|
||||||
frozenset(e.event_id for e in events),
|
|
||||||
state_filter=StateFilter.from_types(
|
|
||||||
types=((EventTypes.RoomHistoryVisibility, ""),)
|
|
||||||
),
|
|
||||||
)
|
|
||||||
|
|
||||||
visibility_ids = set()
|
|
||||||
for sids in event_to_state_ids.values():
|
|
||||||
hist = sids.get((EventTypes.RoomHistoryVisibility, ""))
|
|
||||||
if hist:
|
|
||||||
visibility_ids.add(hist)
|
|
||||||
|
|
||||||
# If we failed to find any history visibility events then the default
|
|
||||||
# is "shared" visibility.
|
|
||||||
if not visibility_ids:
|
|
||||||
all_open = True
|
|
||||||
else:
|
|
||||||
event_map = await storage.main.get_events(visibility_ids)
|
|
||||||
all_open = all(
|
|
||||||
e.content.get("history_visibility")
|
|
||||||
in (None, HistoryVisibility.SHARED, HistoryVisibility.WORLD_READABLE)
|
|
||||||
for e in event_map.values()
|
|
||||||
)
|
|
||||||
|
|
||||||
if not check_history_visibility_only:
|
if not check_history_visibility_only:
|
||||||
erased_senders = await storage.main.are_users_erased(e.sender for e in events)
|
erased_senders = await storage.main.are_users_erased(e.sender for e in events)
|
||||||
|
@ -362,34 +328,100 @@ async def filter_events_for_server(
|
||||||
# to no users having been erased.
|
# to no users having been erased.
|
||||||
erased_senders = {}
|
erased_senders = {}
|
||||||
|
|
||||||
if all_open:
|
# Let's check to see if all the events have a history visibility
|
||||||
# all the history_visibility state affecting these events is open, so
|
# of "shared" or "world_readable". If that's the case then we don't
|
||||||
# we don't need to filter by membership state. We *do* need to check
|
# need to check membership (as we know the server is in the room).
|
||||||
# for user erasure, though.
|
event_to_history_vis = await _event_to_history_vis(storage, events)
|
||||||
if erased_senders:
|
|
||||||
to_return = []
|
|
||||||
for e in events:
|
|
||||||
if not is_sender_erased(e, erased_senders):
|
|
||||||
to_return.append(e)
|
|
||||||
elif redact:
|
|
||||||
to_return.append(prune_event(e))
|
|
||||||
|
|
||||||
return to_return
|
# for any with restricted vis, we also need the memberships
|
||||||
|
event_to_memberships = await _event_to_memberships(
|
||||||
|
storage,
|
||||||
|
[
|
||||||
|
e
|
||||||
|
for e in events
|
||||||
|
if event_to_history_vis[e.event_id]
|
||||||
|
not in (HistoryVisibility.SHARED, HistoryVisibility.WORLD_READABLE)
|
||||||
|
],
|
||||||
|
server_name,
|
||||||
|
)
|
||||||
|
|
||||||
# If there are no erased users then we can just return the given list
|
to_return = []
|
||||||
# of events without having to copy it.
|
for e in events:
|
||||||
return events
|
erased = is_sender_erased(e, erased_senders)
|
||||||
|
visible = check_event_is_visible(
|
||||||
|
event_to_history_vis[e.event_id], event_to_memberships.get(e.event_id, {})
|
||||||
|
)
|
||||||
|
if visible and not erased:
|
||||||
|
to_return.append(e)
|
||||||
|
elif redact:
|
||||||
|
to_return.append(prune_event(e))
|
||||||
|
|
||||||
# Ok, so we're dealing with events that have non-trivial visibility
|
return to_return
|
||||||
# rules, so we need to also get the memberships of the room.
|
|
||||||
|
|
||||||
# first, for each event we're wanting to return, get the event_ids
|
|
||||||
# of the history vis and membership state at those events.
|
async def _event_to_history_vis(
|
||||||
|
storage: Storage, events: Collection[EventBase]
|
||||||
|
) -> Dict[str, str]:
|
||||||
|
"""Get the history visibility at each of the given events
|
||||||
|
|
||||||
|
Returns a map from event id to history_visibility setting
|
||||||
|
"""
|
||||||
|
|
||||||
|
# outliers get special treatment here. We don't have the state at that point in the
|
||||||
|
# room (and attempting to look it up will raise an exception), so all we can really
|
||||||
|
# do is assume that the requesting server is allowed to see the event. That's
|
||||||
|
# equivalent to there not being a history_visibility event, so we just exclude
|
||||||
|
# any outliers from the query.
|
||||||
|
event_to_state_ids = await storage.state.get_state_ids_for_events(
|
||||||
|
frozenset(e.event_id for e in events if not e.internal_metadata.is_outlier()),
|
||||||
|
state_filter=StateFilter.from_types(types=(_HISTORY_VIS_KEY,)),
|
||||||
|
)
|
||||||
|
|
||||||
|
visibility_ids = {
|
||||||
|
vis_event_id
|
||||||
|
for vis_event_id in (
|
||||||
|
state_ids.get(_HISTORY_VIS_KEY) for state_ids in event_to_state_ids.values()
|
||||||
|
)
|
||||||
|
if vis_event_id
|
||||||
|
}
|
||||||
|
vis_events = await storage.main.get_events(visibility_ids)
|
||||||
|
|
||||||
|
result: Dict[str, str] = {}
|
||||||
|
for event in events:
|
||||||
|
vis = HistoryVisibility.SHARED
|
||||||
|
state_ids = event_to_state_ids.get(event.event_id)
|
||||||
|
|
||||||
|
# if we didn't find any state for this event, it's an outlier, and we assume
|
||||||
|
# it's open
|
||||||
|
visibility_id = None
|
||||||
|
if state_ids:
|
||||||
|
visibility_id = state_ids.get(_HISTORY_VIS_KEY)
|
||||||
|
|
||||||
|
if visibility_id:
|
||||||
|
vis_event = vis_events[visibility_id]
|
||||||
|
vis = vis_event.content.get("history_visibility", HistoryVisibility.SHARED)
|
||||||
|
assert isinstance(vis, str)
|
||||||
|
|
||||||
|
result[event.event_id] = vis
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
async def _event_to_memberships(
|
||||||
|
storage: Storage, events: Collection[EventBase], server_name: str
|
||||||
|
) -> Dict[str, StateMap[EventBase]]:
|
||||||
|
"""Get the remote membership list at each of the given events
|
||||||
|
|
||||||
|
Returns a map from event id to state map, which will contain only membership events
|
||||||
|
for the given server.
|
||||||
|
"""
|
||||||
|
|
||||||
|
if not events:
|
||||||
|
return {}
|
||||||
|
|
||||||
|
# for each event, get the event_ids of the membership state at those events.
|
||||||
event_to_state_ids = await storage.state.get_state_ids_for_events(
|
event_to_state_ids = await storage.state.get_state_ids_for_events(
|
||||||
frozenset(e.event_id for e in events),
|
frozenset(e.event_id for e in events),
|
||||||
state_filter=StateFilter.from_types(
|
state_filter=StateFilter.from_types(types=((EventTypes.Member, None),)),
|
||||||
types=((EventTypes.RoomHistoryVisibility, ""), (EventTypes.Member, None))
|
|
||||||
),
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# We only want to pull out member events that correspond to the
|
# We only want to pull out member events that correspond to the
|
||||||
|
@ -405,10 +437,7 @@ async def filter_events_for_server(
|
||||||
for key, event_id in key_to_eid.items()
|
for key, event_id in key_to_eid.items()
|
||||||
}
|
}
|
||||||
|
|
||||||
def include(typ, state_key):
|
def include(state_key: str) -> bool:
|
||||||
if typ != EventTypes.Member:
|
|
||||||
return True
|
|
||||||
|
|
||||||
# we avoid using get_domain_from_id here for efficiency.
|
# we avoid using get_domain_from_id here for efficiency.
|
||||||
idx = state_key.find(":")
|
idx = state_key.find(":")
|
||||||
if idx == -1:
|
if idx == -1:
|
||||||
|
@ -416,10 +445,14 @@ async def filter_events_for_server(
|
||||||
return state_key[idx + 1 :] == server_name
|
return state_key[idx + 1 :] == server_name
|
||||||
|
|
||||||
event_map = await storage.main.get_events(
|
event_map = await storage.main.get_events(
|
||||||
[e_id for e_id, key in event_id_to_state_key.items() if include(key[0], key[1])]
|
[
|
||||||
|
e_id
|
||||||
|
for e_id, (_, state_key) in event_id_to_state_key.items()
|
||||||
|
if include(state_key)
|
||||||
|
]
|
||||||
)
|
)
|
||||||
|
|
||||||
event_to_state = {
|
return {
|
||||||
e_id: {
|
e_id: {
|
||||||
key: event_map[inner_e_id]
|
key: event_map[inner_e_id]
|
||||||
for key, inner_e_id in key_to_eid.items()
|
for key, inner_e_id in key_to_eid.items()
|
||||||
|
@ -427,14 +460,3 @@ async def filter_events_for_server(
|
||||||
}
|
}
|
||||||
for e_id, key_to_eid in event_to_state_ids.items()
|
for e_id, key_to_eid in event_to_state_ids.items()
|
||||||
}
|
}
|
||||||
|
|
||||||
to_return = []
|
|
||||||
for e in events:
|
|
||||||
erased = is_sender_erased(e, erased_senders)
|
|
||||||
visible = check_event_is_visible(e, event_to_state[e.event_id])
|
|
||||||
if visible and not erased:
|
|
||||||
to_return.append(e)
|
|
||||||
elif redact:
|
|
||||||
to_return.append(prune_event(e))
|
|
||||||
|
|
||||||
return to_return
|
|
||||||
|
|
|
@ -17,6 +17,7 @@ from unittest.mock import patch
|
||||||
|
|
||||||
from synapse.api.room_versions import RoomVersions
|
from synapse.api.room_versions import RoomVersions
|
||||||
from synapse.events import EventBase, make_event_from_dict
|
from synapse.events import EventBase, make_event_from_dict
|
||||||
|
from synapse.events.snapshot import EventContext
|
||||||
from synapse.types import JsonDict, create_requester
|
from synapse.types import JsonDict, create_requester
|
||||||
from synapse.visibility import filter_events_for_client, filter_events_for_server
|
from synapse.visibility import filter_events_for_client, filter_events_for_server
|
||||||
|
|
||||||
|
@ -73,6 +74,39 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
|
||||||
self.assertEqual(events_to_filter[i].event_id, filtered[i].event_id)
|
self.assertEqual(events_to_filter[i].event_id, filtered[i].event_id)
|
||||||
self.assertEqual(filtered[i].content["a"], "b")
|
self.assertEqual(filtered[i].content["a"], "b")
|
||||||
|
|
||||||
|
def test_filter_outlier(self) -> None:
|
||||||
|
# outlier events must be returned, for the good of the collective federation
|
||||||
|
self.get_success(self._inject_room_member("@resident:remote_hs"))
|
||||||
|
self.get_success(self._inject_visibility("@resident:remote_hs", "joined"))
|
||||||
|
|
||||||
|
outlier = self.get_success(self._inject_outlier())
|
||||||
|
self.assertEqual(
|
||||||
|
self.get_success(
|
||||||
|
filter_events_for_server(self.storage, "remote_hs", [outlier])
|
||||||
|
),
|
||||||
|
[outlier],
|
||||||
|
)
|
||||||
|
|
||||||
|
# it should also work when there are other events in the list
|
||||||
|
evt = self.get_success(self._inject_message("@unerased:local_hs"))
|
||||||
|
|
||||||
|
filtered = self.get_success(
|
||||||
|
filter_events_for_server(self.storage, "remote_hs", [outlier, evt])
|
||||||
|
)
|
||||||
|
self.assertEqual(len(filtered), 2, f"expected 2 results, got: {filtered}")
|
||||||
|
self.assertEqual(filtered[0], outlier)
|
||||||
|
self.assertEqual(filtered[1].event_id, evt.event_id)
|
||||||
|
self.assertEqual(filtered[1].content, evt.content)
|
||||||
|
|
||||||
|
# ... but other servers should only be able to see the outlier (the other should
|
||||||
|
# be redacted)
|
||||||
|
filtered = self.get_success(
|
||||||
|
filter_events_for_server(self.storage, "other_server", [outlier, evt])
|
||||||
|
)
|
||||||
|
self.assertEqual(filtered[0], outlier)
|
||||||
|
self.assertEqual(filtered[1].event_id, evt.event_id)
|
||||||
|
self.assertNotIn("body", filtered[1].content)
|
||||||
|
|
||||||
def test_erased_user(self) -> None:
|
def test_erased_user(self) -> None:
|
||||||
# 4 message events, from erased and unerased users, with a membership
|
# 4 message events, from erased and unerased users, with a membership
|
||||||
# change in the middle of them.
|
# change in the middle of them.
|
||||||
|
@ -187,6 +221,25 @@ class FilterEventsForServerTestCase(unittest.HomeserverTestCase):
|
||||||
self.get_success(self.storage.persistence.persist_event(event, context))
|
self.get_success(self.storage.persistence.persist_event(event, context))
|
||||||
return event
|
return event
|
||||||
|
|
||||||
|
def _inject_outlier(self) -> EventBase:
|
||||||
|
builder = self.event_builder_factory.for_room_version(
|
||||||
|
RoomVersions.V1,
|
||||||
|
{
|
||||||
|
"type": "m.room.member",
|
||||||
|
"sender": "@test:user",
|
||||||
|
"state_key": "@test:user",
|
||||||
|
"room_id": TEST_ROOM_ID,
|
||||||
|
"content": {"membership": "join"},
|
||||||
|
},
|
||||||
|
)
|
||||||
|
|
||||||
|
event = self.get_success(builder.build(prev_event_ids=[], auth_event_ids=[]))
|
||||||
|
event.internal_metadata.outlier = True
|
||||||
|
self.get_success(
|
||||||
|
self.storage.persistence.persist_event(event, EventContext.for_outlier())
|
||||||
|
)
|
||||||
|
return event
|
||||||
|
|
||||||
|
|
||||||
class FilterEventsForClientTestCase(unittest.FederatingHomeserverTestCase):
|
class FilterEventsForClientTestCase(unittest.FederatingHomeserverTestCase):
|
||||||
def test_out_of_band_invite_rejection(self):
|
def test_out_of_band_invite_rejection(self):
|
||||||
|
|
Loading…
Reference in New Issue