From df3a661e4adb7676682a5e3c298a2dfda18b08a1 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 18 Jan 2019 10:04:47 +0000 Subject: [PATCH 01/11] Search for messages across predecessor rooms Signed-off-by: Andrew Morgan --- synapse/api/filtering.py | 3 ++ synapse/handlers/search.py | 69 ++++++++++++++++++++++++++++++++++++++ synapse/storage/state.py | 1 + 3 files changed, 73 insertions(+) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 16ad654864..84000e6422 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -444,6 +444,9 @@ class Filter(object): def include_redundant_members(self): return self.filter_json.get("include_redundant_members", False) + def add_room_ids(self, room_ids): + self.rooms += room_ids + def _matches_wildcard(actual_value, filter_value): if filter_value.endswith("*"): diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index ec936bbb4e..77e7e4e0fb 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -37,6 +37,54 @@ class SearchHandler(BaseHandler): def __init__(self, hs): super(SearchHandler, self).__init__(hs) + @defer.inlineCallbacks + def get_old_rooms_from_upgraded_room(self, room_id): + """Retrieves room IDs of old rooms in the history of an upgraded room. + + We do so by checking the m.room.create event of the room for a + `predecessor` key. If it exists, we add the room ID to our return + list and then check that room for a m.room.create event and so on + until we can no longer find any more previous rooms. + + The full list of all found rooms in then returned. + + Args: + room_id (str): The ID of the room to search through. + + Returns: + dict of past room IDs as strings + """ + + historical_room_ids = [] + + while True: + state_ids = yield self.store.get_current_state_ids(room_id) + create_id = state_ids.get((EventTypes.Create, "")) + + # If we can't find the create event, assume we've hit a dead end + if not create_id: + break + + # Retrieve the room's create event + create_event = yield self.store.get_event(create_id) + + if not create_event: + break + + # Check if a predecessor room is present + predecessor = create_event.content.get("predecessor", None) + if not predecessor: + break + + # Add predecessor's room ID + historical_room_id = predecessor["room_id"] + historical_room_ids.append(historical_room_id) + + # Scan through the old room for further predecessors + room_id = historical_room_id + + defer.returnValue(historical_room_ids) + @defer.inlineCallbacks def search(self, user, content, batch=None): """Performs a full text search for a user. @@ -139,6 +187,27 @@ class SearchHandler(BaseHandler): room_ids = search_filter.filter_rooms(room_ids) + # If doing a subset of all rooms seearch, check if any of the rooms + # are from an upgraded room, and search their contents as well + # XXX: There is the possibility that we don't have a create event for + # the room in question, in which case we can't return all the results + # we want to. + # Ideally we would just return the results we can get now, and + # try to get more results from other servers in the background. + if search_filter.rooms: + historical_room_ids = [] + for room_id in room_ids: + # Add any previous rooms to the search if they exist + ids = yield self.get_old_rooms_from_upgraded_room(room_id) + historical_room_ids += ids + + # Add any found rooms to the list to search + for historical_room_id in historical_room_ids: + room_ids.add(historical_room_id) + + # Prevent any historical events from being filtered + search_filter.add_room_ids(historical_room_ids) + if batch_group == "room_id": room_ids.intersection_update({batch_group_key}) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index a134e9b3e8..49b3ff4a71 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -448,6 +448,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): Returns: deferred: dict of (type, state_key) -> event_id """ + def _get_current_state_ids_txn(txn): txn.execute( """SELECT type, state_key, event_id FROM current_state_events From cb80db894186c4c9a4991e0623530a038eb82543 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Fri, 18 Jan 2019 11:22:00 +0000 Subject: [PATCH 02/11] Add changelog --- changelog.d/4415.feature | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/4415.feature diff --git a/changelog.d/4415.feature b/changelog.d/4415.feature new file mode 100644 index 0000000000..1fb1d58f8f --- /dev/null +++ b/changelog.d/4415.feature @@ -0,0 +1 @@ +Search now includes results from predecessor rooms after a room upgrade. \ No newline at end of file From c9bfb058d85f6205fada062c78a4d1eca119417c Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 22 Jan 2019 11:12:48 +0000 Subject: [PATCH 03/11] Fix a bug with single-room search searching all rooms * Create a new method for getting predecessor rooms * Remove formatting change --- synapse/api/filtering.py | 15 ++++++++++++-- synapse/handlers/search.py | 42 +++++++++----------------------------- synapse/storage/state.py | 29 +++++++++++++++++++++++++- 3 files changed, 51 insertions(+), 35 deletions(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 84000e6422..0d8957175d 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -444,8 +444,19 @@ class Filter(object): def include_redundant_members(self): return self.filter_json.get("include_redundant_members", False) - def add_room_ids(self, room_ids): - self.rooms += room_ids + def with_room_ids(self, room_ids): + """Returns a new filter with the given room IDs appended. + + Args: + room_ids (list): A list of room_ids. + + Returns: + filter: A new filter including the given rooms and the old + filter's rooms. + """ + newFilter = self + newFilter.rooms += room_ids + return newFilter def _matches_wildcard(actual_value, filter_value): diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 77e7e4e0fb..75c26fe065 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -49,39 +49,26 @@ class SearchHandler(BaseHandler): The full list of all found rooms in then returned. Args: - room_id (str): The ID of the room to search through. + room_id (str): id of the room to search through. Returns: - dict of past room IDs as strings + Deferred[iterable[str]]: predecessor room ids """ historical_room_ids = [] while True: - state_ids = yield self.store.get_current_state_ids(room_id) - create_id = state_ids.get((EventTypes.Create, "")) + predecessor = yield self.store.get_room_predecessor(room_id) - # If we can't find the create event, assume we've hit a dead end - if not create_id: - break - - # Retrieve the room's create event - create_event = yield self.store.get_event(create_id) - - if not create_event: - break - - # Check if a predecessor room is present - predecessor = create_event.content.get("predecessor", None) + # If no predecessor, assume we've hit a dead end if not predecessor: break # Add predecessor's room ID - historical_room_id = predecessor["room_id"] - historical_room_ids.append(historical_room_id) + historical_room_ids.append(predecessor["room_id"]) # Scan through the old room for further predecessors - room_id = historical_room_id + room_id = predecessor["room_id"] defer.returnValue(historical_room_ids) @@ -185,28 +172,19 @@ class SearchHandler(BaseHandler): ) room_ids = set(r.room_id for r in rooms) - room_ids = search_filter.filter_rooms(room_ids) - # If doing a subset of all rooms seearch, check if any of the rooms # are from an upgraded room, and search their contents as well - # XXX: There is the possibility that we don't have a create event for - # the room in question, in which case we can't return all the results - # we want to. - # Ideally we would just return the results we can get now, and - # try to get more results from other servers in the background. if search_filter.rooms: historical_room_ids = [] - for room_id in room_ids: + for room_id in search_filter.rooms: # Add any previous rooms to the search if they exist ids = yield self.get_old_rooms_from_upgraded_room(room_id) historical_room_ids += ids - # Add any found rooms to the list to search - for historical_room_id in historical_room_ids: - room_ids.add(historical_room_id) - # Prevent any historical events from being filtered - search_filter.add_room_ids(historical_room_ids) + search_filter = search_filter.with_room_ids(historical_room_ids) + + room_ids = search_filter.filter_rooms(room_ids) if batch_group == "room_id": room_ids.intersection_update({batch_group_key}) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 49b3ff4a71..b064671851 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -437,6 +437,34 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): create_event = yield self.get_event(create_id) defer.returnValue(create_event.content.get("room_version", "1")) + @defer.inlineCallbacks + def get_room_predecessor(self, room_id): + """Get the predecessor room of an upgraded room if one exists. + Otherwise return None. + + Args: + room_id (str) + + Returns: + Deferred[str]: predecessor room id + """ + + state_ids = yield self.get_current_state_ids(room_id) + create_id = state_ids.get((EventTypes.Create, "")) + + # If we can't find the create event, assume we've hit a dead end + if not create_id: + return None + + # Retrieve the room's create event + create_event = yield self.get_event(create_id) + + if not create_event: + return None + + # Return predecessor if present + return create_event.content.get("predecessor", None) + @cached(max_entries=100000, iterable=True) def get_current_state_ids(self, room_id): """Get the current state event ids for a room based on the @@ -448,7 +476,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): Returns: deferred: dict of (type, state_key) -> event_id """ - def _get_current_state_ids_txn(txn): txn.execute( """SELECT type, state_key, event_id FROM current_state_events From c433f6109145f0cf6c80dd07ee118b68a3a0cd4e Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 22 Jan 2019 12:06:36 +0000 Subject: [PATCH 04/11] Ensure new filter is actually created --- synapse/api/filtering.py | 2 +- synapse/storage/state.py | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index 0d8957175d..f3a056110f 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -454,7 +454,7 @@ class Filter(object): filter: A new filter including the given rooms and the old filter's rooms. """ - newFilter = self + newFilter = Filter(self.filter_json) newFilter.rooms += room_ids return newFilter diff --git a/synapse/storage/state.py b/synapse/storage/state.py index b064671851..981d1e3600 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -448,7 +448,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): Returns: Deferred[str]: predecessor room id """ - state_ids = yield self.get_current_state_ids(room_id) create_id = state_ids.get((EventTypes.Create, "")) From 277e50462d1422ac8cfe2df7cd2213288f3d14c5 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Tue, 22 Jan 2019 12:40:26 +0000 Subject: [PATCH 05/11] Do not return in a deferred function --- synapse/storage/state.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index 981d1e3600..fceb9744aa 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -453,16 +453,16 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): # If we can't find the create event, assume we've hit a dead end if not create_id: - return None + defer.returnValue(None) # Retrieve the room's create event create_event = yield self.get_event(create_id) if not create_event: - return None + defer.returnValue(None) # Return predecessor if present - return create_event.content.get("predecessor", None) + defer.returnValue(create_event.content.get("predecessor", None)) @cached(max_entries=100000, iterable=True) def get_current_state_ids(self, room_id): From 8ea509a9357d53f71e7bef09aae59f53b9f2317e Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 24 Jan 2019 17:21:35 +0000 Subject: [PATCH 06/11] Update synapse/api/filtering.py Co-Authored-By: anoadragon453 <1342360+anoadragon453@users.noreply.github.com> --- synapse/api/filtering.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/api/filtering.py b/synapse/api/filtering.py index f3a056110f..3906475403 100644 --- a/synapse/api/filtering.py +++ b/synapse/api/filtering.py @@ -448,7 +448,7 @@ class Filter(object): """Returns a new filter with the given room IDs appended. Args: - room_ids (list): A list of room_ids. + room_ids (iterable[unicode]): The room_ids to add Returns: filter: A new filter including the given rooms and the old From 03c85335d1d386c0523af3b6bf992f83bfb905d7 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Thu, 24 Jan 2019 17:22:09 +0000 Subject: [PATCH 07/11] Apply suggestions from code review Co-Authored-By: anoadragon453 <1342360+anoadragon453@users.noreply.github.com> --- synapse/handlers/search.py | 2 +- synapse/storage/state.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/handlers/search.py b/synapse/handlers/search.py index 75c26fe065..49c439313e 100644 --- a/synapse/handlers/search.py +++ b/synapse/handlers/search.py @@ -52,7 +52,7 @@ class SearchHandler(BaseHandler): room_id (str): id of the room to search through. Returns: - Deferred[iterable[str]]: predecessor room ids + Deferred[iterable[unicode]]: predecessor room ids """ historical_room_ids = [] diff --git a/synapse/storage/state.py b/synapse/storage/state.py index fceb9744aa..0a0691cd00 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -446,7 +446,7 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): room_id (str) Returns: - Deferred[str]: predecessor room id + Deferred[unicode|None]: predecessor room id """ state_ids = yield self.get_current_state_ids(room_id) create_id = state_ids.get((EventTypes.Create, "")) From e1781b043b4a73e1b22142b6e09c07ddd782ae57 Mon Sep 17 00:00:00 2001 From: Andrew Morgan Date: Thu, 24 Jan 2019 17:23:39 +0000 Subject: [PATCH 08/11] Remove redundant create event None check --- synapse/storage/state.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/synapse/storage/state.py b/synapse/storage/state.py index fceb9744aa..c02130d9d6 100644 --- a/synapse/storage/state.py +++ b/synapse/storage/state.py @@ -458,9 +458,6 @@ class StateGroupWorkerStore(EventsWorkerStore, SQLBaseStore): # Retrieve the room's create event create_event = yield self.get_event(create_id) - if not create_event: - defer.returnValue(None) - # Return predecessor if present defer.returnValue(create_event.content.get("predecessor", None)) From 0b3fd1401fdebb944729cf46d6de9c3bff482933 Mon Sep 17 00:00:00 2001 From: Andrew Morgan <1342360+anoadragon453@users.noreply.github.com> Date: Fri, 25 Jan 2019 11:25:02 +0000 Subject: [PATCH 09/11] Don't require sqlite3 when using postgres (#4466) --- changelog.d/4466.misc | 1 + synapse/storage/engines/sqlite.py | 3 +-- 2 files changed, 2 insertions(+), 2 deletions(-) create mode 100644 changelog.d/4466.misc diff --git a/changelog.d/4466.misc b/changelog.d/4466.misc new file mode 100644 index 0000000000..58130b6190 --- /dev/null +++ b/changelog.d/4466.misc @@ -0,0 +1 @@ +Synapse will now take advantage of native UPSERT functionality in PostgreSQL 9.5+ and SQLite 3.24+. diff --git a/synapse/storage/engines/sqlite.py b/synapse/storage/engines/sqlite.py index c64d73ff21..059ab81055 100644 --- a/synapse/storage/engines/sqlite.py +++ b/synapse/storage/engines/sqlite.py @@ -15,7 +15,6 @@ import struct import threading -from sqlite3 import sqlite_version_info from synapse.storage.prepare_database import prepare_database @@ -37,7 +36,7 @@ class Sqlite3Engine(object): Do we support native UPSERTs? This requires SQLite3 3.24+, plus some more work we haven't done yet to tell what was inserted vs updated. """ - return sqlite_version_info >= (3, 24, 0) + return self.module.sqlite_version_info >= (3, 24, 0) def check_database(self, txn): pass From 8520bc3109c2de6175391497941f3fc0b74c08e5 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 25 Jan 2019 12:38:16 +0000 Subject: [PATCH 10/11] Fix Host header sent by MatrixFederationAgent (#4468) Move the Host header logic down here so that (a) it is used if we reuse the agent elsewhere, and (b) we can mess about with it with .well-known. --- changelog.d/4468.misc | 1 + .../http/federation/matrix_federation_agent.py | 10 ++++++++++ synapse/http/matrixfederationclient.py | 1 - .../federation/test_matrix_federation_agent.py | 16 ++++++++++++++++ tests/http/test_fedclient.py | 2 +- 5 files changed, 28 insertions(+), 2 deletions(-) create mode 100644 changelog.d/4468.misc diff --git a/changelog.d/4468.misc b/changelog.d/4468.misc new file mode 100644 index 0000000000..9a51434755 --- /dev/null +++ b/changelog.d/4468.misc @@ -0,0 +1 @@ +Move SRV logic into the Agent layer diff --git a/synapse/http/federation/matrix_federation_agent.py b/synapse/http/federation/matrix_federation_agent.py index 0ec28c6696..1788e9a34a 100644 --- a/synapse/http/federation/matrix_federation_agent.py +++ b/synapse/http/federation/matrix_federation_agent.py @@ -19,6 +19,7 @@ from zope.interface import implementer from twisted.internet import defer from twisted.internet.endpoints import HostnameEndpoint, wrapClientTLS from twisted.web.client import URI, Agent, HTTPConnectionPool +from twisted.web.http_headers import Headers from twisted.web.iweb import IAgent from synapse.http.endpoint import parse_server_name @@ -109,6 +110,15 @@ class MatrixFederationAgent(object): else: target = pick_server_from_list(server_list) + # make sure that the Host header is set correctly + if headers is None: + headers = Headers() + else: + headers = headers.copy() + + if not headers.hasHeader(b'host'): + headers.addRawHeader(b'host', server_name_bytes) + class EndpointFactory(object): @staticmethod def endpointForURI(_uri): diff --git a/synapse/http/matrixfederationclient.py b/synapse/http/matrixfederationclient.py index 980e912348..bb2e64ed80 100644 --- a/synapse/http/matrixfederationclient.py +++ b/synapse/http/matrixfederationclient.py @@ -255,7 +255,6 @@ class MatrixFederationHttpClient(object): headers_dict = { b"User-Agent": [self.version_string_bytes], - b"Host": [destination_bytes], } with limiter: diff --git a/tests/http/federation/test_matrix_federation_agent.py b/tests/http/federation/test_matrix_federation_agent.py index b32d7566a5..261afb5f41 100644 --- a/tests/http/federation/test_matrix_federation_agent.py +++ b/tests/http/federation/test_matrix_federation_agent.py @@ -131,6 +131,10 @@ class MatrixFederationAgentTests(TestCase): request = http_server.requests[0] self.assertEqual(request.method, b'GET') self.assertEqual(request.path, b'/foo/bar') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'testserv:8448'] + ) content = request.content.read() self.assertEqual(content, b'') @@ -195,6 +199,10 @@ class MatrixFederationAgentTests(TestCase): request = http_server.requests[0] self.assertEqual(request.method, b'GET') self.assertEqual(request.path, b'/foo/bar') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'1.2.3.4'], + ) # finish the request request.finish() @@ -235,6 +243,10 @@ class MatrixFederationAgentTests(TestCase): request = http_server.requests[0] self.assertEqual(request.method, b'GET') self.assertEqual(request.path, b'/foo/bar') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'testserv'], + ) # finish the request request.finish() @@ -276,6 +288,10 @@ class MatrixFederationAgentTests(TestCase): request = http_server.requests[0] self.assertEqual(request.method, b'GET') self.assertEqual(request.path, b'/foo/bar') + self.assertEqual( + request.requestHeaders.getRawHeaders(b'host'), + [b'testserv'], + ) # finish the request request.finish() diff --git a/tests/http/test_fedclient.py b/tests/http/test_fedclient.py index d37f8f9981..018c77ebcd 100644 --- a/tests/http/test_fedclient.py +++ b/tests/http/test_fedclient.py @@ -49,7 +49,6 @@ class FederationClientTests(HomeserverTestCase): return hs def prepare(self, reactor, clock, homeserver): - self.cl = MatrixFederationHttpClient(self.hs) self.reactor.lookups["testserv"] = "1.2.3.4" @@ -95,6 +94,7 @@ class FederationClientTests(HomeserverTestCase): # that should have made it send the request to the transport self.assertRegex(transport.value(), b"^GET /foo/bar") + self.assertRegex(transport.value(), b"Host: testserv:8008") # Deferred is still without a result self.assertNoResult(test_d) From 4a3f1388328f8181c33b74a6a777ab2073d400d8 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Fri, 25 Jan 2019 13:57:52 +0000 Subject: [PATCH 11/11] Fix quoting for allowed_local_3pids example config (#4476) If you use double-quotes here, you have to escape your backslashes. It's much easier with single-quotes. (Note that the existing double-backslashes are already interpreted by python's """ parsing.) --- changelog.d/4476.misc | 1 + synapse/config/registration.py | 6 +++--- 2 files changed, 4 insertions(+), 3 deletions(-) create mode 100644 changelog.d/4476.misc diff --git a/changelog.d/4476.misc b/changelog.d/4476.misc new file mode 100644 index 0000000000..a070e10c7c --- /dev/null +++ b/changelog.d/4476.misc @@ -0,0 +1 @@ +Fix quoting for allowed_local_3pids example config diff --git a/synapse/config/registration.py b/synapse/config/registration.py index fe520d6855..d808a989f3 100644 --- a/synapse/config/registration.py +++ b/synapse/config/registration.py @@ -84,11 +84,11 @@ class RegistrationConfig(Config): # # allowed_local_3pids: # - medium: email - # pattern: ".*@matrix\\.org" + # pattern: '.*@matrix\\.org' # - medium: email - # pattern: ".*@vector\\.im" + # pattern: '.*@vector\\.im' # - medium: msisdn - # pattern: "\\+44" + # pattern: '\\+44' # If set, allows registration by anyone who also has the shared # secret, even if registration is otherwise disabled.