From c3eb7dd9c572cfcea831e4d5f41b5a34cb3c57e8 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 19 Feb 2015 11:50:49 +0000 Subject: [PATCH 01/45] Add config option to set the soft fd limit on start --- synapse/app/homeserver.py | 20 +++++++++++++++++--- synapse/config/server.py | 7 +++++++ 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index ea20de1434..07386f6f28 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -52,6 +52,7 @@ import synapse import logging import os import re +import resource import subprocess import sqlite3 import syweb @@ -269,6 +270,15 @@ def get_version_string(): return ("Synapse/%s" % (synapse.__version__,)).encode("ascii") +def change_resource_limit(soft_file_no): + try: + soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) + resource.setrlimit(resource.RLIMIT_NOFILE, (soft_file_no, hard)) + logger.info("Set file limit to: %d", soft_file_no) + except (ValueError, resource.error) as e: + logger.warn("Failed to set file limit: %s", e) + + def setup(): config = HomeServerConfig.load_config( "Synapse Homeserver", @@ -345,10 +355,11 @@ def setup(): if config.daemonize: print config.pid_file + daemon = Daemonize( app="synapse-homeserver", pid=config.pid_file, - action=run, + action=lambda: run(config), auto_close_fds=False, verbose=True, logger=logger, @@ -356,11 +367,14 @@ def setup(): daemon.start() else: - reactor.run() + run(config) -def run(): +def run(config): with LoggingContext("run"): + if config.soft_file_limit: + change_resource_limit(config.soft_file_limit) + reactor.run() diff --git a/synapse/config/server.py b/synapse/config/server.py index 31e44cc857..a3edf208e9 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -31,6 +31,7 @@ class ServerConfig(Config): self.webclient = True self.manhole = args.manhole self.no_tls = args.no_tls + self.soft_file_limit = args.soft_file_limit if not args.content_addr: host = args.server_name @@ -77,6 +78,12 @@ class ServerConfig(Config): "content repository") server_group.add_argument("--no-tls", action='store_true', help="Don't bind to the https port.") + server_group.add_argument("--soft-file-limit", type=int, default=0, + help="Set the limit on the number of file " + "descriptors synapse can use. Zero " + "is used to indicate synapse should " + "not change the limit from system " + "default.") def read_signing_key(self, signing_key_path): signing_keys = self.read_file(signing_key_path, "signing_key") From 939273c4b0936b95eb14763d8bf1027159fa4cb3 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 19 Feb 2015 11:53:13 +0000 Subject: [PATCH 02/45] Rename resource variable so as to not shadow module import --- synapse/app/homeserver.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 07386f6f28..5f671dfd23 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -141,8 +141,8 @@ class SynapseHomeServer(HomeServer): # instead, we'll store a copy of this mapping so we can actually add # extra resources to existing nodes. See self._resource_id for the key. 
resource_mappings = {} - for (full_path, resource) in desired_tree: - logger.info("Attaching %s to path %s", resource, full_path) + for full_path, res in desired_tree: + logger.info("Attaching %s to path %s", res, full_path) last_resource = self.root_resource for path_seg in full_path.split('/')[1:-1]: if path_seg not in last_resource.listNames(): @@ -173,12 +173,12 @@ class SynapseHomeServer(HomeServer): child_name) child_resource = resource_mappings[child_res_id] # steal the children - resource.putChild(child_name, child_resource) + res.putChild(child_name, child_resource) # finally, insert the desired resource in the right place - last_resource.putChild(last_path_seg, resource) + last_resource.putChild(last_path_seg, res) res_id = self._resource_id(last_resource, last_path_seg) - resource_mappings[res_id] = resource + resource_mappings[res_id] = res return self.root_resource From 0ac2a79faa918280767c18e4db7ec29d7d3a3afb Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 19 Feb 2015 17:24:14 +0000 Subject: [PATCH 03/45] Initial stab at implementing a batched get_missing_pdus request --- synapse/federation/federation_server.py | 72 +++++++++++++++++++++++++ synapse/handlers/federation.py | 9 ++-- synapse/storage/event_federation.py | 63 ++++++++++++++++++++-- 3 files changed, 135 insertions(+), 9 deletions(-) diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 22b9663831..34bc397e8a 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -305,6 +305,78 @@ class FederationServer(FederationBase): (200, send_content) ) + @defer.inlineCallbacks + def get_missing_events(self, origin, room_id, earliest_events, + latest_events, limit, min_depth): + limit = max(limit, 50) + min_depth = max(min_depth, 0) + + missing_events = yield self.store.get_missing_events( + room_id=room_id, + earliest_events=earliest_events, + latest_events=latest_events, + limit=limit, + min_depth=min_depth, + ) + + known_ids = {e.event_id for e in missing_events} | {earliest_events} + + back_edges = { + e for e in missing_events + if {i for i, h in e.prev_events.items()} <= known_ids + } + + decoded_auth_events = set() + state = {} + auth_events = set() + auth_and_state = {} + for event in back_edges: + state_pdus = yield self.handler.get_state_for_pdu( + origin, room_id, event.event_id, + do_auth=False, + ) + + state[event.event_id] = [s.event_id for s in state_pdus] + + auth_and_state.update({ + s.event_id: s for s in state_pdus + }) + + state_ids = {pdu.event_id for pdu in state_pdus} + prev_ids = {i for i, h in event.prev_events.items()} + partial_auth_chain = yield self.store.get_auth_chain( + state_ids | prev_ids, have_ids=decoded_auth_events.keys() + ) + + for p in partial_auth_chain: + p.signatures.update( + compute_event_signature( + p, + self.hs.hostname, + self.hs.config.signing_key[0] + ) + ) + + auth_events.update( + a.event_id for a in partial_auth_chain + ) + + auth_and_state.update({ + a.event_id: a for a in partial_auth_chain + }) + + time_now = self._clock.time_msec() + + defer.returnValue({ + "events": [ev.get_pdu_json(time_now) for ev in missing_events], + "state_for_events": state, + "auth_events": auth_events, + "event_map": { + k: ev.get_pdu_json(time_now) + for k, ev in auth_and_state.items() + }, + }) + @log_function def _get_persisted_pdu(self, origin, event_id, do_auth=True): """ Get a PDU from the database with given origin and id. 
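A quick way to see what the "back edge" selection in get_missing_events above is doing: an event in the returned batch is a back edge when every one of its prev_events is either another event in the batch or one of the requester's earliest_events, so state and auth chains only need to be computed for those frontier events. A minimal standalone sketch of that set logic, using plain dicts with illustrative "event_id"/"prev_events" keys instead of real event objects, and set(earliest_events) for the union of known ids:

    def find_back_edges(missing_events, earliest_events):
        # An event is a back edge when every prev_event is already
        # known: either in the batch itself or in earliest_events.
        known_ids = set(e["event_id"] for e in missing_events)
        known_ids |= set(earliest_events)
        return [
            e for e in missing_events
            if set(e["prev_events"]) <= known_ids
        ]

    events = [
        {"event_id": "$A", "prev_events": ["$X"]},
        {"event_id": "$B", "prev_events": ["$A"]},
        {"event_id": "$C", "prev_events": ["$B", "$Y"]},
    ]

    # $A and $B qualify; $C does not, because $Y is neither in the
    # batch nor in earliest_events.
    print([e["event_id"] for e in find_back_edges(events, ["$X"])])
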
diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 0eb2ff95ca..26bdc6d1a7 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -581,12 +581,13 @@ class FederationHandler(BaseHandler): defer.returnValue(event) @defer.inlineCallbacks - def get_state_for_pdu(self, origin, room_id, event_id): + def get_state_for_pdu(self, origin, room_id, event_id, do_auth=True): yield run_on_reactor() - in_room = yield self.auth.check_host_in_room(room_id, origin) - if not in_room: - raise AuthError(403, "Host not in room.") + if do_auth: + in_room = yield self.auth.check_host_in_room(room_id, origin) + if not in_room: + raise AuthError(403, "Host not in room.") state_groups = yield self.store.get_state_groups( [event_id] diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 3fbc090224..22bf7ad832 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -32,15 +32,15 @@ class EventFederationStore(SQLBaseStore): and backfilling from another server respectively. """ - def get_auth_chain(self, event_ids): + def get_auth_chain(self, event_ids, have_ids=set()): return self.runInteraction( "get_auth_chain", self._get_auth_chain_txn, - event_ids + event_ids, have_ids ) - def _get_auth_chain_txn(self, txn, event_ids): - results = self._get_auth_chain_ids_txn(txn, event_ids) + def _get_auth_chain_txn(self, txn, event_ids, have_ids): + results = self._get_auth_chain_ids_txn(txn, event_ids, have_ids) return self._get_events_txn(txn, results) @@ -51,8 +51,9 @@ class EventFederationStore(SQLBaseStore): event_ids ) - def _get_auth_chain_ids_txn(self, txn, event_ids): + def _get_auth_chain_ids_txn(self, txn, event_ids, have_ids): results = set() + have_ids = set(have_ids) base_sql = ( "SELECT auth_id FROM event_auth WHERE event_id = ?" @@ -64,6 +65,10 @@ class EventFederationStore(SQLBaseStore): for f in front: txn.execute(base_sql, (f,)) new_front.update([r[0] for r in txn.fetchall()]) + + new_front -= results + new_front -= have_ids + front = new_front results.update(front) @@ -378,3 +383,51 @@ class EventFederationStore(SQLBaseStore): event_results += new_front return self._get_events_txn(txn, event_results) + + def get_missing_events(self, room_id, earliest_events, latest_events, + limit, min_depth): + return self.runInteraction( + "get_missing_events", + self._get_missing_events, + room_id, earliest_events, latest_events, limit, min_depth + ) + + def _get_missing_events(self, txn, room_id, earliest_events, latest_events, + limit, min_depth): + + earliest_events = set(earliest_events) + front = set(latest_events) - earliest_events + + event_results = set() + + query = ( + "SELECT prev_event_id FROM event_edges " + "WHERE room_id = ? AND event_id = ? AND is_state = 0 " + "LIMIT ?" 
+ ) + + while front and len(event_results) < limit: + new_front = set() + for event_id in front: + txn.execute( + query, + (room_id, event_id, limit - len(event_results)) + ) + + for e_id, in txn.fetchall(): + new_front.add(e_id) + + new_front -= earliest_events + new_front -= event_results + + front = new_front + event_results |= new_front + + events = self._get_events_txn(txn, event_results) + + events = sorted( + [ev for ev in events if ev.depth >= min_depth], + key=lambda e: e.depth, + ) + + return events[:limit] From 7c56210f204ef735444129c4f7b2a393eb7539ec Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 20 Feb 2015 16:09:44 +0000 Subject: [PATCH 04/45] By default set soft limit to hard limit --- synapse/app/homeserver.py | 8 ++++++-- synapse/config/server.py | 10 +++++----- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/synapse/app/homeserver.py b/synapse/app/homeserver.py index 5f671dfd23..89af091f10 100755 --- a/synapse/app/homeserver.py +++ b/synapse/app/homeserver.py @@ -273,7 +273,12 @@ def get_version_string(): def change_resource_limit(soft_file_no): try: soft, hard = resource.getrlimit(resource.RLIMIT_NOFILE) + + if not soft_file_no: + soft_file_no = hard + resource.setrlimit(resource.RLIMIT_NOFILE, (soft_file_no, hard)) + logger.info("Set file limit to: %d", soft_file_no) except (ValueError, resource.error) as e: logger.warn("Failed to set file limit: %s", e) @@ -372,8 +377,7 @@ def setup(): def run(config): with LoggingContext("run"): - if config.soft_file_limit: - change_resource_limit(config.soft_file_limit) + change_resource_limit(config.soft_file_limit) reactor.run() diff --git a/synapse/config/server.py b/synapse/config/server.py index a3edf208e9..4e4892d40b 100644 --- a/synapse/config/server.py +++ b/synapse/config/server.py @@ -79,11 +79,11 @@ class ServerConfig(Config): server_group.add_argument("--no-tls", action='store_true', help="Don't bind to the https port.") server_group.add_argument("--soft-file-limit", type=int, default=0, - help="Set the limit on the number of file " - "descriptors synapse can use. Zero " - "is used to indicate synapse should " - "not change the limit from system " - "default.") + help="Set the soft limit on the number of " + "file descriptors synapse can use. 
" + "Zero is used to indicate synapse " + "should set the soft limit to the hard" + "limit.") def read_signing_key(self, signing_key_path): signing_keys = self.read_file(signing_key_path, "signing_key") From db215b7e0007a207b8775d78c6693153e16f2731 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Feb 2015 13:58:02 +0000 Subject: [PATCH 05/45] Implement and use new batched get missing pdu --- synapse/federation/federation_client.py | 19 +++ synapse/federation/federation_server.py | 148 ++++++++---------------- synapse/federation/transaction_queue.py | 2 +- synapse/federation/transport/client.py | 19 +++ synapse/federation/transport/server.py | 31 +++++ synapse/handlers/federation.py | 23 ++++ 6 files changed, 143 insertions(+), 99 deletions(-) diff --git a/synapse/federation/federation_client.py b/synapse/federation/federation_client.py index cd3c962d50..ca89a0787c 100644 --- a/synapse/federation/federation_client.py +++ b/synapse/federation/federation_client.py @@ -439,6 +439,25 @@ class FederationClient(FederationBase): defer.returnValue(ret) + @defer.inlineCallbacks + def get_missing_events(self, destination, room_id, earliest_events, + latest_events, limit, min_depth): + content = yield self.transport_layer.get_missing_events( + destination, room_id, earliest_events, latest_events, limit, + min_depth, + ) + + events = [ + self.event_from_pdu_json(e) + for e in content.get("events", []) + ] + + signed_events = yield self._check_sigs_and_hash_and_fetch( + destination, events, outlier=True + ) + + defer.returnValue(signed_events) + def event_from_pdu_json(self, pdu_json, outlier=False): event = FrozenEvent( pdu_json diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py index 34bc397e8a..f74e16abd5 100644 --- a/synapse/federation/federation_server.py +++ b/synapse/federation/federation_server.py @@ -142,7 +142,15 @@ class FederationServer(FederationBase): if r[0]: ret.append({}) else: - logger.exception(r[1]) + failure = r[1] + logger.error( + "Failed to handle PDU", + exc_info=( + failure.type, + failure.value, + failure.getTracebackObject() + ) + ) ret.append({"error": str(r[1].value)}) logger.debug("Returning: %s", str(ret)) @@ -306,75 +314,17 @@ class FederationServer(FederationBase): ) @defer.inlineCallbacks - def get_missing_events(self, origin, room_id, earliest_events, - latest_events, limit, min_depth): - limit = max(limit, 50) - min_depth = max(min_depth, 0) - - missing_events = yield self.store.get_missing_events( - room_id=room_id, - earliest_events=earliest_events, - latest_events=latest_events, - limit=limit, - min_depth=min_depth, + @log_function + def on_get_missing_events(self, origin, room_id, earliest_events, + latest_events, limit, min_depth): + missing_events = yield self.handler.on_get_missing_events( + origin, room_id, earliest_events, latest_events, limit, min_depth ) - known_ids = {e.event_id for e in missing_events} | {earliest_events} - - back_edges = { - e for e in missing_events - if {i for i, h in e.prev_events.items()} <= known_ids - } - - decoded_auth_events = set() - state = {} - auth_events = set() - auth_and_state = {} - for event in back_edges: - state_pdus = yield self.handler.get_state_for_pdu( - origin, room_id, event.event_id, - do_auth=False, - ) - - state[event.event_id] = [s.event_id for s in state_pdus] - - auth_and_state.update({ - s.event_id: s for s in state_pdus - }) - - state_ids = {pdu.event_id for pdu in state_pdus} - prev_ids = {i for i, h in event.prev_events.items()} - partial_auth_chain 
= yield self.store.get_auth_chain( - state_ids | prev_ids, have_ids=decoded_auth_events.keys() - ) - - for p in partial_auth_chain: - p.signatures.update( - compute_event_signature( - p, - self.hs.hostname, - self.hs.config.signing_key[0] - ) - ) - - auth_events.update( - a.event_id for a in partial_auth_chain - ) - - auth_and_state.update({ - a.event_id: a for a in partial_auth_chain - }) - time_now = self._clock.time_msec() defer.returnValue({ "events": [ev.get_pdu_json(time_now) for ev in missing_events], - "state_for_events": state, - "auth_events": auth_events, - "event_map": { - k: ev.get_pdu_json(time_now) - for k, ev in auth_and_state.items() - }, }) @log_function @@ -403,7 +353,7 @@ class FederationServer(FederationBase): @defer.inlineCallbacks @log_function - def _handle_new_pdu(self, origin, pdu, max_recursion=10): + def _handle_new_pdu(self, origin, pdu, get_missing=True): # We reprocess pdus when we have seen them only as outliers existing = yield self._get_persisted_pdu( origin, pdu.event_id, do_auth=False @@ -455,48 +405,50 @@ class FederationServer(FederationBase): pdu.room_id, min_depth ) + prevs = {e_id for e_id, _ in pdu.prev_events} + seen = set(have_seen.keys()) + if min_depth and pdu.depth < min_depth: # This is so that we don't notify the user about this # message, to work around the fact that some events will # reference really really old events we really don't want to # send to the clients. pdu.internal_metadata.outlier = True - elif min_depth and pdu.depth > min_depth and max_recursion > 0: - for event_id, hashes in pdu.prev_events: - if event_id not in have_seen: - logger.debug( - "_handle_new_pdu requesting pdu %s", - event_id + elif min_depth and pdu.depth > min_depth: + if get_missing and prevs - seen: + latest_tuples = yield self.store.get_latest_events_in_room( + pdu.room_id + ) + + # We add the prev events that we have seen to the latest + # list to ensure the remote server doesn't give them to us + latest = set(e_id for e_id, _, _ in latest_tuples) + latest |= seen + + missing_events = yield self.get_missing_events( + origin, + pdu.room_id, + earliest_events=list(latest), + latest_events=[pdu.event_id], + limit=10, + min_depth=min_depth, + ) + + for e in missing_events: + yield self._handle_new_pdu( + origin, + e, + get_missing=False ) - try: - new_pdu = yield self.federation_client.get_pdu( - [origin, pdu.origin], - event_id=event_id, - ) + have_seen = yield self.store.have_events( + [ev for ev, _ in pdu.prev_events] + ) - if new_pdu: - yield self._handle_new_pdu( - origin, - new_pdu, - max_recursion=max_recursion-1 - ) - - logger.debug("Processed pdu %s", event_id) - else: - logger.warn("Failed to get PDU %s", event_id) - fetch_state = True - except: - # TODO(erikj): Do some more intelligent retries. 
- logger.exception("Failed to get PDU") - fetch_state = True - else: - prevs = {e_id for e_id, _ in pdu.prev_events} - seen = set(have_seen.keys()) - if prevs - seen: - fetch_state = True - else: - fetch_state = True + prevs = {e_id for e_id, _ in pdu.prev_events} + seen = set(have_seen.keys()) + if prevs - seen: + fetch_state = True if fetch_state: # We need to get the state at this event, since we haven't diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 7d30c924d1..8f1acbe590 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -287,7 +287,7 @@ class TransactionQueue(object): code = 200 if response: - for e_id, r in getattr(response, "pdus", {}).items(): + for e_id, r in response.get("pdus", {}).items(): if "error" in r: logger.warn( "Transaction returned error for %s: %s", diff --git a/synapse/federation/transport/client.py b/synapse/federation/transport/client.py index 8b137e7128..80d03012b7 100644 --- a/synapse/federation/transport/client.py +++ b/synapse/federation/transport/client.py @@ -219,3 +219,22 @@ class TransportLayerClient(object): ) defer.returnValue(content) + + @defer.inlineCallbacks + @log_function + def get_missing_events(self, destination, room_id, earliest_events, + latest_events, limit, min_depth): + path = PREFIX + "/get_missing_events/%s" % (room_id,) + + content = yield self.client.post_json( + destination=destination, + path=path, + data={ + "limit": int(limit), + "min_depth": int(min_depth), + "earliest_events": earliest_events, + "latest_events": latest_events, + } + ) + + defer.returnValue(content) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 2ffb37aa18..ad75c8ddb7 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -234,6 +234,7 @@ class TransportLayerServer(object): ) ) ) + self.server.register_path( "POST", re.compile("^" + PREFIX + "/query_auth/([^/]*)/([^/]*)$"), @@ -245,6 +246,17 @@ class TransportLayerServer(object): ) ) + self.server.register_path( + "POST", + re.compile("^" + PREFIX + "/get_missing_events/([^/]*)/?$"), + self._with_authentication( + lambda origin, content, query, room_id: + self._get_missing_events( + origin, content, room_id, + ) + ) + ) + @defer.inlineCallbacks @log_function def _on_send_request(self, origin, content, query, transaction_id): @@ -344,3 +356,22 @@ class TransportLayerServer(object): ) defer.returnValue((200, new_content)) + + @defer.inlineCallbacks + @log_function + def _get_missing_events(self, origin, content, room_id): + limit = int(content.get("limit", 10)) + min_depth = int(content.get("min_depth", 0)) + earliest_events = content.get("earliest_events", []) + latest_events = content.get("latest_events", []) + + content = yield self.request_handler.on_get_missing_events( + origin, + room_id=room_id, + earliest_events=earliest_events, + latest_events=latest_events, + min_depth=min_depth, + limit=limit, + ) + + defer.returnValue((200, content)) diff --git a/synapse/handlers/federation.py b/synapse/handlers/federation.py index 26bdc6d1a7..628e62f8b1 100644 --- a/synapse/handlers/federation.py +++ b/synapse/handlers/federation.py @@ -789,6 +789,29 @@ class FederationHandler(BaseHandler): defer.returnValue(ret) + @defer.inlineCallbacks + def on_get_missing_events(self, origin, room_id, earliest_events, + latest_events, limit, min_depth): + in_room = yield self.auth.check_host_in_room( + room_id, + origin + ) + if not 
in_room: + raise AuthError(403, "Host not in room.") + + limit = min(limit, 20) + min_depth = max(min_depth, 0) + + missing_events = yield self.store.get_missing_events( + room_id=room_id, + earliest_events=earliest_events, + latest_events=latest_events, + limit=limit, + min_depth=min_depth, + ) + + defer.returnValue(missing_events) + @defer.inlineCallbacks @log_function def do_auth(self, origin, event, context, auth_events): From 42b972bccd0cf7d903befb498f9c1bbd5c4e6583 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Feb 2015 14:35:23 +0000 Subject: [PATCH 06/45] Revert get_auth_chain changes --- synapse/storage/event_federation.py | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/synapse/storage/event_federation.py b/synapse/storage/event_federation.py index 22bf7ad832..2deda8ac50 100644 --- a/synapse/storage/event_federation.py +++ b/synapse/storage/event_federation.py @@ -32,15 +32,15 @@ class EventFederationStore(SQLBaseStore): and backfilling from another server respectively. """ - def get_auth_chain(self, event_ids, have_ids=set()): + def get_auth_chain(self, event_ids): return self.runInteraction( "get_auth_chain", self._get_auth_chain_txn, - event_ids, have_ids + event_ids ) - def _get_auth_chain_txn(self, txn, event_ids, have_ids): - results = self._get_auth_chain_ids_txn(txn, event_ids, have_ids) + def _get_auth_chain_txn(self, txn, event_ids): + results = self._get_auth_chain_ids_txn(txn, event_ids) return self._get_events_txn(txn, results) @@ -51,9 +51,8 @@ class EventFederationStore(SQLBaseStore): event_ids ) - def _get_auth_chain_ids_txn(self, txn, event_ids, have_ids): + def _get_auth_chain_ids_txn(self, txn, event_ids): results = set() - have_ids = set(have_ids) base_sql = ( "SELECT auth_id FROM event_auth WHERE event_id = ?" 
@@ -67,7 +66,6 @@ class EventFederationStore(SQLBaseStore): new_front.update([r[0] for r in txn.fetchall()]) new_front -= results - new_front -= have_ids front = new_front results.update(front) From 852816befe678f0061dc153fab20e1d4c70a9094 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 23 Feb 2015 15:14:09 +0000 Subject: [PATCH 07/45] Fix presence tests --- tests/handlers/test_presence.py | 43 +++++++++++++++++++++------------ 1 file changed, 28 insertions(+), 15 deletions(-) diff --git a/tests/handlers/test_presence.py b/tests/handlers/test_presence.py index d88a977be4..6ffc3c99cc 100644 --- a/tests/handlers/test_presence.py +++ b/tests/handlers/test_presence.py @@ -389,14 +389,18 @@ class PresenceInvitesTestCase(PresenceTestCase): @defer.inlineCallbacks def test_invite_remote(self): + # Use a different destination, otherwise retry logic might fail the + # request + u_rocket = UserID.from_string("@rocket:there") + put_json = self.mock_http_client.put_json put_json.expect_call_and_return( - call("elsewhere", + call("there", path="/_matrix/federation/v1/send/1000000/", - data=_expect_edu("elsewhere", "m.presence_invite", + data=_expect_edu("there", "m.presence_invite", content={ "observer_user": "@apple:test", - "observed_user": "@cabbage:elsewhere", + "observed_user": "@rocket:there", } ), json_data_callback=ANY, @@ -405,10 +409,10 @@ class PresenceInvitesTestCase(PresenceTestCase): ) yield self.handler.send_invite( - observer_user=self.u_apple, observed_user=self.u_cabbage) + observer_user=self.u_apple, observed_user=u_rocket) self.assertEquals( - [{"observed_user_id": "@cabbage:elsewhere", "accepted": 0}], + [{"observed_user_id": "@rocket:there", "accepted": 0}], (yield self.datastore.get_presence_list(self.u_apple.localpart)) ) @@ -418,13 +422,18 @@ class PresenceInvitesTestCase(PresenceTestCase): def test_accept_remote(self): # TODO(paul): This test will likely break if/when real auth permissions # are added; for now the HS will always accept any invite + + # Use a different destination, otherwise retry logic might fail the + # request + u_rocket = UserID.from_string("@rocket:moon") + put_json = self.mock_http_client.put_json put_json.expect_call_and_return( - call("elsewhere", + call("moon", path="/_matrix/federation/v1/send/1000000/", - data=_expect_edu("elsewhere", "m.presence_accept", + data=_expect_edu("moon", "m.presence_accept", content={ - "observer_user": "@cabbage:elsewhere", + "observer_user": "@rocket:moon", "observed_user": "@apple:test", } ), @@ -437,7 +446,7 @@ class PresenceInvitesTestCase(PresenceTestCase): "/_matrix/federation/v1/send/1000000/", _make_edu_json("elsewhere", "m.presence_invite", content={ - "observer_user": "@cabbage:elsewhere", + "observer_user": "@rocket:moon", "observed_user": "@apple:test", } ) @@ -446,7 +455,7 @@ class PresenceInvitesTestCase(PresenceTestCase): self.assertTrue( (yield self.datastore.is_presence_visible( observed_localpart=self.u_apple.localpart, - observer_userid=self.u_cabbage.to_string(), + observer_userid=u_rocket.to_string(), )) ) @@ -454,13 +463,17 @@ class PresenceInvitesTestCase(PresenceTestCase): @defer.inlineCallbacks def test_invited_remote_nonexistant(self): + # Use a different destination, otherwise retry logic might fail the + # request + u_rocket = UserID.from_string("@rocket:sun") + put_json = self.mock_http_client.put_json put_json.expect_call_and_return( - call("elsewhere", + call("sun", path="/_matrix/federation/v1/send/1000000/", - data=_expect_edu("elsewhere", "m.presence_deny", + 
data=_expect_edu("sun", "m.presence_deny", content={ - "observer_user": "@cabbage:elsewhere", + "observer_user": "@rocket:sun", "observed_user": "@durian:test", } ), @@ -471,9 +484,9 @@ class PresenceInvitesTestCase(PresenceTestCase): yield self.mock_federation_resource.trigger("PUT", "/_matrix/federation/v1/send/1000000/", - _make_edu_json("elsewhere", "m.presence_invite", + _make_edu_json("sun", "m.presence_invite", content={ - "observer_user": "@cabbage:elsewhere", + "observer_user": "@rocket:sun", "observed_user": "@durian:test", } ) From 2d20466f9a1349c97d5a3822eb4ee64f19bbdf27 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 25 Feb 2015 15:00:59 +0000 Subject: [PATCH 08/45] Add stub functions and work out execution flow to implement AS event stream polling. --- synapse/handlers/events.py | 3 --- synapse/handlers/room.py | 34 +++++++++++++++++++++++++--------- synapse/storage/appservice.py | 19 +++++++++++++++++++ synapse/storage/stream.py | 21 +++++++++++++++++++++ 4 files changed, 65 insertions(+), 12 deletions(-) diff --git a/synapse/handlers/events.py b/synapse/handlers/events.py index 025e7e7e62..8d5f5c8499 100644 --- a/synapse/handlers/events.py +++ b/synapse/handlers/events.py @@ -69,9 +69,6 @@ class EventStreamHandler(BaseHandler): ) self._streams_per_user[auth_user] += 1 - if pagin_config.from_token is None: - pagin_config.from_token = None - rm_handler = self.hs.get_handlers().room_member_handler room_ids = yield rm_handler.get_rooms_for_user(auth_user) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index 914742d913..a8b0c95636 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -510,9 +510,16 @@ class RoomMemberHandler(BaseHandler): def get_rooms_for_user(self, user, membership_list=[Membership.JOIN]): """Returns a list of roomids that the user has any of the given membership states in.""" - rooms = yield self.store.get_rooms_for_user_where_membership_is( - user_id=user.to_string(), membership_list=membership_list + + app_service = yield self.store.get_app_service_by_user_id( + user.to_string() ) + if app_service: + rooms = yield self.store.get_app_service_rooms(app_service) + else: + rooms = yield self.store.get_rooms_for_user_where_membership_is( + user_id=user.to_string(), membership_list=membership_list + ) # For some reason the list of events contains duplicates # TODO(paul): work out why because I really don't think it should @@ -559,13 +566,22 @@ class RoomEventSource(object): to_key = yield self.get_current_key() - events, end_key = yield self.store.get_room_events_stream( - user_id=user.to_string(), - from_key=from_key, - to_key=to_key, - room_id=None, - limit=limit, - ) + app_service = self.store.get_app_service_by_user_id(user.to_string()) + if app_service: + events, end_key = yield self.store.get_appservice_room_stream( + service=app_service, + from_key=from_key, + to_key=to_key, + limit=limit, + ) + else: + events, end_key = yield self.store.get_room_events_stream( + user_id=user.to_string(), + from_key=from_key, + to_key=to_key, + room_id=None, + limit=limit, + ) defer.returnValue((events, end_key)) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index dc3666efd4..435ccfd6fc 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -17,6 +17,7 @@ from twisted.internet import defer from synapse.api.errors import StoreError from synapse.appservice import ApplicationService +from synapse.storage.roommember import RoomsForUser from ._base import SQLBaseStore @@ -150,6 
+151,16 @@ class ApplicationServiceStore(SQLBaseStore): yield self.cache_defer # make sure the cache is ready defer.returnValue(self.services_cache) + @defer.inlineCallbacks + def get_app_service_by_user_id(self, user_id): + yield self.cache_defer # make sure the cache is ready + + for service in self.services_cache: + if service.sender == user_id: + defer.returnValue(service) + return + defer.returnValue(None) + @defer.inlineCallbacks def get_app_service_by_token(self, token, from_cache=True): """Get the application service with the given token. @@ -173,6 +184,14 @@ class ApplicationServiceStore(SQLBaseStore): # TODO: The from_cache=False impl # TODO: This should be JOINed with the application_services_regex table. + @defer.inlineCallbacks + def get_app_service_rooms(self, service): + logger.info("get_app_service_rooms -> %s", service) + + # TODO stub + yield self.cache_defer + defer.returnValue([RoomsForUser("!foo:bar", service.sender, "join")]) + @defer.inlineCallbacks def _populate_cache(self): """Populates the ApplicationServiceCache from the database.""" diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 3ccb6f8a61..aa3c9f8c9c 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -127,6 +127,27 @@ class _StreamToken(namedtuple("_StreamToken", "topological stream")): class StreamStore(SQLBaseStore): + + def get_appservice_room_stream(self, service, from_key, to_key, limit=0): + # NB this lives here instead of appservice.py so we can reuse the + # 'private' StreamToken class in this file. + logger.info("get_appservice_room_stream -> %s", service) + + if limit: + limit = max(limit, MAX_STREAM_SIZE) + else: + limit = MAX_STREAM_SIZE + + # From and to keys should be integers from ordering. + from_id = _StreamToken.parse_stream_token(from_key) + to_id = _StreamToken.parse_stream_token(to_key) + + if from_key == to_key: + return defer.succeed(([], to_key)) + + # TODO stub + return defer.succeed(([], to_key)) + @log_function def get_room_events_stream(self, user_id, from_key, to_key, room_id, limit=0, with_feedback=False): From 2b8ca84296b228b7cef09244605e4f2760349538 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 25 Feb 2015 17:15:25 +0000 Subject: [PATCH 09/45] Add support for extracting matching room_ids and room_aliases for a given AS. --- synapse/storage/appservice.py | 50 +++++++++++++++++++++++++++++++++-- synapse/storage/directory.py | 23 ++++++++++++++++ synapse/storage/room.py | 11 ++++++++ 3 files changed, 82 insertions(+), 2 deletions(-) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 435ccfd6fc..c8f0ce44f4 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -153,6 +153,19 @@ class ApplicationServiceStore(SQLBaseStore): @defer.inlineCallbacks def get_app_service_by_user_id(self, user_id): + """Retrieve an application service from their user ID. + + All application services have associated with them a particular user ID. + There is no distinguishing feature on the user ID which indicates it + represents an application service. This function allows you to map from + a user ID to an application service. + + Args: + user_id(str): The user ID to see if it is an application service. + Returns: + synapse.appservice.ApplicationService or None. 
+ """ + yield self.cache_defer # make sure the cache is ready for service in self.services_cache: @@ -163,7 +176,7 @@ class ApplicationServiceStore(SQLBaseStore): @defer.inlineCallbacks def get_app_service_by_token(self, token, from_cache=True): - """Get the application service with the given token. + """Get the application service with the given appservice token. Args: token (str): The application service token. @@ -186,10 +199,43 @@ class ApplicationServiceStore(SQLBaseStore): @defer.inlineCallbacks def get_app_service_rooms(self, service): - logger.info("get_app_service_rooms -> %s", service) + """Get a list of RoomsForUser for this application service. + + Application services may be "interested" in lots of rooms depending on + the room ID, the room aliases, or the members in the room. This function + takes all of these into account and returns a list of RoomsForUser which + represent the entire list of room IDs that this application service + wants to know about. + + Args: + service: The application service to get a room list for. + Returns: + A list of RoomsForUser. + """ + # FIXME: This is assuming that this store has methods from + # RoomStore, DirectoryStore, which is a bad assumption to + # make as it makes testing trickier and coupling less obvious. + + # get all rooms matching the room ID regex. + room_entries = yield self.get_all_rooms() # RoomEntry list + matching_room_id_list = [ + r.room_id for r in room_entries if + service.is_interested_in_room(r.room_id) + ] + + # resolve room IDs for matching room alias regex. + room_alias_mappings = yield self.get_all_associations() + matching_alias_list = [ + r.room_id for r in room_alias_mappings if + service.is_interested_in_alias(r.room_alias) + ] + + # get all rooms for every user for this AS. # TODO stub yield self.cache_defer + + defer.returnValue([RoomsForUser("!foo:bar", service.sender, "join")]) @defer.inlineCallbacks diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index 68b7d59693..e13b336934 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -134,6 +134,29 @@ class DirectoryStore(SQLBaseStore): return room_id + @defer.inlineCallbacks + def get_all_associations(self): + """Retrieve the entire list of room alias -> room ID pairings. + + Returns: + A list of RoomAliasMappings. + """ + results = self._simple_select_list( + "room_aliases", + None, + ["room_alias", "room_id"] + ) + # TODO(kegan): It feels wrong to be specifying no servers here, but + # equally this function isn't required to obtain all servers so + # retrieving them "just for the sake of it" also seems wrong, but we + # want to conform to passing Objects around and not dicts.. + return [ + RoomAliasMapping( + room_id=r["room_id"], room_alias=r["room_alias"], servers="" + ) for r in results + ] + + def get_aliases_for_room(self, room_id): return self._simple_select_onecol( "room_aliases", diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 750b17a45f..3a64693404 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -71,6 +71,17 @@ class RoomStore(SQLBaseStore): RoomsTable.decode_single_result, query, room_id, ) + def get_all_rooms(self): + """Retrieve all the rooms. + + Returns: + A list of namedtuples containing the room information. + """ + query = RoomsTable.select_statement() + return self._execute( + RoomsTable.decode_results, query, + ) + @defer.inlineCallbacks def get_rooms(self, is_public): """Retrieve a list of all public rooms. 
From 2c79c4dc7f638f1cb823903a2f8bb1005fda4a2c Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 25 Feb 2015 17:37:14 +0000 Subject: [PATCH 10/45] Fix alias query. --- synapse/storage/directory.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index e13b336934..70c8c8ccd3 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -141,20 +141,19 @@ class DirectoryStore(SQLBaseStore): Returns: A list of RoomAliasMappings. """ - results = self._simple_select_list( - "room_aliases", - None, - ["room_alias", "room_id"] + results = yield self._execute_and_decode( + "SELECT room_id, room_alias FROM room_aliases" ) + # TODO(kegan): It feels wrong to be specifying no servers here, but # equally this function isn't required to obtain all servers so # retrieving them "just for the sake of it" also seems wrong, but we # want to conform to passing Objects around and not dicts.. - return [ + defer.returnValue([ RoomAliasMapping( room_id=r["room_id"], room_alias=r["room_alias"], servers="" ) for r in results - ] + ]) def get_aliases_for_room(self, room_id): From 978ce87c86e60fd49f078d5bea79715abea6d236 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 25 Feb 2015 17:37:48 +0000 Subject: [PATCH 11/45] Comment unused variables. --- synapse/storage/stream.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index aa3c9f8c9c..3c8f3320f1 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -139,8 +139,8 @@ class StreamStore(SQLBaseStore): limit = MAX_STREAM_SIZE # From and to keys should be integers from ordering. - from_id = _StreamToken.parse_stream_token(from_key) - to_id = _StreamToken.parse_stream_token(to_key) + # from_id = _StreamToken.parse_stream_token(from_key) + # to_id = _StreamToken.parse_stream_token(to_key) if from_key == to_key: return defer.succeed(([], to_key)) From 29267cf9d7fbacdfcccaaef9160657f24b9aca14 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Wed, 25 Feb 2015 17:42:28 +0000 Subject: [PATCH 12/45] PEP8 and pyflakes --- synapse/storage/appservice.py | 8 +++----- synapse/storage/directory.py | 1 - 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index c8f0ce44f4..017b6d1e86 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -229,12 +229,10 @@ class ApplicationServiceStore(SQLBaseStore): r.room_id for r in room_alias_mappings if service.is_interested_in_alias(r.room_alias) ] + logging.debug(matching_alias_list) + logging.debug(matching_room_id_list) - # get all rooms for every user for this AS. - - # TODO stub - yield self.cache_defer - + # TODO get all rooms for every user for this AS. defer.returnValue([RoomsForUser("!foo:bar", service.sender, "join")]) diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index 70c8c8ccd3..e391239a3c 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -155,7 +155,6 @@ class DirectoryStore(SQLBaseStore): ) for r in results ]) - def get_aliases_for_room(self, room_id): return self._simple_select_onecol( "room_aliases", From 92478e96d6f6992146102599ca96b8dcacbf3895 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Thu, 26 Feb 2015 14:35:28 +0000 Subject: [PATCH 13/45] Finish impl to extract all room IDs an AS may be interested in when polling the event stream. 
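The set arithmetic in the change below works because RoomsForUser is a plain namedtuple (assumed here to carry room_id, sender and membership, as in the storage layer's roommember module), so equal triples compare equal and a set union deduplicates rooms found both via regex matching and via user membership. A standalone illustration:

    from collections import namedtuple

    # Re-declared locally so the sketch runs on its own.
    RoomsForUser = namedtuple(
        "RoomsForUser", ["room_id", "sender", "membership"]
    )

    via_membership = {RoomsForUser("!a:hs", "@as:hs", "join")}
    via_regex = {
        RoomsForUser("!a:hs", "@as:hs", "join"),
        RoomsForUser("!b:hs", "@as:hs", "join"),
    }

    # namedtuples compare by value, so the shared room collapses:
    # the union has 2 entries, not 3.
    print(len(via_membership | via_regex))
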
--- synapse/storage/appservice.py | 35 +++++++++++++++++++++++++++------ synapse/storage/registration.py | 7 +++++++ 2 files changed, 36 insertions(+), 6 deletions(-) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 017b6d1e86..d0632d55d1 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -213,8 +213,9 @@ class ApplicationServiceStore(SQLBaseStore): A list of RoomsForUser. """ # FIXME: This is assuming that this store has methods from - # RoomStore, DirectoryStore, which is a bad assumption to - # make as it makes testing trickier and coupling less obvious. + # RoomStore, DirectoryStore, RegistrationStore, RoomMemberStore which is + # a bad assumption to make as it makes testing trickier and coupling + # less obvious. # get all rooms matching the room ID regex. room_entries = yield self.get_all_rooms() # RoomEntry list @@ -229,12 +230,34 @@ class ApplicationServiceStore(SQLBaseStore): r.room_id for r in room_alias_mappings if service.is_interested_in_alias(r.room_alias) ] - logging.debug(matching_alias_list) - logging.debug(matching_room_id_list) + room_ids_matching_alias_or_id = set( + matching_room_id_list + matching_alias_list + ) - # TODO get all rooms for every user for this AS. + # get all rooms for every user for this AS. This is scoped to users on + # this HS only. + user_list = yield self.get_all_users() + user_list = [ + u["name"] for u in user_list if + service.is_interested_in_user(u["name"]) + ] + rooms_for_user_matching_user_id = [] # RoomsForUser list + for user_id in user_list: + rooms_for_user = yield self.get_rooms_for_user(user_id) + rooms_for_user_matching_user_id += rooms_for_user + rooms_for_user_matching_user_id = set(rooms_for_user_matching_user_id) - defer.returnValue([RoomsForUser("!foo:bar", service.sender, "join")]) + # make RoomsForUser tuples for room ids and aliases which are not in the + # main rooms_for_user_list - e.g. they are rooms which do not have AS + # registered users in it. + known_room_ids = [r.room_id for r in rooms_for_user_matching_user_id] + missing_rooms_for_user = [ + RoomsForUser(r, service.sender, "join") for r in + room_ids_matching_alias_or_id if r not in known_room_ids + ] + rooms_for_user_matching_user_id |= set(missing_rooms_for_user) + + defer.returnValue(rooms_for_user_matching_user_id) @defer.inlineCallbacks def _populate_cache(self): diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 029b07cc66..7aff3dbd33 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -92,6 +92,13 @@ class RegistrationStore(SQLBaseStore): query, user_id ) + def get_all_users(self): + query = ("SELECT users.name FROM users") + return self._execute( + self.cursor_to_dict, + query + ) + def get_user_by_token(self, token): """Get a user from the given access token. 
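The following patch ("Must update pending_transactions map before yield'ing") closes a re-entrancy window: with inlineCallbacks, every yield is a point where other calls can run, so a guard that is checked before the first yield but only set after it does not actually guard. A toy generator sketch of the race (standing in for the Twisted coroutine, names illustrative):

    pending = {}

    def send(dest):
        # BAD ordering: the guard is checked, then the call suspends
        # (standing in for `yield get_retry_limiter(...)`) before the
        # destination is marked as in progress.
        if dest in pending:
            return
        yield "suspended"
        pending[dest] = 1  # too late: a concurrent call is past the guard

    g1, g2 = send("remote.example"), send("remote.example")
    next(g1)  # first call suspends before marking
    next(g2)  # second call also passes the guard -> duplicate transaction

Marking pending_transactions[destination] before the first yield, as the patch does, is what makes the check-then-act sequence atomic from the reactor's point of view.
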
From 59362454ddbcb3adf64d81da00dcd85cdb59a2e6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 26 Feb 2015 15:47:35 +0000 Subject: [PATCH 14/45] Must update pending_transactions map before yield'ing --- synapse/federation/transaction_queue.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/synapse/federation/transaction_queue.py b/synapse/federation/transaction_queue.py index 8f1acbe590..741a4e7a1a 100644 --- a/synapse/federation/transaction_queue.py +++ b/synapse/federation/transaction_queue.py @@ -224,6 +224,8 @@ class TransactionQueue(object): ] try: + self.pending_transactions[destination] = 1 + limiter = yield get_retry_limiter( destination, self._clock, @@ -239,8 +241,6 @@ class TransactionQueue(object): len(pending_failures) ) - self.pending_transactions[destination] = 1 - logger.debug("TX [%s] Persisting transaction...", destination) transaction = Transaction.create_new( From 93d90765c4b09bc870fc91c6ddcf21fd4389659d Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Thu, 26 Feb 2015 16:15:26 +0000 Subject: [PATCH 15/45] Initial implementation of federation server rate limiting --- synapse/federation/transport/__init__.py | 12 +- synapse/federation/transport/server.py | 175 ++++++++++++++++++++++- 2 files changed, 182 insertions(+), 5 deletions(-) diff --git a/synapse/federation/transport/__init__.py b/synapse/federation/transport/__init__.py index 6800ac46c5..7028ca6947 100644 --- a/synapse/federation/transport/__init__.py +++ b/synapse/federation/transport/__init__.py @@ -21,7 +21,7 @@ support HTTPS), however individual pairings of servers may decide to communicate over a different (albeit still reliable) protocol. """ -from .server import TransportLayerServer +from .server import TransportLayerServer, FederationRateLimiter from .client import TransportLayerClient @@ -55,8 +55,18 @@ class TransportLayer(TransportLayerServer, TransportLayerClient): send requests """ self.keyring = homeserver.get_keyring() + self.clock = homeserver.get_clock() self.server_name = server_name self.server = server self.client = client self.request_handler = None self.received_handler = None + + self.ratelimiter = FederationRateLimiter( + self.clock, + window_size=10000, + sleep_limit=10, + sleep_msec=500, + reject_limit=50, + concurrent_requests=3, + ) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 2ffb37aa18..a9e625f127 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -16,9 +16,11 @@ from twisted.internet import defer from synapse.api.urls import FEDERATION_PREFIX as PREFIX -from synapse.api.errors import Codes, SynapseError +from synapse.api.errors import Codes, SynapseError, LimitExceededError +from synapse.util.async import sleep from synapse.util.logutils import log_function +import collections import logging import simplejson as json import re @@ -27,6 +29,163 @@ import re logger = logging.getLogger(__name__) +class FederationRateLimiter(object): + def __init__(self, clock, window_size, sleep_limit, sleep_msec, + reject_limit, concurrent_requests): + self.clock = clock + + self.window_size = window_size + self.sleep_limit = sleep_limit + self.sleep_msec = sleep_msec + self.reject_limit = reject_limit + self.concurrent_requests = concurrent_requests + + self.ratelimiters = {} + + def ratelimit(self, host): + return self.ratelimiters.setdefault( + host, + PerHostRatelimiter( + clock=self.clock, + window_size=self.window_size, + sleep_limit=self.sleep_limit, + 
sleep_msec=self.sleep_msec, + reject_limit=self.reject_limit, + concurrent_requests=self.concurrent_requests, + ) + ).ratelimit() + + +class PerHostRatelimiter(object): + def __init__(self, clock, window_size, sleep_limit, sleep_msec, + reject_limit, concurrent_requests): + self.clock = clock + + self.window_size = window_size + self.sleep_limit = sleep_limit + self.sleep_msec = sleep_msec + self.reject_limit = reject_limit + self.concurrent_requests = concurrent_requests + + self.sleeping_requests = set() + self.ready_request_queue = collections.OrderedDict() + self.current_processing = set() + self.request_times = [] + + def is_empty(self): + time_now = self.clock.time_msec() + self.request_times[:] = [ + r for r in self.request_times + if time_now - r < self.window_size + ] + + return not ( + self.ready_request_queue + or self.sleeping_requests + or self.current_processing + or self.request_times + ) + + def ratelimit(self): + request_id = object() + + def on_enter(): + return self._on_enter(request_id) + + def on_exit(exc_type, exc_val, exc_tb): + return self._on_exit(request_id) + + return ContextManagerFunction(on_enter, on_exit) + + def _on_enter(self, request_id): + time_now = self.clock.time_msec() + self.request_times[:] = [ + r for r in self.request_times + if time_now - r < self.window_size + ] + + queue_size = len(self.ready_request_queue) + len(self.sleeping_requests) + if queue_size > self.reject_limit: + raise LimitExceededError( + retry_after_ms=int( + self.window_size / self.sleep_limit + ), + ) + + self.request_times.append(time_now) + + def queue_request(): + if len(self.current_processing) > self.concurrent_requests: + logger.debug("Ratelimit [%s]: Queue req", id(request_id)) + queue_defer = defer.Deferred() + self.ready_request_queue[request_id] = queue_defer + return queue_defer + else: + return defer.succeed(None) + + logger.debug("Ratelimit [%s]: len(self.request_times)=%d", id(request_id), len(self.request_times)) + logger.debug("Ratelimit [%s]: len(self.request_times)=%d", id(request_id), len(self.request_times)) + + if len(self.request_times) > self.sleep_limit: + logger.debug("Ratelimit [%s]: sleeping req", id(request_id)) + ret_defer = sleep(self.sleep_msec/1000.0) + + self.sleeping_requests.add(request_id) + + def on_wait_finished(_): + logger.debug("Ratelimit [%s]: Finished sleeping", id(request_id)) + self.sleeping_requests.discard(request_id) + queue_defer = queue_request() + return queue_defer + + ret_defer.addBoth(on_wait_finished) + else: + ret_defer = queue_request() + + def on_start(r): + logger.debug("Ratelimit [%s]: Processing req", id(request_id)) + self.current_processing.add(request_id) + return r + + def on_err(r): + self.current_processing.discard(request_id) + return r + + def on_both(r): + # Ensure that we've properly cleaned up. 
+ self.sleeping_requests.discard(request_id) + self.ready_request_queue.pop(request_id, None) + return r + + ret_defer.addCallbacks(on_start, on_err) + ret_defer.addBoth(on_both) + return ret_defer + + def _on_exit(self, request_id): + logger.debug("Ratelimit [%s]: Processed req", id(request_id)) + self.current_processing.discard(request_id) + try: + request_id, deferred = self.ready_request_queue.popitem() + self.current_processing.add(request_id) + deferred.callback(None) + except KeyError: + pass + + +class ContextManagerFunction(object): + def __init__(self, on_enter, on_exit): + self.on_enter = on_enter + self.on_exit = on_exit + + def __enter__(self): + if self.on_enter: + return self.on_enter() + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.on_exit: + return self.on_exit(exc_type, exc_val, exc_tb) + + class TransportLayerServer(object): """Handles incoming federation HTTP requests""" @@ -98,15 +257,23 @@ class TransportLayerServer(object): def new_handler(request, *args, **kwargs): try: (origin, content) = yield self._authenticate_request(request) - response = yield handler( - origin, content, request.args, *args, **kwargs - ) + with self.ratelimiter.ratelimit(origin) as d: + yield d + response = yield handler( + origin, content, request.args, *args, **kwargs + ) except: logger.exception("_authenticate_request failed") raise defer.returnValue(response) return new_handler + def rate_limit_origin(self, handler): + def new_handler(origin, *args, **kwargs): + response = yield handler(origin, *args, **kwargs) + defer.returnValue(response) + return new_handler() + @log_function def register_received_handler(self, handler): """ Register a handler that will be fired when we receive data. From dcec7175dc30754603618351b59bc72ff41d305b Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Thu, 26 Feb 2015 16:23:01 +0000 Subject: [PATCH 16/45] Finish impl to get new events for AS. ASes should now be able to poll /events --- synapse/handlers/room.py | 4 ++- synapse/storage/stream.py | 62 ++++++++++++++++++++++++++++++++++----- 2 files changed, 58 insertions(+), 8 deletions(-) diff --git a/synapse/handlers/room.py b/synapse/handlers/room.py index a8b0c95636..80f7ee3f12 100644 --- a/synapse/handlers/room.py +++ b/synapse/handlers/room.py @@ -566,7 +566,9 @@ class RoomEventSource(object): to_key = yield self.get_current_key() - app_service = self.store.get_app_service_by_user_id(user.to_string()) + app_service = yield self.store.get_app_service_by_user_id( + user.to_string() + ) if app_service: events, end_key = yield self.store.get_appservice_room_stream( service=app_service, diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 3c8f3320f1..6946e9fe70 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -128,25 +128,73 @@ class _StreamToken(namedtuple("_StreamToken", "topological stream")): class StreamStore(SQLBaseStore): + @defer.inlineCallbacks def get_appservice_room_stream(self, service, from_key, to_key, limit=0): # NB this lives here instead of appservice.py so we can reuse the # 'private' StreamToken class in this file. - logger.info("get_appservice_room_stream -> %s", service) - if limit: limit = max(limit, MAX_STREAM_SIZE) else: limit = MAX_STREAM_SIZE # From and to keys should be integers from ordering. 
- # from_id = _StreamToken.parse_stream_token(from_key) - # to_id = _StreamToken.parse_stream_token(to_key) + from_id = _StreamToken.parse_stream_token(from_key) + to_id = _StreamToken.parse_stream_token(to_key) if from_key == to_key: - return defer.succeed(([], to_key)) + defer.returnValue(([], to_key)) + return - # TODO stub - return defer.succeed(([], to_key)) + # Logic: + # - We want ALL events which match the AS room_id regex + # - We want ALL events which match the rooms represented by the AS + # room_alias regex + # - We want ALL events for rooms that AS users have joined. + # This is currently supported via get_app_service_rooms (which is used + # for the Notifier listener rooms). We can't reasonably make a SQL + # query for these room IDs, so we'll pull all the events between from/to + # and filter in python. + rooms_for_as = yield self.get_app_service_rooms(service) + room_ids_for_as = [r.room_id for r in rooms_for_as] + + # select all the events between from/to with a sensible limit + sql = ( + "SELECT e.event_id, e.room_id, e.stream_ordering FROM events AS e " + "WHERE e.stream_ordering > ? AND e.stream_ordering <= ? " + "ORDER BY stream_ordering ASC LIMIT %(limit)d " + ) % { + "limit": limit + } + + def f(txn): + txn.execute(sql, (from_id.stream, to_id.stream,)) + + rows = self.cursor_to_dict(txn) + + ret = self._get_events_txn( + txn, + # apply the filter on the room id list + [ + r["event_id"] for r in rows + if r["room_id"] in room_ids_for_as + ], + get_prev_content=True + ) + + self._set_before_and_after(ret, rows) + + if rows: + key = "s%d" % max([r["stream_ordering"] for r in rows]) + + else: + # Assume we didn't get anything because there was nothing to + # get. + key = to_key + + return ret, key + + results = yield self.runInteraction("get_appservice_room_stream", f) + defer.returnValue(results) @log_function def get_room_events_stream(self, user_id, from_key, to_key, room_id, From 210ef791007fdd58375db7ce6d7f1b5681e382ca Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Thu, 26 Feb 2015 16:25:37 +0000 Subject: [PATCH 17/45] Update CHANGES --- CHANGES.rst | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGES.rst b/CHANGES.rst index 9e36f6232d..ecc30b4672 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -4,6 +4,8 @@ Changes in synapse vx.x.x (x-x-x) * Add support for registration fallback. This is a page hosted on the server which allows a user to register for an account, regardless of what client they are using (e.g. mobile devices). +* Application services can now poll on the CS API ``/events`` for their events, + by providing their application service ``access_token``. Changes in synapse v0.7.1 (2015-02-19) ====================================== From f0995436e7951448d8be3c372f4002845a111a7d Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Thu, 26 Feb 2015 17:21:17 +0000 Subject: [PATCH 18/45] Check for membership invite events correctly. 
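The filter added below has to catch invites for rooms the AS has never seen, so a room-id check alone is not enough: for m.room.member events the invitee sits in the event's state_key, which on this schema ends up inside the unrecognized_keys JSON column. A standalone sketch of the same logic (stdlib json standing in for simplejson, user regexes passed in directly rather than via the service object):

    import json
    import re

    def app_service_interested(row, room_ids_for_as, user_regexes):
        if row["room_id"] in room_ids_for_as:
            return True
        if row["type"] == "m.room.member":
            content = json.loads(row["unrecognized_keys"])
            state_key = content.get("state_key") or ""
            return any(re.match(r, state_key) for r in user_regexes)
        return False

    row = {
        "room_id": "!brand-new:hs",
        "type": "m.room.member",
        "unrecognized_keys": json.dumps({"state_key": "@irc_alice:hs"}),
    }

    # True: the AS is interested even though it knows nothing about
    # the room yet, because the invitee matches its user namespace.
    print(app_service_interested(row, set(), ["@irc_.*"]))
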
--- synapse/storage/stream.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 6946e9fe70..5d01ecf200 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -36,12 +36,14 @@ what sort order was used: from twisted.internet import defer from ._base import SQLBaseStore +from synapse.api.constants import EventTypes from synapse.api.errors import SynapseError from synapse.util.logutils import log_function from collections import namedtuple import logging +import simplejson logger = logging.getLogger(__name__) @@ -159,13 +161,30 @@ class StreamStore(SQLBaseStore): # select all the events between from/to with a sensible limit sql = ( - "SELECT e.event_id, e.room_id, e.stream_ordering FROM events AS e " + "SELECT e.event_id, e.room_id, e.type, e.unrecognized_keys, " + "e.stream_ordering FROM events AS e " "WHERE e.stream_ordering > ? AND e.stream_ordering <= ? " "ORDER BY stream_ordering ASC LIMIT %(limit)d " ) % { "limit": limit } + + def app_service_interested(row): + if row["room_id"] in room_ids_for_as: + return True + + if row["type"] == EventTypes.Member: + # load up the content to inspect if some user the AS is + # interested in was invited to a room. We'll be passing this + # through _get_events_txn later, so ignore the fact that this + # may be a redacted event. + event_content = simplejson.loads(row["unrecognized_keys"]) + if (service.is_interested_in_user( + event_content.get("state_key"))): + return True + return False + def f(txn): txn.execute(sql, (from_id.stream, to_id.stream,)) @@ -176,7 +195,7 @@ class StreamStore(SQLBaseStore): # apply the filter on the room id list [ r["event_id"] for r in rows - if r["room_id"] in room_ids_for_as + if app_service_interested(r) ], get_prev_content=True ) From 1cc77145d483c4a6cea75fafbe83a1661b8b61cd Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Fri, 27 Feb 2015 09:39:12 +0000 Subject: [PATCH 19/45] Notify appservices of invites mid-poll. This requires the notifier to have knowledge of appservice listeners so it can do the regex checks on incoming invites to see if the state_key matches. It isn't enough to just rely on the room listeners and store.get_app_service_rooms as the room will initially not exist or won't be on the ASes radar due to having none of its users in the room. --- synapse/notifier.py | 30 +++++++++++++++++++++++++++++- 1 file changed, 29 insertions(+), 1 deletion(-) diff --git a/synapse/notifier.py b/synapse/notifier.py index 2475f3ffbe..09d23e79b8 100644 --- a/synapse/notifier.py +++ b/synapse/notifier.py @@ -36,8 +36,10 @@ class _NotificationListener(object): so that it can remove itself from the indexes in the Notifier class. 
""" - def __init__(self, user, rooms, from_token, limit, timeout, deferred): + def __init__(self, user, rooms, from_token, limit, timeout, deferred, + appservice=None): self.user = user + self.appservice = appservice self.from_token = from_token self.limit = limit self.timeout = timeout @@ -65,6 +67,10 @@ class _NotificationListener(object): lst.discard(self) notifier.user_to_listeners.get(self.user, set()).discard(self) + if self.appservice: + notifier.appservice_to_listeners.get( + self.appservice, set() + ).discard(self) class Notifier(object): @@ -79,6 +85,7 @@ class Notifier(object): self.rooms_to_listeners = {} self.user_to_listeners = {} + self.appservice_to_listeners = {} self.event_sources = hs.get_event_sources() @@ -114,6 +121,17 @@ class Notifier(object): for user in extra_users: listeners |= self.user_to_listeners.get(user, set()).copy() + for appservice in self.appservice_to_listeners: + # TODO (kegan): Redundant appservice listener checks? + # App services will already be in the rooms_to_listeners set, but + # that isn't enough. They need to be checked here in order to + # receive *invites* for users they are interested in. Does this + # make the rooms_to_listeners check somewhat obselete? + if appservice.is_interested(event): + listeners |= self.appservice_to_listeners.get( + appservice, set() + ).copy() + logger.debug("on_new_room_event listeners %s", listeners) # TODO (erikj): Can we make this more efficient by hitting the @@ -280,6 +298,10 @@ class Notifier(object): if not from_token: from_token = yield self.event_sources.get_current_token() + appservice = yield self.hs.get_datastore().get_app_service_by_user_id( + user.to_string() + ) + listener = _NotificationListener( user, rooms, @@ -287,6 +309,7 @@ class Notifier(object): limit, timeout, deferred, + appservice=appservice ) def _timeout_listener(): @@ -319,6 +342,11 @@ class Notifier(object): self.user_to_listeners.setdefault(listener.user, set()).add(listener) + if listener.appservice: + self.appservice_to_listeners.setdefault( + listener.appservice, set() + ).add(listener) + @defer.inlineCallbacks @log_function def _check_for_updates(self, listener): From 0ebd632d39e7c493d26e0dd24876232caf2660b4 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Fri, 27 Feb 2015 09:46:38 +0000 Subject: [PATCH 20/45] Fix unit tests --- tests/rest/client/v1/test_presence.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tests/rest/client/v1/test_presence.py b/tests/rest/client/v1/test_presence.py index 7c5df5c116..5f2ef64efc 100644 --- a/tests/rest/client/v1/test_presence.py +++ b/tests/rest/client/v1/test_presence.py @@ -295,6 +295,9 @@ class PresenceEventStreamTestCase(unittest.TestCase): self.mock_datastore = hs.get_datastore() self.mock_datastore.get_app_service_by_token = Mock(return_value=None) + self.mock_datastore.get_app_service_by_user_id = Mock( + return_value=defer.succeed(None) + ) def get_profile_displayname(user_id): return defer.succeed("Frank") From 806a6c886aaa695a7dbfd35f71b9cc59941b8366 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Fri, 27 Feb 2015 09:48:57 +0000 Subject: [PATCH 21/45] PEP8 --- synapse/storage/stream.py | 1 - 1 file changed, 1 deletion(-) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 5d01ecf200..09417bd147 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -169,7 +169,6 @@ class StreamStore(SQLBaseStore): "limit": limit } - def app_service_interested(row): if row["room_id"] in room_ids_for_as: return True From 
16b90764adb8f2ab49b1853855d0fb739b79d245 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Fri, 27 Feb 2015 10:44:32 +0000 Subject: [PATCH 22/45] Convert expected format for AS regex to include exclusivity. Previously you just specified the regex as a string, now it expects a JSON object with a 'regex' key and an 'exclusive' boolean, as per spec. --- synapse/appservice/__init__.py | 26 +++++++++++++------ synapse/rest/appservice/v1/register.py | 35 +++++--------------------- synapse/storage/appservice.py | 16 +++++++++--- 3 files changed, 38 insertions(+), 39 deletions(-) diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py index 381b4cfc4a..b5e7ac16ba 100644 --- a/synapse/appservice/__init__.py +++ b/synapse/appservice/__init__.py @@ -46,19 +46,31 @@ class ApplicationService(object): def _check_namespaces(self, namespaces): # Sanity check that it is of the form: # { - # users: ["regex",...], - # aliases: ["regex",...], - # rooms: ["regex",...], + # users: [ {regex: "[A-z]+.*", exclusive: true}, ...], + # aliases: [ {regex: "[A-z]+.*", exclusive: true}, ...], + # rooms: [ {regex: "[A-z]+.*", exclusive: true}, ...], # } if not namespaces: return None for ns in ApplicationService.NS_LIST: + if ns not in namespaces: + namespaces[ns] = [] + continue + if type(namespaces[ns]) != list: - raise ValueError("Bad namespace value for '%s'", ns) - for regex in namespaces[ns]: - if not isinstance(regex, basestring): - raise ValueError("Expected string regex for ns '%s'", ns) + raise ValueError("Bad namespace value for '%s'" % ns) + for regex_obj in namespaces[ns]: + if not isinstance(regex_obj, dict): + raise ValueError("Expected dict regex for ns '%s'" % ns) + if not isinstance(regex_obj.get("exclusive"), bool): + raise ValueError( + "Expected bool for 'exclusive' in ns '%s'" % ns + ) + if not isinstance(regex_obj.get("regex"), basestring): + raise ValueError( + "Expected string for 'regex' in ns '%s'" % ns + ) return namespaces def _matches_regex(self, test_string, namespace_key): diff --git a/synapse/rest/appservice/v1/register.py b/synapse/rest/appservice/v1/register.py index 3bd0c1220c..a4f6159773 100644 --- a/synapse/rest/appservice/v1/register.py +++ b/synapse/rest/appservice/v1/register.py @@ -48,18 +48,12 @@ class RegisterRestServlet(AppServiceRestServlet): 400, "Missed required keys: as_token(str) / url(str)." ) - namespaces = { - "users": [], - "rooms": [], - "aliases": [] - } - - if "namespaces" in params: - self._parse_namespace(namespaces, params["namespaces"], "users") - self._parse_namespace(namespaces, params["namespaces"], "rooms") - self._parse_namespace(namespaces, params["namespaces"], "aliases") - - app_service = ApplicationService(as_token, as_url, namespaces) + try: + app_service = ApplicationService( + as_token, as_url, params["namespaces"] + ) + except ValueError as e: + raise SynapseError(400, e.message) app_service = yield self.handler.register(app_service) hs_token = app_service.hs_token @@ -68,23 +62,6 @@ class RegisterRestServlet(AppServiceRestServlet): "hs_token": hs_token })) - def _parse_namespace(self, target_ns, origin_ns, ns): - if ns not in target_ns or ns not in origin_ns: - return # nothing to parse / map through to. - - possible_regex_list = origin_ns[ns] - if not type(possible_regex_list) == list: - raise SynapseError(400, "Namespace %s isn't an array." 
% ns) - - for regex in possible_regex_list: - if not isinstance(regex, basestring): - raise SynapseError( - 400, "Regex '%s' isn't a string in namespace %s" % - (regex, ns) - ) - - target_ns[ns] = origin_ns[ns] - class UnregisterRestServlet(AppServiceRestServlet): """Handles AS registration with the home server. diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index dc3666efd4..a3aa41e5fc 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -13,6 +13,8 @@ # See the License for the specific language governing permissions and # limitations under the License. import logging +import simplejson +from simplejson import JSONDecodeError from twisted.internet import defer from synapse.api.errors import StoreError @@ -23,12 +25,18 @@ from ._base import SQLBaseStore logger = logging.getLogger(__name__) +def log_failure(failure): + logger.error("Failed to detect application services: %s", failure.value) + logger.error(failure.getTraceback()) + + class ApplicationServiceStore(SQLBaseStore): def __init__(self, hs): super(ApplicationServiceStore, self).__init__(hs) self.services_cache = [] self.cache_defer = self._populate_cache() + self.cache_defer.addErrback(log_failure) @defer.inlineCallbacks def unregister_app_service(self, token): @@ -128,11 +136,11 @@ class ApplicationServiceStore(SQLBaseStore): ) for (ns_int, ns_str) in enumerate(ApplicationService.NS_LIST): if ns_str in service.namespaces: - for regex in service.namespaces[ns_str]: + for regex_obj in service.namespaces[ns_str]: txn.execute( "INSERT INTO application_services_regex(" "as_id, namespace, regex) values(?,?,?)", - (as_id, ns_int, regex) + (as_id, ns_int, simplejson.dumps(regex_obj)) ) return True @@ -215,10 +223,12 @@ class ApplicationServiceStore(SQLBaseStore): try: services[as_token]["namespaces"][ ApplicationService.NS_LIST[ns_int]].append( - res["regex"] + simplejson.loads(res["regex"]) ) except IndexError: logger.error("Bad namespace enum '%s'. %s", ns_int, res) + except JSONDecodeError: + logger.error("Bad regex object '%s'", res["regex"]) # TODO get last successful txn id f.e. service for service in services.values(): From 40c9896705208b1455192b496e3e8e3bc9e9c0a9 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Fri, 27 Feb 2015 11:03:56 +0000 Subject: [PATCH 23/45] Add functions to return whether an AS has exclusively claimed a matching namespace. 
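For reference, the namespace reshape from patch 22/45 above at a glance, as a minimal before/after sketch (regex value illustrative only):

    # before: each namespace is a list of bare regex strings
    namespaces = {"users": ["@irc_.*"]}

    # after: each entry is an object with an explicit exclusivity flag
    namespaces = {"users": [{"regex": "@irc_.*", "exclusive": True}]}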
--- synapse/appservice/__init__.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/synapse/appservice/__init__.py b/synapse/appservice/__init__.py index b5e7ac16ba..a268a6bcc4 100644 --- a/synapse/appservice/__init__.py +++ b/synapse/appservice/__init__.py @@ -73,7 +73,7 @@ class ApplicationService(object): ) return namespaces - def _matches_regex(self, test_string, namespace_key): + def _matches_regex(self, test_string, namespace_key, return_obj=False): if not isinstance(test_string, basestring): logger.error( "Expected a string to test regex against, but got %s", @@ -81,11 +81,19 @@ class ApplicationService(object): ) return False - for regex in self.namespaces[namespace_key]: - if re.match(regex, test_string): + for regex_obj in self.namespaces[namespace_key]: + if re.match(regex_obj["regex"], test_string): + if return_obj: + return regex_obj return True return False + def _is_exclusive(self, ns_key, test_string): + regex_obj = self._matches_regex(test_string, ns_key, return_obj=True) + if regex_obj: + return regex_obj["exclusive"] + return False + def _matches_user(self, event, member_list): if (hasattr(event, "sender") and self.is_interested_in_user(event.sender)): @@ -155,5 +163,14 @@ class ApplicationService(object): def is_interested_in_room(self, room_id): return self._matches_regex(room_id, ApplicationService.NS_ROOMS) + def is_exclusive_user(self, user_id): + return self._is_exclusive(ApplicationService.NS_USERS, user_id) + + def is_exclusive_alias(self, alias): + return self._is_exclusive(ApplicationService.NS_ALIASES, alias) + + def is_exclusive_room(self, room_id): + return self._is_exclusive(ApplicationService.NS_ROOMS, room_id) + def __str__(self): return "ApplicationService: %s" % (self.__dict__,) From 127efeeb68a4ee6fe6c3bfe417a980878f6e165d Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Fri, 27 Feb 2015 11:10:48 +0000 Subject: [PATCH 24/45] Update unit tests to use new format. 
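Informally, the distinction drawn by the helpers added in patch 23/45, assuming a service whose user namespace contains {"regex": "@irc_.*", "exclusive": True} (a hedged sketch, not captured output):

    service.is_interested_in_user("@irc_foobar:matrix.org")  # True, the regex matches
    service.is_exclusive_user("@irc_foobar:matrix.org")      # True, the matching entry is exclusive
    service.is_exclusive_user("@bob:matrix.org")             # False, nothing matches, so no claim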
--- tests/appservice/test_appservice.py | 39 +++++++++++++++++------------ tests/storage/test_appservice.py | 12 ++++++--- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/tests/appservice/test_appservice.py b/tests/appservice/test_appservice.py index d12e4f2644..c1c2892eb1 100644 --- a/tests/appservice/test_appservice.py +++ b/tests/appservice/test_appservice.py @@ -18,6 +18,13 @@ from mock import Mock, PropertyMock from tests import unittest +def _regex(regex, exclusive=True): + return { + "regex": regex, + exclusive: exclusive + } + + class ApplicationServiceTestCase(unittest.TestCase): def setUp(self): @@ -36,21 +43,21 @@ class ApplicationServiceTestCase(unittest.TestCase): def test_regex_user_id_prefix_match(self): self.service.namespaces[ApplicationService.NS_USERS].append( - "@irc_.*" + _regex("@irc_.*") ) self.event.sender = "@irc_foobar:matrix.org" self.assertTrue(self.service.is_interested(self.event)) def test_regex_user_id_prefix_no_match(self): self.service.namespaces[ApplicationService.NS_USERS].append( - "@irc_.*" + _regex("@irc_.*") ) self.event.sender = "@someone_else:matrix.org" self.assertFalse(self.service.is_interested(self.event)) def test_regex_room_member_is_checked(self): self.service.namespaces[ApplicationService.NS_USERS].append( - "@irc_.*" + _regex("@irc_.*") ) self.event.sender = "@someone_else:matrix.org" self.event.type = "m.room.member" @@ -59,21 +66,21 @@ class ApplicationServiceTestCase(unittest.TestCase): def test_regex_room_id_match(self): self.service.namespaces[ApplicationService.NS_ROOMS].append( - "!some_prefix.*some_suffix:matrix.org" + _regex("!some_prefix.*some_suffix:matrix.org") ) self.event.room_id = "!some_prefixs0m3th1nGsome_suffix:matrix.org" self.assertTrue(self.service.is_interested(self.event)) def test_regex_room_id_no_match(self): self.service.namespaces[ApplicationService.NS_ROOMS].append( - "!some_prefix.*some_suffix:matrix.org" + _regex("!some_prefix.*some_suffix:matrix.org") ) self.event.room_id = "!XqBunHwQIXUiqCaoxq:matrix.org" self.assertFalse(self.service.is_interested(self.event)) def test_regex_alias_match(self): self.service.namespaces[ApplicationService.NS_ALIASES].append( - "#irc_.*:matrix.org" + _regex("#irc_.*:matrix.org") ) self.assertTrue(self.service.is_interested( self.event, @@ -82,7 +89,7 @@ class ApplicationServiceTestCase(unittest.TestCase): def test_regex_alias_no_match(self): self.service.namespaces[ApplicationService.NS_ALIASES].append( - "#irc_.*:matrix.org" + _regex("#irc_.*:matrix.org") ) self.assertFalse(self.service.is_interested( self.event, @@ -91,10 +98,10 @@ class ApplicationServiceTestCase(unittest.TestCase): def test_regex_multiple_matches(self): self.service.namespaces[ApplicationService.NS_ALIASES].append( - "#irc_.*:matrix.org" + _regex("#irc_.*:matrix.org") ) self.service.namespaces[ApplicationService.NS_USERS].append( - "@irc_.*" + _regex("@irc_.*") ) self.event.sender = "@irc_foobar:matrix.org" self.assertTrue(self.service.is_interested( @@ -104,10 +111,10 @@ class ApplicationServiceTestCase(unittest.TestCase): def test_restrict_to_rooms(self): self.service.namespaces[ApplicationService.NS_ROOMS].append( - "!flibble_.*:matrix.org" + _regex("!flibble_.*:matrix.org") ) self.service.namespaces[ApplicationService.NS_USERS].append( - "@irc_.*" + _regex("@irc_.*") ) self.event.sender = "@irc_foobar:matrix.org" self.event.room_id = "!wibblewoo:matrix.org" @@ -118,10 +125,10 @@ class ApplicationServiceTestCase(unittest.TestCase): def test_restrict_to_aliases(self): 
self.service.namespaces[ApplicationService.NS_ALIASES].append( - "#xmpp_.*:matrix.org" + _regex("#xmpp_.*:matrix.org") ) self.service.namespaces[ApplicationService.NS_USERS].append( - "@irc_.*" + _regex("@irc_.*") ) self.event.sender = "@irc_foobar:matrix.org" self.assertFalse(self.service.is_interested( @@ -132,10 +139,10 @@ class ApplicationServiceTestCase(unittest.TestCase): def test_restrict_to_senders(self): self.service.namespaces[ApplicationService.NS_ALIASES].append( - "#xmpp_.*:matrix.org" + _regex("#xmpp_.*:matrix.org") ) self.service.namespaces[ApplicationService.NS_USERS].append( - "@irc_.*" + _regex("@irc_.*") ) self.event.sender = "@xmpp_foobar:matrix.org" self.assertFalse(self.service.is_interested( @@ -146,7 +153,7 @@ class ApplicationServiceTestCase(unittest.TestCase): def test_member_list_match(self): self.service.namespaces[ApplicationService.NS_USERS].append( - "@irc_.*" + _regex("@irc_.*") ) join_list = [ Mock( diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index fc733d4c79..9a646fa6eb 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -50,9 +50,15 @@ class ApplicationServiceStoreTestCase(unittest.TestCase): def test_update_and_retrieval_of_service(self): url = "https://matrix.org/appservices/foobar" hs_token = "hstok" - user_regex = ["@foobar_.*:matrix.org"] - alias_regex = ["#foobar_.*:matrix.org"] - room_regex = [] + user_regex = [ + {"regex": "@foobar_.*:matrix.org", "exclusive": True} + ] + alias_regex = [ + {"regex": "#foobar_.*:matrix.org", "exclusive": True} + ] + room_regex = [ + + ] service = ApplicationService( url=url, hs_token=hs_token, token=self.as_token, namespaces={ ApplicationService.NS_USERS: user_regex, From de190e49d5711edb5503e5574296a3c2a02ca06c Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Fri, 27 Feb 2015 11:51:06 +0000 Subject: [PATCH 25/45] Add more unit tests for exclusive namespaces. 
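Two things happen below: the _regex test helper is quietly fixed (it previously built {exclusive: exclusive}, keying the dict by the boolean's value instead of the string "exclusive", so the flag never reached the code under test), and tests now cover both polarities, roughly:

    _regex("#irc_.*:matrix.org", exclusive=False)  # interested, but holds no claim
    _regex("#irc_.*:matrix.org", exclusive=True)   # interested and owns the namespace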
--- tests/appservice/test_appservice.py | 50 ++++++++++++++++++++++++++++- tests/storage/test_appservice.py | 2 +- 2 files changed, 50 insertions(+), 2 deletions(-) diff --git a/tests/appservice/test_appservice.py b/tests/appservice/test_appservice.py index c1c2892eb1..eb7becf725 100644 --- a/tests/appservice/test_appservice.py +++ b/tests/appservice/test_appservice.py @@ -21,7 +21,7 @@ from tests import unittest def _regex(regex, exclusive=True): return { "regex": regex, - exclusive: exclusive + "exclusive": exclusive } @@ -87,6 +87,54 @@ class ApplicationServiceTestCase(unittest.TestCase): aliases_for_event=["#irc_foobar:matrix.org", "#athing:matrix.org"] )) + def test_non_exclusive_alias(self): + self.service.namespaces[ApplicationService.NS_ALIASES].append( + _regex("#irc_.*:matrix.org", exclusive=False) + ) + self.assertFalse(self.service.is_exclusive_alias( + "#irc_foobar:matrix.org" + )) + + def test_non_exclusive_room(self): + self.service.namespaces[ApplicationService.NS_ROOMS].append( + _regex("!irc_.*:matrix.org", exclusive=False) + ) + self.assertFalse(self.service.is_exclusive_room( + "!irc_foobar:matrix.org" + )) + + def test_non_exclusive_user(self): + self.service.namespaces[ApplicationService.NS_USERS].append( + _regex("@irc_.*:matrix.org", exclusive=False) + ) + self.assertFalse(self.service.is_exclusive_user( + "@irc_foobar:matrix.org" + )) + + def test_exclusive_alias(self): + self.service.namespaces[ApplicationService.NS_ALIASES].append( + _regex("#irc_.*:matrix.org", exclusive=True) + ) + self.assertTrue(self.service.is_exclusive_alias( + "#irc_foobar:matrix.org" + )) + + def test_exclusive_user(self): + self.service.namespaces[ApplicationService.NS_USERS].append( + _regex("@irc_.*:matrix.org", exclusive=True) + ) + self.assertTrue(self.service.is_exclusive_user( + "@irc_foobar:matrix.org" + )) + + def test_exclusive_room(self): + self.service.namespaces[ApplicationService.NS_ROOMS].append( + _regex("!irc_.*:matrix.org", exclusive=True) + ) + self.assertTrue(self.service.is_exclusive_room( + "!irc_foobar:matrix.org" + )) + def test_regex_alias_no_match(self): self.service.namespaces[ApplicationService.NS_ALIASES].append( _regex("#irc_.*:matrix.org") diff --git a/tests/storage/test_appservice.py b/tests/storage/test_appservice.py index 9a646fa6eb..ca5b92ec85 100644 --- a/tests/storage/test_appservice.py +++ b/tests/storage/test_appservice.py @@ -54,7 +54,7 @@ class ApplicationServiceStoreTestCase(unittest.TestCase): {"regex": "@foobar_.*:matrix.org", "exclusive": True} ] alias_regex = [ - {"regex": "#foobar_.*:matrix.org", "exclusive": True} + {"regex": "#foobar_.*:matrix.org", "exclusive": False} ] room_regex = [ From 58ff0660642e6ef8f9fe5672fb50e5d75a569479 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Fri, 27 Feb 2015 13:51:41 +0000 Subject: [PATCH 26/45] Implement exclusive namespace checks. --- synapse/handlers/directory.py | 14 ++++++++++++-- synapse/handlers/register.py | 11 ++++++----- 2 files changed, 18 insertions(+), 7 deletions(-) diff --git a/synapse/handlers/directory.py b/synapse/handlers/directory.py index 11d20a5d2d..f76febee8f 100644 --- a/synapse/handlers/directory.py +++ b/synapse/handlers/directory.py @@ -232,13 +232,23 @@ class DirectoryHandler(BaseHandler): @defer.inlineCallbacks def can_modify_alias(self, alias, user_id=None): + # Any application service "interested" in an alias they are regexing on + # can modify the alias. 
+ # Users can only modify the alias if ALL the interested services have + # non-exclusive locks on the alias (or there are no interested services) services = yield self.store.get_app_services() interested_services = [ s for s in services if s.is_interested_in_alias(alias.to_string()) ] + for service in interested_services: if user_id == service.sender: - # this user IS the app service + # this user IS the app service so they can do whatever they like defer.returnValue(True) return - defer.returnValue(len(interested_services) == 0) + elif service.is_exclusive_alias(alias.to_string()): + # another service has an exclusive lock on this alias. + defer.returnValue(False) + return + # either no interested services, or no service with an exclusive lock + defer.returnValue(True) diff --git a/synapse/handlers/register.py b/synapse/handlers/register.py index 516a936cee..cda4a8502a 100644 --- a/synapse/handlers/register.py +++ b/synapse/handlers/register.py @@ -201,11 +201,12 @@ class RegistrationHandler(BaseHandler): interested_services = [ s for s in services if s.is_interested_in_user(user_id) ] - if len(interested_services) > 0: - raise SynapseError( - 400, "This user ID is reserved by an application service.", - errcode=Codes.EXCLUSIVE - ) + for service in interested_services: + if service.is_exclusive_user(user_id): + raise SynapseError( + 400, "This user ID is reserved by an application service.", + errcode=Codes.EXCLUSIVE + ) def _generate_token(self, user_id): # urlsafe variant uses _ and - so use . as the separator and replace From 9dc9118e552bfddaa7579b4ded8b1a0da7eff0e6 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 27 Feb 2015 15:16:47 +0000 Subject: [PATCH 27/45] Document FederationRateLimiter --- synapse/federation/transport/server.py | 59 ++++++++++++++++++++++---- 1 file changed, 51 insertions(+), 8 deletions(-) diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index a9e625f127..390e54b9fb 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -32,6 +32,21 @@ logger = logging.getLogger(__name__) class FederationRateLimiter(object): def __init__(self, clock, window_size, sleep_limit, sleep_msec, reject_limit, concurrent_requests): + """ + Args: + clock (Clock) + window_size (int): The window size in milliseconds. + sleep_limit (int): The number of requests received in the last + `window_size` milliseconds before we artificially start + delaying processing of requests. + sleep_msec (int): The number of milliseconds to delay processing + of incoming requests by. + reject_limit (int): The maximum number of requests that are can be + queued for processing before we start rejecting requests with + a 429 Too Many Requests response. + concurrent_requests (int): The number of concurrent requests to + process. + """ self.clock = clock self.window_size = window_size @@ -43,9 +58,23 @@ class FederationRateLimiter(object): self.ratelimiters = {} def ratelimit(self, host): + """Used to ratelimit an incoming request from given host + + Example usage: + + with rate_limiter.ratelimit(origin) as wait_deferred: + yield wait_deferred + # Handle request ... + + Args: + host (str): Origin of incoming request. 
+ + Returns: + _PerHostRatelimiter + """ return self.ratelimiters.setdefault( host, - PerHostRatelimiter( + _PerHostRatelimiter( clock=self.clock, window_size=self.window_size, sleep_limit=self.sleep_limit, @@ -56,7 +85,7 @@ class FederationRateLimiter(object): ).ratelimit() -class PerHostRatelimiter(object): +class _PerHostRatelimiter(object): def __init__(self, clock, window_size, sleep_limit, sleep_msec, reject_limit, concurrent_requests): self.clock = clock @@ -123,17 +152,25 @@ class PerHostRatelimiter(object): else: return defer.succeed(None) - logger.debug("Ratelimit [%s]: len(self.request_times)=%d", id(request_id), len(self.request_times)) - logger.debug("Ratelimit [%s]: len(self.request_times)=%d", id(request_id), len(self.request_times)) + logger.debug( + "Ratelimit [%s]: len(self.request_times)=%d", + id(request_id), len(self.request_times), + ) if len(self.request_times) > self.sleep_limit: - logger.debug("Ratelimit [%s]: sleeping req", id(request_id)) + logger.debug( + "Ratelimit [%s]: sleeping req", + id(request_id), + ) ret_defer = sleep(self.sleep_msec/1000.0) self.sleeping_requests.add(request_id) def on_wait_finished(_): - logger.debug("Ratelimit [%s]: Finished sleeping", id(request_id)) + logger.debug( + "Ratelimit [%s]: Finished sleeping", + id(request_id), + ) self.sleeping_requests.discard(request_id) queue_defer = queue_request() return queue_defer @@ -143,7 +180,10 @@ class PerHostRatelimiter(object): ret_defer = queue_request() def on_start(r): - logger.debug("Ratelimit [%s]: Processing req", id(request_id)) + logger.debug( + "Ratelimit [%s]: Processing req", + id(request_id), + ) self.current_processing.add(request_id) return r @@ -162,7 +202,10 @@ class PerHostRatelimiter(object): return ret_defer def _on_exit(self, request_id): - logger.debug("Ratelimit [%s]: Processed req", id(request_id)) + logger.debug( + "Ratelimit [%s]: Processed req", + id(request_id), + ) self.current_processing.discard(request_id) try: request_id, deferred = self.ready_request_queue.popitem() From 0554d0708225afe13d141bd00e3aaca2509f3f78 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Fri, 27 Feb 2015 15:41:52 +0000 Subject: [PATCH 28/45] Move federation rate limiting out of transport layer --- synapse/federation/transport/__init__.py | 4 +- synapse/federation/transport/server.py | 204 +------------------- synapse/util/ratelimitutils.py | 226 +++++++++++++++++++++++ 3 files changed, 230 insertions(+), 204 deletions(-) create mode 100644 synapse/util/ratelimitutils.py diff --git a/synapse/federation/transport/__init__.py b/synapse/federation/transport/__init__.py index 7028ca6947..f0283b5105 100644 --- a/synapse/federation/transport/__init__.py +++ b/synapse/federation/transport/__init__.py @@ -21,9 +21,11 @@ support HTTPS), however individual pairings of servers may decide to communicate over a different (albeit still reliable) protocol. 
""" -from .server import TransportLayerServer, FederationRateLimiter +from .server import TransportLayerServer from .client import TransportLayerClient +from synapse.util.ratelimitutils import FederationRateLimiter + class TransportLayer(TransportLayerServer, TransportLayerClient): """This is a basic implementation of the transport layer that translates diff --git a/synapse/federation/transport/server.py b/synapse/federation/transport/server.py index 390e54b9fb..fce9c0195e 100644 --- a/synapse/federation/transport/server.py +++ b/synapse/federation/transport/server.py @@ -16,11 +16,9 @@ from twisted.internet import defer from synapse.api.urls import FEDERATION_PREFIX as PREFIX -from synapse.api.errors import Codes, SynapseError, LimitExceededError -from synapse.util.async import sleep +from synapse.api.errors import Codes, SynapseError from synapse.util.logutils import log_function -import collections import logging import simplejson as json import re @@ -29,206 +27,6 @@ import re logger = logging.getLogger(__name__) -class FederationRateLimiter(object): - def __init__(self, clock, window_size, sleep_limit, sleep_msec, - reject_limit, concurrent_requests): - """ - Args: - clock (Clock) - window_size (int): The window size in milliseconds. - sleep_limit (int): The number of requests received in the last - `window_size` milliseconds before we artificially start - delaying processing of requests. - sleep_msec (int): The number of milliseconds to delay processing - of incoming requests by. - reject_limit (int): The maximum number of requests that are can be - queued for processing before we start rejecting requests with - a 429 Too Many Requests response. - concurrent_requests (int): The number of concurrent requests to - process. - """ - self.clock = clock - - self.window_size = window_size - self.sleep_limit = sleep_limit - self.sleep_msec = sleep_msec - self.reject_limit = reject_limit - self.concurrent_requests = concurrent_requests - - self.ratelimiters = {} - - def ratelimit(self, host): - """Used to ratelimit an incoming request from given host - - Example usage: - - with rate_limiter.ratelimit(origin) as wait_deferred: - yield wait_deferred - # Handle request ... - - Args: - host (str): Origin of incoming request. 
- - Returns: - _PerHostRatelimiter - """ - return self.ratelimiters.setdefault( - host, - _PerHostRatelimiter( - clock=self.clock, - window_size=self.window_size, - sleep_limit=self.sleep_limit, - sleep_msec=self.sleep_msec, - reject_limit=self.reject_limit, - concurrent_requests=self.concurrent_requests, - ) - ).ratelimit() - - -class _PerHostRatelimiter(object): - def __init__(self, clock, window_size, sleep_limit, sleep_msec, - reject_limit, concurrent_requests): - self.clock = clock - - self.window_size = window_size - self.sleep_limit = sleep_limit - self.sleep_msec = sleep_msec - self.reject_limit = reject_limit - self.concurrent_requests = concurrent_requests - - self.sleeping_requests = set() - self.ready_request_queue = collections.OrderedDict() - self.current_processing = set() - self.request_times = [] - - def is_empty(self): - time_now = self.clock.time_msec() - self.request_times[:] = [ - r for r in self.request_times - if time_now - r < self.window_size - ] - - return not ( - self.ready_request_queue - or self.sleeping_requests - or self.current_processing - or self.request_times - ) - - def ratelimit(self): - request_id = object() - - def on_enter(): - return self._on_enter(request_id) - - def on_exit(exc_type, exc_val, exc_tb): - return self._on_exit(request_id) - - return ContextManagerFunction(on_enter, on_exit) - - def _on_enter(self, request_id): - time_now = self.clock.time_msec() - self.request_times[:] = [ - r for r in self.request_times - if time_now - r < self.window_size - ] - - queue_size = len(self.ready_request_queue) + len(self.sleeping_requests) - if queue_size > self.reject_limit: - raise LimitExceededError( - retry_after_ms=int( - self.window_size / self.sleep_limit - ), - ) - - self.request_times.append(time_now) - - def queue_request(): - if len(self.current_processing) > self.concurrent_requests: - logger.debug("Ratelimit [%s]: Queue req", id(request_id)) - queue_defer = defer.Deferred() - self.ready_request_queue[request_id] = queue_defer - return queue_defer - else: - return defer.succeed(None) - - logger.debug( - "Ratelimit [%s]: len(self.request_times)=%d", - id(request_id), len(self.request_times), - ) - - if len(self.request_times) > self.sleep_limit: - logger.debug( - "Ratelimit [%s]: sleeping req", - id(request_id), - ) - ret_defer = sleep(self.sleep_msec/1000.0) - - self.sleeping_requests.add(request_id) - - def on_wait_finished(_): - logger.debug( - "Ratelimit [%s]: Finished sleeping", - id(request_id), - ) - self.sleeping_requests.discard(request_id) - queue_defer = queue_request() - return queue_defer - - ret_defer.addBoth(on_wait_finished) - else: - ret_defer = queue_request() - - def on_start(r): - logger.debug( - "Ratelimit [%s]: Processing req", - id(request_id), - ) - self.current_processing.add(request_id) - return r - - def on_err(r): - self.current_processing.discard(request_id) - return r - - def on_both(r): - # Ensure that we've properly cleaned up. 
- self.sleeping_requests.discard(request_id) - self.ready_request_queue.pop(request_id, None) - return r - - ret_defer.addCallbacks(on_start, on_err) - ret_defer.addBoth(on_both) - return ret_defer - - def _on_exit(self, request_id): - logger.debug( - "Ratelimit [%s]: Processed req", - id(request_id), - ) - self.current_processing.discard(request_id) - try: - request_id, deferred = self.ready_request_queue.popitem() - self.current_processing.add(request_id) - deferred.callback(None) - except KeyError: - pass - - - class ContextManagerFunction(object): - def __init__(self, on_enter, on_exit): - self.on_enter = on_enter - self.on_exit = on_exit - - def __enter__(self): - if self.on_enter: - return self.on_enter() - - def __exit__(self, exc_type, exc_val, exc_tb): - if self.on_exit: - return self.on_exit(exc_type, exc_val, exc_tb) - - class TransportLayerServer(object): """Handles incoming federation HTTP requests""" diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py new file mode 100644 index 0000000000..259d5f6f88 --- /dev/null +++ b/synapse/util/ratelimitutils.py @@ -0,0 +1,226 @@ +# -*- coding: utf-8 -*- +# Copyright 2015 OpenMarket Ltd +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from twisted.internet import defer + +from synapse.api.errors import LimitExceededError + +from synapse.util.async import sleep + +import collections +import logging + + +logger = logging.getLogger(__name__) + + +class FederationRateLimiter(object): + def __init__(self, clock, window_size, sleep_limit, sleep_msec, + reject_limit, concurrent_requests): + """ + Args: + clock (Clock) + window_size (int): The window size in milliseconds. + sleep_limit (int): The number of requests received in the last + `window_size` milliseconds before we artificially start + delaying processing of requests. + sleep_msec (int): The number of milliseconds to delay processing + of incoming requests by. + reject_limit (int): The maximum number of requests that can be + queued for processing before we start rejecting requests with + a 429 Too Many Requests response. + concurrent_requests (int): The number of concurrent requests to + process. + """ + self.clock = clock + + self.window_size = window_size + self.sleep_limit = sleep_limit + self.sleep_msec = sleep_msec + self.reject_limit = reject_limit + self.concurrent_requests = concurrent_requests + + self.ratelimiters = {} + + def ratelimit(self, host): + """Used to ratelimit an incoming request from given host + + Example usage: + + with rate_limiter.ratelimit(origin) as wait_deferred: + yield wait_deferred + # Handle request ... + + Args: + host (str): Origin of incoming request.
+ + Returns: + _PerHostRatelimiter + """ + return self.ratelimiters.setdefault( + host, + _PerHostRatelimiter( + clock=self.clock, + window_size=self.window_size, + sleep_limit=self.sleep_limit, + sleep_msec=self.sleep_msec, + reject_limit=self.reject_limit, + concurrent_requests=self.concurrent_requests, + ) + ).ratelimit() + + +class _PerHostRatelimiter(object): + def __init__(self, clock, window_size, sleep_limit, sleep_msec, + reject_limit, concurrent_requests): + self.clock = clock + + self.window_size = window_size + self.sleep_limit = sleep_limit + self.sleep_msec = sleep_msec + self.reject_limit = reject_limit + self.concurrent_requests = concurrent_requests + + self.sleeping_requests = set() + self.ready_request_queue = collections.OrderedDict() + self.current_processing = set() + self.request_times = [] + + def is_empty(self): + time_now = self.clock.time_msec() + self.request_times[:] = [ + r for r in self.request_times + if time_now - r < self.window_size + ] + + return not ( + self.ready_request_queue + or self.sleeping_requests + or self.current_processing + or self.request_times + ) + + def ratelimit(self): + request_id = object() + + def on_enter(): + return self._on_enter(request_id) + + def on_exit(exc_type, exc_val, exc_tb): + return self._on_exit(request_id) + + return ContextManagerFunction(on_enter, on_exit) + + def _on_enter(self, request_id): + time_now = self.clock.time_msec() + self.request_times[:] = [ + r for r in self.request_times + if time_now - r < self.window_size + ] + + queue_size = len(self.ready_request_queue) + len(self.sleeping_requests) + if queue_size > self.reject_limit: + raise LimitExceededError( + retry_after_ms=int( + self.window_size / self.sleep_limit + ), + ) + + self.request_times.append(time_now) + + def queue_request(): + if len(self.current_processing) > self.concurrent_requests: + logger.debug("Ratelimit [%s]: Queue req", id(request_id)) + queue_defer = defer.Deferred() + self.ready_request_queue[request_id] = queue_defer + return queue_defer + else: + return defer.succeed(None) + + logger.debug( + "Ratelimit [%s]: len(self.request_times)=%d", + id(request_id), len(self.request_times), + ) + + if len(self.request_times) > self.sleep_limit: + logger.debug( + "Ratelimit [%s]: sleeping req", + id(request_id), + ) + ret_defer = sleep(self.sleep_msec/1000.0) + + self.sleeping_requests.add(request_id) + + def on_wait_finished(_): + logger.debug( + "Ratelimit [%s]: Finished sleeping", + id(request_id), + ) + self.sleeping_requests.discard(request_id) + queue_defer = queue_request() + return queue_defer + + ret_defer.addBoth(on_wait_finished) + else: + ret_defer = queue_request() + + def on_start(r): + logger.debug( + "Ratelimit [%s]: Processing req", + id(request_id), + ) + self.current_processing.add(request_id) + return r + + def on_err(r): + self.current_processing.discard(request_id) + return r + + def on_both(r): + # Ensure that we've properly cleaned up. 
+ self.sleeping_requests.discard(request_id) + self.ready_request_queue.pop(request_id, None) + return r + + ret_defer.addCallbacks(on_start, on_err) + ret_defer.addBoth(on_both) + return ret_defer + + def _on_exit(self, request_id): + logger.debug( + "Ratelimit [%s]: Processed req", + id(request_id), + ) + self.current_processing.discard(request_id) + try: + request_id, deferred = self.ready_request_queue.popitem() + self.current_processing.add(request_id) + deferred.callback(None) + except KeyError: + pass + + +class ContextManagerFunction(object): + def __init__(self, on_enter, on_exit): + self.on_enter = on_enter + self.on_exit = on_exit + + def __enter__(self): + if self.on_enter: + return self.on_enter() + + def __exit__(self, exc_type, exc_val, exc_tb): + if self.on_exit: + return self.on_exit(exc_type, exc_val, exc_tb) From ebc48306662cf8719a0ea64e2955bf8d5e037a8e Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 2 Mar 2015 09:53:00 +0000 Subject: [PATCH 29/45] PR tweaks: set earlier on and use 'as json' for compat --- synapse/storage/appservice.py | 18 +++++++----------- synapse/storage/registration.py | 2 +- synapse/storage/stream.py | 8 ++++---- 3 files changed, 12 insertions(+), 16 deletions(-) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index d0632d55d1..c6ca2ab04e 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -219,20 +219,17 @@ class ApplicationServiceStore(SQLBaseStore): # get all rooms matching the room ID regex. room_entries = yield self.get_all_rooms() # RoomEntry list - matching_room_id_list = [ + matching_room_list = set([ r.room_id for r in room_entries if service.is_interested_in_room(r.room_id) - ] + ]) # resolve room IDs for matching room alias regex. room_alias_mappings = yield self.get_all_associations() - matching_alias_list = [ + matching_room_list |= set([ r.room_id for r in room_alias_mappings if service.is_interested_in_alias(r.room_alias) - ] - room_ids_matching_alias_or_id = set( - matching_room_id_list + matching_alias_list - ) + ]) # get all rooms for every user for this AS. This is scoped to users on # this HS only. @@ -241,11 +238,10 @@ class ApplicationServiceStore(SQLBaseStore): u["name"] for u in user_list if service.is_interested_in_user(u["name"]) ] - rooms_for_user_matching_user_id = [] # RoomsForUser list + rooms_for_user_matching_user_id = set() # RoomsForUser list for user_id in user_list: rooms_for_user = yield self.get_rooms_for_user(user_id) - rooms_for_user_matching_user_id += rooms_for_user - rooms_for_user_matching_user_id = set(rooms_for_user_matching_user_id) + rooms_for_user_matching_user_id |= set(rooms_for_user) # make RoomsForUser tuples for room ids and aliases which are not in the # main rooms_for_user_list - e.g. 
they are rooms which do not have AS @@ -253,7 +249,7 @@ class ApplicationServiceStore(SQLBaseStore): known_room_ids = [r.room_id for r in rooms_for_user_matching_user_id] missing_rooms_for_user = [ RoomsForUser(r, service.sender, "join") for r in - room_ids_matching_alias_or_id if r not in known_room_ids + matching_room_list if r not in known_room_ids ] rooms_for_user_matching_user_id |= set(missing_rooms_for_user) diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 7aff3dbd33..9c92575c7f 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -93,7 +93,7 @@ class RegistrationStore(SQLBaseStore): ) def get_all_users(self): - query = ("SELECT users.name FROM users") + query = "SELECT users.name FROM users" return self._execute( self.cursor_to_dict, query diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 09417bd147..bad427288d 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -43,7 +43,7 @@ from synapse.util.logutils import log_function from collections import namedtuple import logging -import simplejson +import simplejson as json logger = logging.getLogger(__name__) @@ -178,7 +178,7 @@ class StreamStore(SQLBaseStore): # interested in was invited to a room. We'll be passing this # through _get_events_txn later, so ignore the fact that this # may be a redacted event. - event_content = simplejson.loads(row["unrecognized_keys"]) + event_content = json.loads(row["unrecognized_keys"]) if (service.is_interested_in_user( event_content.get("state_key"))): return True @@ -202,7 +202,7 @@ class StreamStore(SQLBaseStore): self._set_before_and_after(ret, rows) if rows: - key = "s%d" % max([r["stream_ordering"] for r in rows]) + key = "s%d" % max(r["stream_ordering"] for r in rows) else: # Assume we didn't get anything because there was nothing to @@ -271,7 +271,7 @@ class StreamStore(SQLBaseStore): self._set_before_and_after(ret, rows) if rows: - key = "s%d" % max([r["stream_ordering"] for r in rows]) + key = "s%d" % max(r["stream_ordering"] for r in rows) else: # Assume we didn't get anything because there was nothing to From 3d73383d185b41b9986366da8123255e3a8ce1e0 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 2 Mar 2015 10:16:24 +0000 Subject: [PATCH 30/45] Modify _simple_select_list to allow an empty WHERE clause. Use it for get_all_rooms and get_all_users. --- synapse/storage/_base.py | 22 +++++++++++++++------- synapse/storage/appservice.py | 4 ++-- synapse/storage/registration.py | 7 ++----- synapse/storage/room.py | 5 ++--- 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/synapse/storage/_base.py b/synapse/storage/_base.py index c98dd36aed..3725c9795d 100644 --- a/synapse/storage/_base.py +++ b/synapse/storage/_base.py @@ -450,7 +450,8 @@ class SQLBaseStore(object): Args: table : string giving the table name - keyvalues : dict of column names and values to select the rows with + keyvalues : dict of column names and values to select the rows with, + or None to not apply a WHERE clause. retcols : list of strings giving the names of the columns to return """ return self.runInteraction( @@ -469,13 +470,20 @@ class SQLBaseStore(object): keyvalues : dict of column names and values to select the rows with retcols : list of strings giving the names of the columns to return """ - sql = "SELECT %s FROM %s WHERE %s ORDER BY rowid asc" % ( - ", ".join(retcols), - table, - " AND ".join("%s = ?" 
% (k, ) for k in keyvalues) - ) + if keyvalues: + sql = "SELECT %s FROM %s WHERE %s ORDER BY rowid asc" % ( + ", ".join(retcols), + table, + " AND ".join("%s = ?" % (k, ) for k in keyvalues) + ) + txn.execute(sql, keyvalues.values()) + else: + sql = "SELECT %s FROM %s ORDER BY rowid asc" % ( + ", ".join(retcols), + table + ) + txn.execute(sql) - txn.execute(sql, keyvalues.values()) return self.cursor_to_dict(txn) def _simple_update_one(self, table, keyvalues, updatevalues, diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index c6ca2ab04e..0e3eab9422 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -220,8 +220,8 @@ class ApplicationServiceStore(SQLBaseStore): # get all rooms matching the room ID regex. room_entries = yield self.get_all_rooms() # RoomEntry list matching_room_list = set([ - r.room_id for r in room_entries if - service.is_interested_in_room(r.room_id) + r["room_id"] for r in room_entries if + service.is_interested_in_room(r["room_id"]) ]) # resolve room IDs for matching room alias regex. diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 9c92575c7f..54cd15bc0e 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -93,11 +93,8 @@ class RegistrationStore(SQLBaseStore): ) def get_all_users(self): - query = "SELECT users.name FROM users" - return self._execute( - self.cursor_to_dict, - query - ) + return self._simple_select_list( + table="users", keyvalues=None, retcols=["name"]) def get_user_by_token(self, token): """Get a user from the given access token. diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 3a64693404..6bd0b22ae5 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -77,9 +77,8 @@ class RoomStore(SQLBaseStore): Returns: A list of namedtuples containing the room information. """ - query = RoomsTable.select_statement() - return self._execute( - RoomsTable.decode_results, query, + return self._simple_select_list( + table="rooms", keyvalues=None, retcols=["room_id"] ) @defer.inlineCallbacks From b216b3689248094989168c340b60f500c93772a7 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 2 Mar 2015 10:41:35 +0000 Subject: [PATCH 31/45] JOIN state_events rather than parsing unrecognized_keys to pull out member state_keys --- synapse/storage/appservice.py | 2 +- synapse/storage/stream.py | 14 ++++---------- 2 files changed, 5 insertions(+), 11 deletions(-) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 0e3eab9422..3a267d0442 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -218,7 +218,7 @@ class ApplicationServiceStore(SQLBaseStore): # less obvious. # get all rooms matching the room ID regex. 
- room_entries = yield self.get_all_rooms() # RoomEntry list + room_entries = yield self.get_all_rooms() matching_room_list = set([ r["room_id"] for r in room_entries if service.is_interested_in_room(r["room_id"]) diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index bad427288d..865cb13e9e 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -43,7 +43,6 @@ from synapse.util.logutils import log_function from collections import namedtuple import logging -import simplejson as json logger = logging.getLogger(__name__) @@ -161,8 +160,9 @@ class StreamStore(SQLBaseStore): # select all the events between from/to with a sensible limit sql = ( - "SELECT e.event_id, e.room_id, e.type, e.unrecognized_keys, " - "e.stream_ordering FROM events AS e " + "SELECT e.event_id, e.room_id, e.type, s.state_key, " + "e.stream_ordering FROM events AS e LEFT JOIN state_events as s ON " + "e.event_id = s.event_id " "WHERE e.stream_ordering > ? AND e.stream_ordering <= ? " "ORDER BY stream_ordering ASC LIMIT %(limit)d " ) % { @@ -174,13 +174,7 @@ class StreamStore(SQLBaseStore): return True if row["type"] == EventTypes.Member: - # load up the content to inspect if some user the AS is - # interested in was invited to a room. We'll be passing this - # through _get_events_txn later, so ignore the fact that this - # may be a redacted event. - event_content = json.loads(row["unrecognized_keys"]) - if (service.is_interested_in_user( - event_content.get("state_key"))): + if service.is_interested_in_user(row.get("state_key")): return True return False From 377ae369c1275fabdac46fa00c0b2ba238467435 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 2 Mar 2015 11:20:51 +0000 Subject: [PATCH 32/45] Wrap all of get_app_service_rooms in a txn. --- synapse/storage/appservice.py | 38 ++++++++++++++++++--------- synapse/storage/directory.py | 21 --------------- synapse/storage/registration.py | 4 --- synapse/storage/room.py | 10 ------- synapse/storage/roommember.py | 36 ++++++++++++++------------ synapse/storage/stream.py | 46 ++++++++++++++++----------------- 6 files changed, 67 insertions(+), 88 deletions(-) diff --git a/synapse/storage/appservice.py b/synapse/storage/appservice.py index 3a267d0442..97481d113b 100644 --- a/synapse/storage/appservice.py +++ b/synapse/storage/appservice.py @@ -15,6 +15,7 @@ import logging from twisted.internet import defer +from synapse.api.constants import Membership from synapse.api.errors import StoreError from synapse.appservice import ApplicationService from synapse.storage.roommember import RoomsForUser @@ -197,7 +198,6 @@ class ApplicationServiceStore(SQLBaseStore): # TODO: The from_cache=False impl # TODO: This should be JOINed with the application_services_regex table. - @defer.inlineCallbacks def get_app_service_rooms(self, service): """Get a list of RoomsForUser for this application service. @@ -212,35 +212,49 @@ class ApplicationServiceStore(SQLBaseStore): Returns: A list of RoomsForUser. """ - # FIXME: This is assuming that this store has methods from - # RoomStore, DirectoryStore, RegistrationStore, RoomMemberStore which is - # a bad assumption to make as it makes testing trickier and coupling - # less obvious. + return self.runInteraction( + "get_app_service_rooms", + self._get_app_service_rooms_txn, + service, + ) + def _get_app_service_rooms_txn(self, txn, service): # get all rooms matching the room ID regex. 
- room_entries = yield self.get_all_rooms() + room_entries = self._simple_select_list_txn( + txn=txn, table="rooms", keyvalues=None, retcols=["room_id"] + ) matching_room_list = set([ r["room_id"] for r in room_entries if service.is_interested_in_room(r["room_id"]) ]) # resolve room IDs for matching room alias regex. - room_alias_mappings = yield self.get_all_associations() + room_alias_mappings = self._simple_select_list_txn( + txn=txn, table="room_aliases", keyvalues=None, + retcols=["room_id", "room_alias"] + ) matching_room_list |= set([ - r.room_id for r in room_alias_mappings if - service.is_interested_in_alias(r.room_alias) + r["room_id"] for r in room_alias_mappings if + service.is_interested_in_alias(r["room_alias"]) ]) # get all rooms for every user for this AS. This is scoped to users on # this HS only. - user_list = yield self.get_all_users() + user_list = self._simple_select_list_txn( + txn=txn, table="users", keyvalues=None, retcols=["name"] + ) user_list = [ u["name"] for u in user_list if service.is_interested_in_user(u["name"]) ] rooms_for_user_matching_user_id = set() # RoomsForUser list for user_id in user_list: - rooms_for_user = yield self.get_rooms_for_user(user_id) + # FIXME: This assumes this store is linked with RoomMemberStore :( + rooms_for_user = self._get_rooms_for_user_where_membership_is_txn( + txn=txn, + user_id=user_id, + membership_list=[Membership.JOIN] + ) rooms_for_user_matching_user_id |= set(rooms_for_user) # make RoomsForUser tuples for room ids and aliases which are not in the @@ -253,7 +267,7 @@ class ApplicationServiceStore(SQLBaseStore): ] rooms_for_user_matching_user_id |= set(missing_rooms_for_user) - defer.returnValue(rooms_for_user_matching_user_id) + return rooms_for_user_matching_user_id @defer.inlineCallbacks def _populate_cache(self): diff --git a/synapse/storage/directory.py b/synapse/storage/directory.py index e391239a3c..68b7d59693 100644 --- a/synapse/storage/directory.py +++ b/synapse/storage/directory.py @@ -134,27 +134,6 @@ class DirectoryStore(SQLBaseStore): return room_id - @defer.inlineCallbacks - def get_all_associations(self): - """Retrieve the entire list of room alias -> room ID pairings. - - Returns: - A list of RoomAliasMappings. - """ - results = yield self._execute_and_decode( - "SELECT room_id, room_alias FROM room_aliases" - ) - - # TODO(kegan): It feels wrong to be specifying no servers here, but - # equally this function isn't required to obtain all servers so - # retrieving them "just for the sake of it" also seems wrong, but we - # want to conform to passing Objects around and not dicts.. - defer.returnValue([ - RoomAliasMapping( - room_id=r["room_id"], room_alias=r["room_alias"], servers="" - ) for r in results - ]) - def get_aliases_for_room(self, room_id): return self._simple_select_onecol( "room_aliases", diff --git a/synapse/storage/registration.py b/synapse/storage/registration.py index 54cd15bc0e..029b07cc66 100644 --- a/synapse/storage/registration.py +++ b/synapse/storage/registration.py @@ -92,10 +92,6 @@ class RegistrationStore(SQLBaseStore): query, user_id ) - def get_all_users(self): - return self._simple_select_list( - table="users", keyvalues=None, retcols=["name"]) - def get_user_by_token(self, token): """Get a user from the given access token. 
diff --git a/synapse/storage/room.py b/synapse/storage/room.py index 6bd0b22ae5..750b17a45f 100644 --- a/synapse/storage/room.py +++ b/synapse/storage/room.py @@ -71,16 +71,6 @@ class RoomStore(SQLBaseStore): RoomsTable.decode_single_result, query, room_id, ) - def get_all_rooms(self): - """Retrieve all the rooms. - - Returns: - A list of namedtuples containing the room information. - """ - return self._simple_select_list( - table="rooms", keyvalues=None, retcols=["room_id"] - ) - @defer.inlineCallbacks def get_rooms(self, is_public): """Retrieve a list of all public rooms. diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 58aa376c20..3d0172d09b 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -180,6 +180,14 @@ class RoomMemberStore(SQLBaseStore): if not membership_list: return defer.succeed(None) + return self.runInteraction( + "get_rooms_for_user_where_membership_is", + self._get_rooms_for_user_where_membership_is_txn, + user_id, membership_list + ) + + def _get_rooms_for_user_where_membership_is_txn(self, txn, user_id, + membership_list): where_clause = "user_id = ? AND (%s)" % ( " OR ".join(["membership = ?" for _ in membership_list]), ) @@ -187,24 +195,18 @@ class RoomMemberStore(SQLBaseStore): args = [user_id] args.extend(membership_list) - def f(txn): - sql = ( - "SELECT m.room_id, m.sender, m.membership" - " FROM room_memberships as m" - " INNER JOIN current_state_events as c" - " ON m.event_id = c.event_id" - " WHERE %s" - ) % (where_clause,) + sql = ( + "SELECT m.room_id, m.sender, m.membership" + " FROM room_memberships as m" + " INNER JOIN current_state_events as c" + " ON m.event_id = c.event_id" + " WHERE %s" + ) % (where_clause,) - txn.execute(sql, args) - return [ - RoomsForUser(**r) for r in self.cursor_to_dict(txn) - ] - - return self.runInteraction( - "get_rooms_for_user_where_membership_is", - f - ) + txn.execute(sql, args) + return [ + RoomsForUser(**r) for r in self.cursor_to_dict(txn) + ] def get_joined_hosts_for_room(self, room_id): return self._simple_select_onecol( diff --git a/synapse/storage/stream.py b/synapse/storage/stream.py index 865cb13e9e..09bc522210 100644 --- a/synapse/storage/stream.py +++ b/synapse/storage/stream.py @@ -146,18 +146,6 @@ class StreamStore(SQLBaseStore): defer.returnValue(([], to_key)) return - # Logic: - # - We want ALL events which match the AS room_id regex - # - We want ALL events which match the rooms represented by the AS - # room_alias regex - # - We want ALL events for rooms that AS users have joined. - # This is currently supported via get_app_service_rooms (which is used - # for the Notifier listener rooms). We can't reasonably make a SQL - # query for these room IDs, so we'll pull all the events between from/to - # and filter in python. 
- rooms_for_as = yield self.get_app_service_rooms(service) - room_ids_for_as = [r.room_id for r in rooms_for_as] - # select all the events between from/to with a sensible limit sql = ( "SELECT e.event_id, e.room_id, e.type, s.state_key, " @@ -169,20 +157,32 @@ class StreamStore(SQLBaseStore): "limit": limit } - def app_service_interested(row): - if row["room_id"] in room_ids_for_as: - return True - - if row["type"] == EventTypes.Member: - if service.is_interested_in_user(row.get("state_key")): - return True - return False - def f(txn): + # pull out all the events between the tokens txn.execute(sql, (from_id.stream, to_id.stream,)) - rows = self.cursor_to_dict(txn) + # Logic: + # - We want ALL events which match the AS room_id regex + # - We want ALL events which match the rooms represented by the AS + # room_alias regex + # - We want ALL events for rooms that AS users have joined. + # This is currently supported via get_app_service_rooms (which is + # used for the Notifier listener rooms). We can't reasonably make a + # SQL query for these room IDs, so we'll pull all the events between + # from/to and filter in python. + rooms_for_as = self._get_app_service_rooms_txn(txn, service) + room_ids_for_as = [r.room_id for r in rooms_for_as] + + def app_service_interested(row): + if row["room_id"] in room_ids_for_as: + return True + + if row["type"] == EventTypes.Member: + if service.is_interested_in_user(row.get("state_key")): + return True + return False + ret = self._get_events_txn( txn, # apply the filter on the room id list @@ -197,7 +197,6 @@ class StreamStore(SQLBaseStore): if rows: key = "s%d" % max(r["stream_ordering"] for r in rows) - else: # Assume we didn't get anything because there was nothing to # get. @@ -266,7 +265,6 @@ class StreamStore(SQLBaseStore): if rows: key = "s%d" % max(r["stream_ordering"] for r in rows) - else: # Assume we didn't get anything because there was nothing to # get. From cb97ea3ec236c23c745e59c3a857503dd8dc3410 Mon Sep 17 00:00:00 2001 From: Kegan Dougal Date: Mon, 2 Mar 2015 11:23:46 +0000 Subject: [PATCH 33/45] PEP8 --- synapse/storage/roommember.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/synapse/storage/roommember.py b/synapse/storage/roommember.py index 3d0172d09b..65ffb4627f 100644 --- a/synapse/storage/roommember.py +++ b/synapse/storage/roommember.py @@ -187,7 +187,7 @@ class RoomMemberStore(SQLBaseStore): ) def _get_rooms_for_user_where_membership_is_txn(self, txn, user_id, - membership_list): + membership_list): where_clause = "user_id = ? AND (%s)" % ( " OR ".join(["membership = ?" for _ in membership_list]), ) From 9d9b230501915c326136567349b0995623c48a21 Mon Sep 17 00:00:00 2001 From: Erik Johnston Date: Mon, 2 Mar 2015 11:33:45 +0000 Subject: [PATCH 34/45] Make the federation server ratelimiting configurable. 
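Concretely, the diff below lifts the previously hard-coded limiter values into argparse options whose defaults preserve the old behaviour; spelled out, the new knobs appended to whatever startup command a deployment already uses would look roughly like this (a sketch, the values shown are the defaults):

    --federation-rc-window-size 10000 --federation-rc-sleep-limit 10 \
    --federation-rc-sleep-delay 500 --federation-rc-reject-limit 50 \
    --federation-rc-concurrent 3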
--- synapse/config/ratelimiting.py | 36 ++++++++++++++++++++++++ synapse/federation/transport/__init__.py | 10 +++---- 2 files changed, 41 insertions(+), 5 deletions(-)

diff --git a/synapse/config/ratelimiting.py b/synapse/config/ratelimiting.py
index 17c7e64ce7..862c07ef8c 100644
--- a/synapse/config/ratelimiting.py
+++ b/synapse/config/ratelimiting.py
@@ -22,6 +22,12 @@ class RatelimitConfig(Config):
         self.rc_messages_per_second = args.rc_messages_per_second
         self.rc_message_burst_count = args.rc_message_burst_count
 
+        self.federation_rc_window_size = args.federation_rc_window_size
+        self.federation_rc_sleep_limit = args.federation_rc_sleep_limit
+        self.federation_rc_sleep_delay = args.federation_rc_sleep_delay
+        self.federation_rc_reject_limit = args.federation_rc_reject_limit
+        self.federation_rc_concurrent = args.federation_rc_concurrent
+
     @classmethod
     def add_arguments(cls, parser):
         super(RatelimitConfig, cls).add_arguments(parser)
@@ -34,3 +40,33 @@ class RatelimitConfig(Config):
             "--rc-message-burst-count", type=float, default=10,
             help="number of messages a client can send before being throttled"
         )
+
+        rc_group.add_argument(
+            "--federation-rc-window-size", type=int, default=10000,
+            help="The federation window size in milliseconds",
+        )
+
+        rc_group.add_argument(
+            "--federation-rc-sleep-limit", type=int, default=10,
+            help="The number of federation requests from a single server"
+                 " in a window before the server will delay processing the"
+                 " request.",
+        )
+
+        rc_group.add_argument(
+            "--federation-rc-sleep-delay", type=int, default=500,
+            help="The duration in milliseconds to delay processing events from"
+                 " remote servers by if they go over the sleep limit.",
+        )
+
+        rc_group.add_argument(
+            "--federation-rc-reject-limit", type=int, default=50,
+            help="The maximum number of concurrent federation requests allowed"
+                 " from a single server",
+        )
+
+        rc_group.add_argument(
+            "--federation-rc-concurrent", type=int, default=3,
+            help="The number of federation requests to concurrently process"
+                 " from a single server",
+        )
diff --git a/synapse/federation/transport/__init__.py b/synapse/federation/transport/__init__.py
index f0283b5105..2a671b9aec 100644
--- a/synapse/federation/transport/__init__.py
+++ b/synapse/federation/transport/__init__.py
@@ -66,9 +66,9 @@ class TransportLayer(TransportLayerServer, TransportLayerClient):
 
         self.ratelimiter = FederationRateLimiter(
             self.clock,
-            window_size=10000,
-            sleep_limit=10,
-            sleep_msec=500,
-            reject_limit=50,
-            concurrent_requests=3,
+            window_size=homeserver.config.federation_rc_window_size,
+            sleep_limit=homeserver.config.federation_rc_sleep_limit,
+            sleep_msec=homeserver.config.federation_rc_sleep_delay,
+            reject_limit=homeserver.config.federation_rc_reject_limit,
+            concurrent_requests=homeserver.config.federation_rc_concurrent,
         )

From 23d9bd1d745a037202bb9a134cdb848eb65a01e9 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 2 Mar 2015 11:39:40 +0000
Subject: [PATCH 35/45] Process transactions serially.

Since the events received in a transaction are ordered, later events might
depend on earlier events and so we shouldn't blindly process them in parallel.
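In miniature, the change below replaces a DeferredList with plain serial
iteration over deferreds. A sketch of the pattern using stock Twisted only
(`handle` is a stand-in for synapse's _handle_new_pdu and is not part of this
commit):

    from twisted.internet import defer

    @defer.inlineCallbacks
    def process_serially(pdus, handle):
        results = []
        for pdu in pdus:
            # Yielding waits for each PDU to be fully handled before the
            # next one is started, so ordering dependencies are honoured.
            try:
                yield handle(pdu)
                results.append({})
            except Exception as e:
                results.append({"error": str(e)})
        defer.returnValue(results)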
--- synapse/federation/federation_server.py | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-)

diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 22b9663831..7ee37fb34d 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -112,17 +112,23 @@ class FederationServer(FederationBase):
         logger.debug("[%s] Transaction is new", transaction.transaction_id)
 
         with PreserveLoggingContext():
-            dl = []
+            results = []
+
             for pdu in pdu_list:
                 d = self._handle_new_pdu(transaction.origin, pdu)
 
                 def handle_failure(failure):
                     failure.trap(FederationError)
                     self.send_failure(failure.value, transaction.origin)
+                    return failure
 
                 d.addErrback(handle_failure)
-                dl.append(d)
+
+                try:
+                    yield d
+                    results.append({})
+                except Exception as e:
+                    results.append({"error": str(e)})
 
         if hasattr(transaction, "edus"):
             for edu in [Edu(**x) for x in transaction.edus]:
@@ -135,21 +141,11 @@ class FederationServer(FederationBase):
         for failure in getattr(transaction, "pdu_failures", []):
             logger.info("Got failure %r", failure)
 
-        results = yield defer.DeferredList(dl, consumeErrors=True)
-
-        ret = []
-        for r in results:
-            if r[0]:
-                ret.append({})
-            else:
-                logger.exception(r[1])
-                ret.append({"error": str(r[1].value)})
-
-        logger.debug("Returning: %s", str(ret))
+        logger.debug("Returning: %s", str(results))
 
         response = {
             "pdus": dict(zip(
-                (p.event_id for p in pdu_list), ret
+                (p.event_id for p in pdu_list), results
             )),
         }

From 3f6b36d96e7e718ff7fb9d98a8211448f6b0d7e9 Mon Sep 17 00:00:00 2001
From: Kegan Dougal
Date: Mon, 2 Mar 2015 11:39:58 +0000
Subject: [PATCH 36/45] Add upgrade script

--- scripts/upgrade_appservice_db.py | 36 ++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 scripts/upgrade_appservice_db.py

diff --git a/scripts/upgrade_appservice_db.py b/scripts/upgrade_appservice_db.py
new file mode 100644
index 0000000000..acdee56d9f
--- /dev/null
+++ b/scripts/upgrade_appservice_db.py
@@ -0,0 +1,36 @@
+import argparse
+import json
+import sqlite3
+
+
+def main(dbname):
+    con = sqlite3.connect(dbname)
+    cur = con.cursor()
+    cur.execute("SELECT id, regex FROM application_services_regex")
+    for row in cur.fetchall():
+        try:
+            print "checking %s..." % row[0]
+            json.loads(row[1])
+            print "Already in new format"
+        except ValueError:
+            # row isn't in json, make it so.
+            string_regex = row[1]
+            new_regex = json.dumps({
+                "regex": string_regex,
+                "exclusive": True
+            })
+            cur.execute(
+                "UPDATE application_services_regex SET regex=? WHERE id=?",
+                (new_regex, row[0])
+            )
+    cur.close()
+    con.commit()
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+
+    parser.add_argument("database")
+    args = parser.parse_args()
+
+    main(args.database)

From 29481690c5b296a1c8aee3068d32ef083ef227f3 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 2 Mar 2015 11:50:43 +0000
Subject: [PATCH 37/45] If we're yielding, don't add an errback

--- synapse/federation/federation_server.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/synapse/federation/federation_server.py b/synapse/federation/federation_server.py
index 7ee37fb34d..bc9bac809a 100644
--- a/synapse/federation/federation_server.py
+++ b/synapse/federation/federation_server.py
@@ -117,16 +117,12 @@ class FederationServer(FederationBase):
             for pdu in pdu_list:
                 d = self._handle_new_pdu(transaction.origin, pdu)
 
-                def handle_failure(failure):
-                    failure.trap(FederationError)
-                    self.send_failure(failure.value, transaction.origin)
-                    return failure
-
-                d.addErrback(handle_failure)
-
                 try:
                     yield d
                     results.append({})
+                except FederationError as e:
+                    self.send_failure(e, transaction.origin)
+                    results.append({"error": str(e)})
                 except Exception as e:
                     results.append({"error": str(e)})

From 3077cb291590a7ba3b24a3d1a9985d65980924fb Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 2 Mar 2015 13:32:44 +0000
Subject: [PATCH 38/45] Use contextlib.contextmanager instead of a custom class

--- synapse/util/ratelimitutils.py | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-)

diff --git a/synapse/util/ratelimitutils.py b/synapse/util/ratelimitutils.py
index 259d5f6f88..d4457af950 100644
--- a/synapse/util/ratelimitutils.py
+++ b/synapse/util/ratelimitutils.py
@@ -20,6 +20,7 @@ from synapse.api.errors import LimitExceededError
 from synapse.util.async import sleep
 
 import collections
+import contextlib
 import logging
 
@@ -112,16 +113,19 @@ class _PerHostRatelimiter(object):
             or self.request_times
         )
 
+    @contextlib.contextmanager
     def ratelimit(self):
+        # `contextlib.contextmanager` takes a generator and turns it into a
+        # context manager. The generator should only yield once with a value
+        # to be returned by the manager.
+        # Exceptions will be reraised at the yield.
+
         request_id = object()
-
-        def on_enter():
-            return self._on_enter(request_id)
-
-        def on_exit(exc_type, exc_val, exc_tb):
-            return self._on_exit(request_id)
-
-        return ContextManagerFunction(on_enter, on_exit)
+        ret = self._on_enter(request_id)
+        try:
+            yield ret
+        finally:
+            self._on_exit(request_id)
 
     def _on_enter(self, request_id):
         time_now = self.clock.time_msec()
@@ -210,17 +214,3 @@ class _PerHostRatelimiter(object):
                 deferred.callback(None)
             except KeyError:
                 pass
-
-
-class ContextManagerFunction(object):
-    def __init__(self, on_enter, on_exit):
-        self.on_enter = on_enter
-        self.on_exit = on_exit
-
-    def __enter__(self):
-        if self.on_enter:
-            return self.on_enter()
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        if self.on_exit:
-            return self.on_exit(exc_type, exc_val, exc_tb)

From c3c01641d2d49988c59826912e4e48740ab4f32a Mon Sep 17 00:00:00 2001
From: Kegan Dougal
Date: Mon, 2 Mar 2015 13:38:57 +0000
Subject: [PATCH 39/45] Run deltas and bump user_version in upgrade script

--- UPGRADE.rst | 5 +++++ scripts/upgrade_appservice_db.py | 28 +++++++++++++++++++++++----- synapse/storage/__init__.py | 2 +- 3 files changed, 29 insertions(+), 6 deletions(-)

diff --git a/UPGRADE.rst b/UPGRADE.rst
index 4045baf4e0..8cda8d02a0 100644
--- a/UPGRADE.rst
+++ b/UPGRADE.rst
@@ -12,6 +12,11 @@ Servers which use captchas will need to add their public key to::
 This is required in order to support registration fallback (typically used on
 mobile devices).
 
+Servers which have registered application services need to upgrade their
+database as the format of stored application services has changed in Synapse.
+Run ``python upgrade_appservice_db.py <database>`` to convert to the
+new format.
+
 Upgrading to v0.7.0
 ===================
 
diff --git a/scripts/upgrade_appservice_db.py b/scripts/upgrade_appservice_db.py
index acdee56d9f..ae1b91c64f 100644
--- a/scripts/upgrade_appservice_db.py
+++ b/scripts/upgrade_appservice_db.py
@@ -1,17 +1,28 @@
+from synapse.storage import read_schema
 import argparse
 import json
 import sqlite3
 
 
-def main(dbname):
-    con = sqlite3.connect(dbname)
-    cur = con.cursor()
+def do_other_deltas(cursor):
+    cursor.execute("PRAGMA user_version")
+    row = cursor.fetchone()
+
+    if row and row[0]:
+        user_version = row[0]
+        # Run every version since after the current version.
+        for v in range(user_version + 1, 10):
+            print "Running delta: %d" % (v,)
+            sql_script = read_schema("delta/v%d" % (v,))
+            cursor.executescript(sql_script)
+
+
+def update_app_service_table(cur):
     cur.execute("SELECT id, regex FROM application_services_regex")
     for row in cur.fetchall():
         try:
             print "checking %s..." % row[0]
             json.loads(row[1])
         except ValueError:
             # row isn't in json, make it so.
             string_regex = row[1]
@@ -23,13 +34,20 @@ def main(dbname):
                 "UPDATE application_services_regex SET regex=? WHERE id=?",
                 (new_regex, row[0])
             )
+
+
+def main(dbname):
+    con = sqlite3.connect(dbname)
+    cur = con.cursor()
+    do_other_deltas(cur)
+    update_app_service_table(cur)
+    cur.execute("PRAGMA user_version = 14")
     cur.close()
     con.commit()
 
 
 if __name__ == "__main__":
     parser = argparse.ArgumentParser()
-
     parser.add_argument("database")
     args = parser.parse_args()
 
diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index d16e7b8fac..3753cd28d0 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -74,7 +74,7 @@ SCHEMAS = [
 
 # Remember to update this number every time an incompatible change is made to
 # database schema files, so the users will be informed on server restarts.
-SCHEMA_VERSION = 13
+SCHEMA_VERSION = 14
 
 dir_path = os.path.abspath(os.path.dirname(__file__))

From fb7b6c468128033fcaec3c3e4ca678e5cd3694a0 Mon Sep 17 00:00:00 2001
From: Kegan Dougal
Date: Mon, 2 Mar 2015 14:52:31 +0000
Subject: [PATCH 40/45] Wording tweaks

--- UPGRADE.rst | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/UPGRADE.rst b/UPGRADE.rst
index 8cda8d02a0..f2a042f0e9 100644
--- a/UPGRADE.rst
+++ b/UPGRADE.rst
@@ -12,10 +12,9 @@ Servers which use captchas will need to add their public key to::
 This is required in order to support registration fallback (typically used on
 mobile devices).
 
-Servers which have registered application services need to upgrade their
-database as the format of stored application services has changed in Synapse.
-Run ``python upgrade_appservice_db.py <database>`` to convert to the
-new format.
+The format of stored application services has changed in Synapse. You will need
+to run ``python upgrade_appservice_db.py <database>`` to convert to
+the new format.
 
 Upgrading to v0.7.0
 ===================

From 8486910b64ba55292bfb7c619984eb73fd0a7e14 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 2 Mar 2015 14:57:04 +0000
Subject: [PATCH 41/45] Bump webclient version

--- synapse/python_dependencies.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/synapse/python_dependencies.py b/synapse/python_dependencies.py
index c71df75404..5fe8a825e3 100644
--- a/synapse/python_dependencies.py
+++ b/synapse/python_dependencies.py
@@ -5,7 +5,7 @@ logger = logging.getLogger(__name__)
 
 REQUIREMENTS = {
     "syutil>=0.0.3": ["syutil"],
-    "matrix_angular_sdk>=0.6.3": ["syweb>=0.6.3"],
+    "matrix_angular_sdk>=0.6.4": ["syweb>=0.6.4"],
     "Twisted==14.0.2": ["twisted==14.0.2"],
     "service_identity>=1.0.0": ["service_identity>=1.0.0"],
     "pyopenssl>=0.14": ["OpenSSL>=0.14"],
@@ -36,8 +36,8 @@ DEPENDENCY_LINKS = [
     ),
     github_link(
         project="matrix-org/matrix-angular-sdk",
-        version="v0.6.3",
-        egg="matrix_angular_sdk-0.6.3",
+        version="v0.6.4",
+        egg="matrix_angular_sdk-0.6.4",
     ),
 ]

From 2de5b14fe00d2337a3179aa19185e66350ba1a3e Mon Sep 17 00:00:00 2001
From: Kegan Dougal
Date: Mon, 2 Mar 2015 15:36:37 +0000
Subject: [PATCH 42/45] Fix bug which prevented the HS pushing events to the AS due to FrozenEvents

--- synapse/http/client.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/http/client.py b/synapse/http/client.py
index 22b7145ac1..b53a07aa2d 100644
--- a/synapse/http/client.py
+++ b/synapse/http/client.py
@@ -143,7 +143,7 @@ class SimpleHttpClient(object):
             query_bytes = urllib.urlencode(args, True)
             uri = "%s?%s" % (uri, query_bytes)
 
-        json_str = json.dumps(json_body)
+        json_str = encode_canonical_json(json_body)
 
         response = yield self.agent.request(
             "PUT",

From b41dc687739e92d5debcae5bb9feb21b97cbc178 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 2 Mar 2015 16:36:19 +0000
Subject: [PATCH 43/45] We purposefully don't have a version 14 delta script.

--- synapse/storage/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index 3753cd28d0..f0b7b8fef3 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -633,7 +633,7 @@ def prepare_database(db_conn):
 
         # Run every version since after the current version.
         for v in range(user_version + 1, SCHEMA_VERSION + 1):
-            if v == 10:
+            if v in (10, 14,):
                 raise UpgradeDatabaseException(
                     "No delta for version %d" % (v,)
                 )

From 9f03553f48d774ba88ca91a14050c94c4a1b77b6 Mon Sep 17 00:00:00 2001
From: Erik Johnston
Date: Mon, 2 Mar 2015 16:38:40 +0000
Subject: [PATCH 44/45] Add missing comma

--- synapse/storage/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/synapse/storage/__init__.py b/synapse/storage/__init__.py
index f0b7b8fef3..d6ec446bd2 100644
--- a/synapse/storage/__init__.py
+++ b/synapse/storage/__init__.py
@@ -637,7 +637,7 @@ def prepare_database(db_conn):
             raise UpgradeDatabaseException(
                 "No delta for version %d" % (v,)
             )
-            sql_script = read_schema("delta/v%d" % (v))
+            sql_script = read_schema("delta/v%d" % (v,))
             c.executescript(sql_script)
 
     db_conn.commit()

From 3c8bd7809c1f8739c5b8676d8966fa19851a4e9a Mon Sep 17 00:00:00 2001
From: David Baker
Date: Mon, 2 Mar 2015 16:40:17 +0000
Subject: [PATCH 45/45] Fix upgrade instructions

--- UPGRADE.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/UPGRADE.rst b/UPGRADE.rst
index f2a042f0e9..499b59aa0a 100644
--- a/UPGRADE.rst
+++ b/UPGRADE.rst
@@ -13,8 +13,8 @@ This is required in order to support registration fallback (typically used on
 mobile devices).
 
 The format of stored application services has changed in Synapse. You will need
-to run ``python upgrade_appservice_db.py <database>`` to convert to
-the new format.
+to run ``PYTHONPATH=. python scripts/upgrade_appservice_db.py <database>``
+to convert to the new format.
 
 Upgrading to v0.7.0
 ===================
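For reference, the corrected command from the final patch is run from the root
of a synapse checkout; the database filename below is only an example, so
substitute your own::

    PYTHONPATH=. python scripts/upgrade_appservice_db.py homeserver.db

Per the script added in PATCH 36 and extended in PATCH 39, this applies any
pre-v10 schema deltas that are still outstanding, rewrites each
application_services_regex row from a bare regex string into the new JSON
format ({"regex": ..., "exclusive": true}), and then sets the database's
PRAGMA user_version to 14.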