From 14d8f342d5cae86d93d9ba2b411d486690ff54f5 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 14 Jan 2020 11:58:02 +0000 Subject: [PATCH 1/3] move batch_iter to a separate module --- synapse/storage/data_stores/main/cache.py | 2 +- synapse/storage/data_stores/main/devices.py | 2 +- synapse/storage/data_stores/main/events.py | 2 +- .../storage/data_stores/main/events_worker.py | 2 +- synapse/storage/data_stores/main/keys.py | 2 +- synapse/storage/data_stores/main/presence.py | 2 +- synapse/util/__init__.py | 17 --------- synapse/util/iterutils.py | 35 +++++++++++++++++++ 8 files changed, 41 insertions(+), 23 deletions(-) create mode 100644 synapse/util/iterutils.py diff --git a/synapse/storage/data_stores/main/cache.py b/synapse/storage/data_stores/main/cache.py index 54ed8574c4..bf91512daf 100644 --- a/synapse/storage/data_stores/main/cache.py +++ b/synapse/storage/data_stores/main/cache.py @@ -21,7 +21,7 @@ from twisted.internet import defer from synapse.storage._base import SQLBaseStore from synapse.storage.engines import PostgresEngine -from synapse.util import batch_iter +from synapse.util.iterutils import batch_iter logger = logging.getLogger(__name__) diff --git a/synapse/storage/data_stores/main/devices.py b/synapse/storage/data_stores/main/devices.py index 9a828231c4..f0a7962dd0 100644 --- a/synapse/storage/data_stores/main/devices.py +++ b/synapse/storage/data_stores/main/devices.py @@ -33,13 +33,13 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import SQLBaseStore, db_to_json, make_in_list_sql_clause from synapse.storage.database import Database from synapse.types import get_verify_key_from_cross_signing_key -from synapse.util import batch_iter from synapse.util.caches.descriptors import ( Cache, cached, cachedInlineCallbacks, cachedList, ) +from synapse.util.iterutils import batch_iter logger = logging.getLogger(__name__) diff --git a/synapse/storage/data_stores/main/events.py b/synapse/storage/data_stores/main/events.py index e9fe63037b..bb69c20448 100644 --- a/synapse/storage/data_stores/main/events.py +++ b/synapse/storage/data_stores/main/events.py @@ -43,9 +43,9 @@ from synapse.storage.data_stores.main.events_worker import EventsWorkerStore from synapse.storage.data_stores.main.state import StateGroupWorkerStore from synapse.storage.database import Database from synapse.types import RoomStreamToken, get_domain_from_id -from synapse.util import batch_iter from synapse.util.caches.descriptors import cached, cachedInlineCallbacks from synapse.util.frozenutils import frozendict_json_encoder +from synapse.util.iterutils import batch_iter logger = logging.getLogger(__name__) diff --git a/synapse/storage/data_stores/main/events_worker.py b/synapse/storage/data_stores/main/events_worker.py index 0cce5232f5..3b93e0597a 100644 --- a/synapse/storage/data_stores/main/events_worker.py +++ b/synapse/storage/data_stores/main/events_worker.py @@ -37,8 +37,8 @@ from synapse.metrics.background_process_metrics import run_as_background_process from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause from synapse.storage.database import Database from synapse.types import get_domain_from_id -from synapse.util import batch_iter from synapse.util.caches.descriptors import Cache +from synapse.util.iterutils import batch_iter from synapse.util.metrics import Measure logger = logging.getLogger(__name__) diff --git a/synapse/storage/data_stores/main/keys.py b/synapse/storage/data_stores/main/keys.py index 6b12f5a75f..ba89c68c9f 100644 --- a/synapse/storage/data_stores/main/keys.py +++ b/synapse/storage/data_stores/main/keys.py @@ -23,8 +23,8 @@ from signedjson.key import decode_verify_key_bytes from synapse.storage._base import SQLBaseStore from synapse.storage.keys import FetchKeyResult -from synapse.util import batch_iter from synapse.util.caches.descriptors import cached, cachedList +from synapse.util.iterutils import batch_iter logger = logging.getLogger(__name__) diff --git a/synapse/storage/data_stores/main/presence.py b/synapse/storage/data_stores/main/presence.py index a2c83e0867..604c8b7ddd 100644 --- a/synapse/storage/data_stores/main/presence.py +++ b/synapse/storage/data_stores/main/presence.py @@ -17,8 +17,8 @@ from twisted.internet import defer from synapse.storage._base import SQLBaseStore, make_in_list_sql_clause from synapse.storage.presence import UserPresenceState -from synapse.util import batch_iter from synapse.util.caches.descriptors import cached, cachedList +from synapse.util.iterutils import batch_iter class PresenceStore(SQLBaseStore): diff --git a/synapse/util/__init__.py b/synapse/util/__init__.py index 7856353002..60f0de70f7 100644 --- a/synapse/util/__init__.py +++ b/synapse/util/__init__.py @@ -15,7 +15,6 @@ import logging import re -from itertools import islice import attr @@ -107,22 +106,6 @@ class Clock(object): raise -def batch_iter(iterable, size): - """batch an iterable up into tuples with a maximum size - - Args: - iterable (iterable): the iterable to slice - size (int): the maximum batch size - - Returns: - an iterator over the chunks - """ - # make sure we can deal with iterables like lists too - sourceiter = iter(iterable) - # call islice until it returns an empty tuple - return iter(lambda: tuple(islice(sourceiter, size)), ()) - - def log_failure(failure, msg, consumeErrors=True): """Creates a function suitable for passing to `Deferred.addErrback` that logs any failures that occur. diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py new file mode 100644 index 0000000000..c10016fbc5 --- /dev/null +++ b/synapse/util/iterutils.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- +# Copyright 2014-2016 OpenMarket Ltd +# Copyright 2020 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from itertools import islice +from typing import Iterable, Iterator, Sequence, Tuple, TypeVar + +T = TypeVar("T") + + +def batch_iter(iterable: Iterable[T], size: int) -> Iterator[Tuple[T]]: + """batch an iterable up into tuples with a maximum size + + Args: + iterable (iterable): the iterable to slice + size (int): the maximum batch size + + Returns: + an iterator over the chunks + """ + # make sure we can deal with iterables like lists too + sourceiter = iter(iterable) + # call islice until it returns an empty tuple + return iter(lambda: tuple(islice(sourceiter, size)), ()) From acc7820574426cf27673d941b1b0362272113351 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 16 Jan 2020 22:26:34 +0000 Subject: [PATCH 2/3] Log saml assertions rather than the whole response ... since the whole response is huge. We even need to break up the assertions, since kibana otherwise truncates them. --- synapse/handlers/saml_handler.py | 13 ++++++++- synapse/util/iterutils.py | 13 +++++++++ tests/util/test_itertools.py | 47 ++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 tests/util/test_itertools.py diff --git a/synapse/handlers/saml_handler.py b/synapse/handlers/saml_handler.py index 107f97032b..32638671c9 100644 --- a/synapse/handlers/saml_handler.py +++ b/synapse/handlers/saml_handler.py @@ -32,6 +32,7 @@ from synapse.types import ( mxid_localpart_allowed_characters, ) from synapse.util.async_helpers import Linearizer +from synapse.util.iterutils import chunk_seq logger = logging.getLogger(__name__) @@ -132,7 +133,17 @@ class SamlHandler: logger.warning("SAML2 response was not signed") raise SynapseError(400, "SAML2 response was not signed") - logger.info("SAML2 response: %s", saml2_auth.origxml) + logger.debug("SAML2 response: %s", saml2_auth.origxml) + for assertion in saml2_auth.assertions: + # kibana limits the length of a log field, whereas this is all rather + # useful, so split it up. + count = 0 + for part in chunk_seq(str(assertion), 10000): + logger.info( + "SAML2 assertion: %s%s", "(%i)..." % (count,) if count else "", part + ) + count += 1 + logger.info("SAML2 mapped attributes: %s", saml2_auth.ava) try: diff --git a/synapse/util/iterutils.py b/synapse/util/iterutils.py index c10016fbc5..06faeebe7f 100644 --- a/synapse/util/iterutils.py +++ b/synapse/util/iterutils.py @@ -33,3 +33,16 @@ def batch_iter(iterable: Iterable[T], size: int) -> Iterator[Tuple[T]]: sourceiter = iter(iterable) # call islice until it returns an empty tuple return iter(lambda: tuple(islice(sourceiter, size)), ()) + + +ISeq = TypeVar("ISeq", bound=Sequence, covariant=True) + + +def chunk_seq(iseq: ISeq, maxlen: int) -> Iterable[ISeq]: + """Split the given sequence into chunks of the given size + + The last chunk may be shorter than the given size. + + If the input is empty, no chunks are returned. + """ + return (iseq[i : i + maxlen] for i in range(0, len(iseq), maxlen)) diff --git a/tests/util/test_itertools.py b/tests/util/test_itertools.py new file mode 100644 index 0000000000..0ab0a91483 --- /dev/null +++ b/tests/util/test_itertools.py @@ -0,0 +1,47 @@ +# -*- coding: utf-8 -*- +# Copyright 2020 The Matrix.org Foundation C.I.C. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from synapse.util.iterutils import chunk_seq + +from tests.unittest import TestCase + + +class ChunkSeqTests(TestCase): + def test_short_seq(self): + parts = chunk_seq("123", 8) + + self.assertEqual( + list(parts), ["123"], + ) + + def test_long_seq(self): + parts = chunk_seq("abcdefghijklmnop", 8) + + self.assertEqual( + list(parts), ["abcdefgh", "ijklmnop"], + ) + + def test_uneven_parts(self): + parts = chunk_seq("abcdefghijklmnop", 5) + + self.assertEqual( + list(parts), ["abcde", "fghij", "klmno", "p"], + ) + + def test_empty_input(self): + parts = chunk_seq([], 5) + + self.assertEqual( + list(parts), [], + ) From 95c5b9bfb3506d06e6b0a7d42adfb1f76f2cb7ca Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Thu, 16 Jan 2020 22:29:06 +0000 Subject: [PATCH 3/3] changelog --- changelog.d/6724.misc | 1 + 1 file changed, 1 insertion(+) create mode 100644 changelog.d/6724.misc diff --git a/changelog.d/6724.misc b/changelog.d/6724.misc new file mode 100644 index 0000000000..5256be75fa --- /dev/null +++ b/changelog.d/6724.misc @@ -0,0 +1 @@ +When processing a SAML response, log the assertions for easier configuration.