From e9bfe719ba1928dc191cea93120c5c8a89584434 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 5 Nov 2019 15:45:17 +0000 Subject: [PATCH 1/5] Strip overlong OpenGraph data from url preview ... to stop people causing DoSes with malicious web pages --- changelog.d/6331.feature | 1 + synapse/rest/media/v1/preview_url_resource.py | 20 ++++++++++- tests/rest/media/v1/test_url_preview.py | 34 +++++++++++++++++++ 3 files changed, 54 insertions(+), 1 deletion(-) create mode 100644 changelog.d/6331.feature diff --git a/changelog.d/6331.feature b/changelog.d/6331.feature new file mode 100644 index 0000000000..eaf69ef3f6 --- /dev/null +++ b/changelog.d/6331.feature @@ -0,0 +1 @@ +Limit the length of data returned by url previews, to prevent DoS attacks. diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 0c68c3aad5..6d8c39a410 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -56,6 +56,9 @@ logger = logging.getLogger(__name__) _charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I) _content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I) +OG_TAG_NAME_MAXLEN = 50 +OG_TAG_VALUE_MAXLEN = 1000 + class PreviewUrlResource(DirectServeResource): isLeaf = True @@ -167,7 +170,7 @@ class PreviewUrlResource(DirectServeResource): ts (int): Returns: - Deferred[str]: json-encoded og data + Deferred[bytes]: json-encoded og data """ # check the URL cache in the DB (which will also provide us with # historical previews, if we have any) @@ -268,6 +271,17 @@ class PreviewUrlResource(DirectServeResource): logger.warn("Failed to find any OG data in %s", url) og = {} + # filter out any stupidly long values + keys_to_remove = [] + for k, v in og.items(): + if len(k) > OG_TAG_NAME_MAXLEN or len(v) > OG_TAG_VALUE_MAXLEN: + logger.warning( + "Pruning overlong tag %s from OG data", k[:OG_TAG_NAME_MAXLEN] + ) + keys_to_remove.append(k) + for k in keys_to_remove: + del og[k] + logger.debug("Calculated OG for %s as %s" % (url, og)) jsonog = json.dumps(og) @@ -502,6 +516,10 @@ def _calc_og(tree, media_uri): og = {} for tag in tree.xpath("//*/meta[starts-with(@property, 'og:')]"): if "content" in tag.attrib: + # if we've got more than 50 tags, someone is taking the piss + if len(og) >= 50: + logger.warning("skipping OG for page with too many og: tags") + return {} og[tag.attrib["property"]] = tag.attrib["content"] # TODO: grab article: meta tags too, e.g.: diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index 976652aee8..da19a8e86f 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -247,6 +247,40 @@ class URLPreviewTests(unittest.HomeserverTestCase): self.assertEqual(channel.code, 200) self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430") + def test_overlong_title(self): + self.lookups["matrix.org"] = [(IPv4Address, "8.8.8.8")] + + end_content = ( + b"" + b"" + b"x" * 2000 + b"" + b'' + b"" + ) + + request, channel = self.make_request( + "GET", "url_preview?url=http://matrix.org", shorthand=False + ) + request.render(self.preview_url) + self.pump() + + client = self.reactor.tcpClients[0][2].buildProtocol(None) + server = AccumulatingProtocol() + server.makeConnection(FakeTransport(client, self.reactor)) + client.makeConnection(FakeTransport(server, self.reactor)) + client.dataReceived( + ( + b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n" + b'Content-Type: text/html; charset="windows-1251"\r\n\r\n' + ) + % (len(end_content),) + + end_content + ) + + self.pump() + self.assertEqual(channel.code, 200) + res = channel.json_body + self.assertCountEqual(["og:description"], res.keys()) + def test_ipaddr(self): """ IP addresses can be previewed directly. From e78167c94b3f63136f7d0e4f32a05ad1befdc0ec Mon Sep 17 00:00:00 2001 From: Richard van der Hoff <1389908+richvdh@users.noreply.github.com> Date: Tue, 5 Nov 2019 16:46:39 +0000 Subject: [PATCH 2/5] Apply suggestions from code review Co-Authored-By: Brendan Abolivier Co-Authored-By: Erik Johnston --- synapse/rest/media/v1/preview_url_resource.py | 2 +- tests/rest/media/v1/test_url_preview.py | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 6d8c39a410..4d4b3c1462 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -518,7 +518,7 @@ def _calc_og(tree, media_uri): if "content" in tag.attrib: # if we've got more than 50 tags, someone is taking the piss if len(og) >= 50: - logger.warning("skipping OG for page with too many og: tags") + logger.warning("Skipping OG for page with too many 'og:' tags") return {} og[tag.attrib["property"]] = tag.attrib["content"] diff --git a/tests/rest/media/v1/test_url_preview.py b/tests/rest/media/v1/test_url_preview.py index da19a8e86f..852b8ab11c 100644 --- a/tests/rest/media/v1/test_url_preview.py +++ b/tests/rest/media/v1/test_url_preview.py @@ -279,6 +279,7 @@ class URLPreviewTests(unittest.HomeserverTestCase): self.pump() self.assertEqual(channel.code, 200) res = channel.json_body + # We should only see the `og:description` field, as `title` is too long and should be stripped out self.assertCountEqual(["og:description"], res.keys()) def test_ipaddr(self): From 81d49cbb07a4dc5a673e31a8a626af6e8a18f801 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Tue, 5 Nov 2019 17:22:58 +0000 Subject: [PATCH 3/5] Fix exception when OpenGraph tag values are ints --- changelog.d/6334.feature | 1 + synapse/rest/media/v1/preview_url_resource.py | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) create mode 100644 changelog.d/6334.feature diff --git a/changelog.d/6334.feature b/changelog.d/6334.feature new file mode 100644 index 0000000000..eaf69ef3f6 --- /dev/null +++ b/changelog.d/6334.feature @@ -0,0 +1 @@ +Limit the length of data returned by url previews, to prevent DoS attacks. diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py index 4d4b3c1462..ec9c4619c9 100644 --- a/synapse/rest/media/v1/preview_url_resource.py +++ b/synapse/rest/media/v1/preview_url_resource.py @@ -274,7 +274,8 @@ class PreviewUrlResource(DirectServeResource): # filter out any stupidly long values keys_to_remove = [] for k, v in og.items(): - if len(k) > OG_TAG_NAME_MAXLEN or len(v) > OG_TAG_VALUE_MAXLEN: + # values can be numeric as well as strings, hence the cast to str + if len(k) > OG_TAG_NAME_MAXLEN or len(str(v)) > OG_TAG_VALUE_MAXLEN: logger.warning( "Pruning overlong tag %s from OG data", k[:OG_TAG_NAME_MAXLEN] ) From feafd98aca3e72d27516c79f986a28ea39886ebc Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 6 Nov 2019 10:02:23 +0000 Subject: [PATCH 4/5] 1.5.1 --- CHANGES.md | 9 +++++++++ changelog.d/6331.feature | 1 - changelog.d/6334.feature | 1 - debian/changelog | 6 ++++++ synapse/__init__.py | 2 +- 5 files changed, 16 insertions(+), 3 deletions(-) delete mode 100644 changelog.d/6331.feature delete mode 100644 changelog.d/6334.feature diff --git a/CHANGES.md b/CHANGES.md index 6faa4b8dce..9312dc2941 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,3 +1,12 @@ +Synapse 1.5.1 (2019-11-06) +========================== + +Features +-------- + +- Limit the length of data returned by url previews, to prevent DoS attacks. ([\#6331](https://github.com/matrix-org/synapse/issues/6331), [\#6334](https://github.com/matrix-org/synapse/issues/6334)) + + Synapse 1.5.0 (2019-10-29) ========================== diff --git a/changelog.d/6331.feature b/changelog.d/6331.feature deleted file mode 100644 index eaf69ef3f6..0000000000 --- a/changelog.d/6331.feature +++ /dev/null @@ -1 +0,0 @@ -Limit the length of data returned by url previews, to prevent DoS attacks. diff --git a/changelog.d/6334.feature b/changelog.d/6334.feature deleted file mode 100644 index eaf69ef3f6..0000000000 --- a/changelog.d/6334.feature +++ /dev/null @@ -1 +0,0 @@ -Limit the length of data returned by url previews, to prevent DoS attacks. diff --git a/debian/changelog b/debian/changelog index acda7e5c63..c4415f460a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,3 +1,9 @@ +matrix-synapse-py3 (1.5.1) stable; urgency=medium + + * New synapse release 1.5.1. + + -- Synapse Packaging team Wed, 06 Nov 2019 10:02:14 +0000 + matrix-synapse-py3 (1.5.0) stable; urgency=medium * New synapse release 1.5.0. diff --git a/synapse/__init__.py b/synapse/__init__.py index 8587ffa76f..ec16f54a49 100644 --- a/synapse/__init__.py +++ b/synapse/__init__.py @@ -36,7 +36,7 @@ try: except ImportError: pass -__version__ = "1.5.0" +__version__ = "1.5.1" if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): # We import here so that we don't have to install a bunch of deps when From 4257feb20f328c83ac7cb27113f779e844623e30 Mon Sep 17 00:00:00 2001 From: Richard van der Hoff Date: Wed, 6 Nov 2019 13:35:56 +0000 Subject: [PATCH 5/5] build debs for eoan and bullseye --- scripts-dev/build_debian_packages | 2 ++ 1 file changed, 2 insertions(+) diff --git a/scripts-dev/build_debian_packages b/scripts-dev/build_debian_packages index 93305ee9b1..84eaec6a95 100755 --- a/scripts-dev/build_debian_packages +++ b/scripts-dev/build_debian_packages @@ -20,11 +20,13 @@ from concurrent.futures import ThreadPoolExecutor DISTS = ( "debian:stretch", "debian:buster", + "debian:bullseye", "debian:sid", "ubuntu:xenial", "ubuntu:bionic", "ubuntu:cosmic", "ubuntu:disco", + "ubuntu:eoan", ) DESC = '''\