Merge branch 'release-v1.5.1'

pull/6371/head
Richard van der Hoff 2019-11-06 13:50:55 +00:00
commit 08b2868ffe
6 changed files with 73 additions and 2 deletions

View File

@ -1,3 +1,12 @@
Synapse 1.5.1 (2019-11-06)
==========================
Features
--------
- Limit the length of data returned by URL previews, to prevent DoS attacks. ([\#6331](https://github.com/matrix-org/synapse/issues/6331), [\#6334](https://github.com/matrix-org/synapse/issues/6334))
Synapse 1.5.0 (2019-10-29) Synapse 1.5.0 (2019-10-29)
========================== ==========================

6
debian/changelog vendored
View File

@ -1,3 +1,9 @@
matrix-synapse-py3 (1.5.1) stable; urgency=medium
* New synapse release 1.5.1.
-- Synapse Packaging team <packages@matrix.org> Wed, 06 Nov 2019 10:02:14 +0000
matrix-synapse-py3 (1.5.0) stable; urgency=medium matrix-synapse-py3 (1.5.0) stable; urgency=medium
* New synapse release 1.5.0. * New synapse release 1.5.0.

View File

@ -20,11 +20,13 @@ from concurrent.futures import ThreadPoolExecutor
DISTS = ( DISTS = (
"debian:stretch", "debian:stretch",
"debian:buster", "debian:buster",
"debian:bullseye",
"debian:sid", "debian:sid",
"ubuntu:xenial", "ubuntu:xenial",
"ubuntu:bionic", "ubuntu:bionic",
"ubuntu:cosmic", "ubuntu:cosmic",
"ubuntu:disco", "ubuntu:disco",
"ubuntu:eoan",
) )
DESC = '''\ DESC = '''\

View File

@ -36,7 +36,7 @@ try:
except ImportError: except ImportError:
pass pass
__version__ = "1.5.0" __version__ = "1.5.1"
if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)): if bool(os.environ.get("SYNAPSE_TEST_PATCH_LOG_CONTEXTS", False)):
# We import here so that we don't have to install a bunch of deps when # We import here so that we don't have to install a bunch of deps when

View File

@ -56,6 +56,9 @@ logger = logging.getLogger(__name__)
_charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I) _charset_match = re.compile(br"<\s*meta[^>]*charset\s*=\s*([a-z0-9-]+)", flags=re.I)
_content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I) _content_type_match = re.compile(r'.*; *charset="?(.*?)"?(;|$)', flags=re.I)
OG_TAG_NAME_MAXLEN = 50
OG_TAG_VALUE_MAXLEN = 1000
class PreviewUrlResource(DirectServeResource): class PreviewUrlResource(DirectServeResource):
isLeaf = True isLeaf = True
@ -167,7 +170,7 @@ class PreviewUrlResource(DirectServeResource):
ts (int): ts (int):
Returns: Returns:
Deferred[str]: json-encoded og data Deferred[bytes]: json-encoded og data
""" """
# check the URL cache in the DB (which will also provide us with # check the URL cache in the DB (which will also provide us with
# historical previews, if we have any) # historical previews, if we have any)
@ -268,6 +271,18 @@ class PreviewUrlResource(DirectServeResource):
logger.warn("Failed to find any OG data in %s", url) logger.warn("Failed to find any OG data in %s", url)
og = {} og = {}
# filter out any stupidly long values
keys_to_remove = []
for k, v in og.items():
# values can be numeric as well as strings, hence the cast to str
if len(k) > OG_TAG_NAME_MAXLEN or len(str(v)) > OG_TAG_VALUE_MAXLEN:
logger.warning(
"Pruning overlong tag %s from OG data", k[:OG_TAG_NAME_MAXLEN]
)
keys_to_remove.append(k)
for k in keys_to_remove:
del og[k]
logger.debug("Calculated OG for %s as %s" % (url, og)) logger.debug("Calculated OG for %s as %s" % (url, og))
jsonog = json.dumps(og) jsonog = json.dumps(og)
@ -502,6 +517,10 @@ def _calc_og(tree, media_uri):
og = {} og = {}
for tag in tree.xpath("//*/meta[starts-with(@property, 'og:')]"): for tag in tree.xpath("//*/meta[starts-with(@property, 'og:')]"):
if "content" in tag.attrib: if "content" in tag.attrib:
# if we've got more than 50 tags, someone is taking the piss
if len(og) >= 50:
logger.warning("Skipping OG for page with too many 'og:' tags")
return {}
og[tag.attrib["property"]] = tag.attrib["content"] og[tag.attrib["property"]] = tag.attrib["content"]
# TODO: grab article: meta tags too, e.g.: # TODO: grab article: meta tags too, e.g.:

View File

@ -247,6 +247,41 @@ class URLPreviewTests(unittest.HomeserverTestCase):
self.assertEqual(channel.code, 200) self.assertEqual(channel.code, 200)
self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430") self.assertEqual(channel.json_body["og:title"], "\u0434\u043a\u0430")
def test_overlong_title(self):
# Checks that a page with a very long <title> still previews successfully
# (HTTP 200) and that the response contains only the short og:description.
#
# Map matrix.org to 8.8.8.8 in the test's lookup table.
# NOTE(review): presumably consumed by a fake DNS resolver set up elsewhere
# in this test case -- confirm.
self.lookups["matrix.org"] = [(IPv4Address, "8.8.8.8")]
# Page body: a 2000-byte <title> plus a two-character og:description.
# NOTE(review): 2000 is presumably chosen to exceed the 1000-character value
# cap (OG_TAG_VALUE_MAXLEN) applied by the preview resource -- confirm.
end_content = (
b"<html><head>"
b"<title>" + b"x" * 2000 + b"</title>"
b'<meta property="og:description" content="hi" />'
b"</head></html>"
)
# Issue the preview request and render it against the resource under test.
request, channel = self.make_request(
"GET", "url_preview?url=http://matrix.org", shorthand=False
)
request.render(self.preview_url)
self.pump()
# Take the first TCP client recorded by the test reactor and connect it to
# an in-memory server protocol, so no real network traffic is needed.
client = self.reactor.tcpClients[0][2].buildProtocol(None)
server = AccumulatingProtocol()
server.makeConnection(FakeTransport(client, self.reactor))
client.makeConnection(FakeTransport(server, self.reactor))
# Feed the client a canned HTTP 200 response whose Content-Length matches
# the body built above.
# NOTE(review): the windows-1251 charset looks incidental, since the body is
# pure ASCII -- confirm it is not needed for this test.
client.dataReceived(
(
b"HTTP/1.0 200 OK\r\nContent-Length: %d\r\n"
b'Content-Type: text/html; charset="windows-1251"\r\n\r\n'
)
% (len(end_content),)
+ end_content
)
self.pump()
self.assertEqual(channel.code, 200)
res = channel.json_body
# We should only see the `og:description` field, as `title` is too long and should be stripped out
# assertCountEqual compares elements regardless of order, so this asserts
# that "og:description" is the only key in the response.
self.assertCountEqual(["og:description"], res.keys())
def test_ipaddr(self): def test_ipaddr(self):
""" """
IP addresses can be previewed directly. IP addresses can be previewed directly.