Fix Content-Disposition in media repository (#4176)
parent
835779f7fb
commit
8b1affe7d5
|
@ -0,0 +1 @@
|
|||
The media repository now no longer fails to decode UTF-8 filenames when downloading remote media.
|
|
@ -16,6 +16,7 @@
|
|||
import logging
|
||||
import os
|
||||
|
||||
from six import PY3
|
||||
from six.moves import urllib
|
||||
|
||||
from twisted.internet import defer
|
||||
|
@ -48,26 +49,21 @@ def parse_media_id(request):
|
|||
return server_name, media_id, file_name
|
||||
except Exception:
|
||||
raise SynapseError(
|
||||
404,
|
||||
"Invalid media id token %r" % (request.postpath,),
|
||||
Codes.UNKNOWN,
|
||||
404, "Invalid media id token %r" % (request.postpath,), Codes.UNKNOWN
|
||||
)
|
||||
|
||||
|
||||
def respond_404(request):
|
||||
respond_with_json(
|
||||
request, 404,
|
||||
cs_error(
|
||||
"Not found %r" % (request.postpath,),
|
||||
code=Codes.NOT_FOUND,
|
||||
),
|
||||
send_cors=True
|
||||
request,
|
||||
404,
|
||||
cs_error("Not found %r" % (request.postpath,), code=Codes.NOT_FOUND),
|
||||
send_cors=True,
|
||||
)
|
||||
|
||||
|
||||
@defer.inlineCallbacks
|
||||
def respond_with_file(request, media_type, file_path,
|
||||
file_size=None, upload_name=None):
|
||||
def respond_with_file(request, media_type, file_path, file_size=None, upload_name=None):
|
||||
logger.debug("Responding with %r", file_path)
|
||||
|
||||
if os.path.isfile(file_path):
|
||||
|
@ -97,31 +93,26 @@ def add_file_headers(request, media_type, file_size, upload_name):
|
|||
file_size (int): Size in bytes of the media, if known.
|
||||
upload_name (str): The name of the requested file, if any.
|
||||
"""
|
||||
|
||||
def _quote(x):
|
||||
return urllib.parse.quote(x.encode("utf-8"))
|
||||
|
||||
request.setHeader(b"Content-Type", media_type.encode("UTF-8"))
|
||||
if upload_name:
|
||||
if is_ascii(upload_name):
|
||||
disposition = ("inline; filename=%s" % (_quote(upload_name),)).encode("ascii")
|
||||
disposition = "inline; filename=%s" % (_quote(upload_name),)
|
||||
else:
|
||||
disposition = (
|
||||
"inline; filename*=utf-8''%s" % (_quote(upload_name),)).encode("ascii")
|
||||
disposition = "inline; filename*=utf-8''%s" % (_quote(upload_name),)
|
||||
|
||||
request.setHeader(b"Content-Disposition", disposition)
|
||||
request.setHeader(b"Content-Disposition", disposition.encode('ascii'))
|
||||
|
||||
# cache for at least a day.
|
||||
# XXX: we might want to turn this off for data we don't want to
|
||||
# recommend caching as it's sensitive or private - or at least
|
||||
# select private. don't bother setting Expires as all our
|
||||
# clients are smart enough to be happy with Cache-Control
|
||||
request.setHeader(
|
||||
b"Cache-Control", b"public,max-age=86400,s-maxage=86400"
|
||||
)
|
||||
|
||||
request.setHeader(
|
||||
b"Content-Length", b"%d" % (file_size,)
|
||||
)
|
||||
request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
|
||||
request.setHeader(b"Content-Length", b"%d" % (file_size,))
|
||||
|
||||
|
||||
@defer.inlineCallbacks
|
||||
|
@ -153,6 +144,7 @@ class Responder(object):
|
|||
Responder is a context manager which *must* be used, so that any resources
|
||||
held can be cleaned up.
|
||||
"""
|
||||
|
||||
def write_to_consumer(self, consumer):
|
||||
"""Stream response into consumer
|
||||
|
||||
|
@ -186,9 +178,18 @@ class FileInfo(object):
|
|||
thumbnail_method (str)
|
||||
thumbnail_type (str): Content type of thumbnail, e.g. image/png
|
||||
"""
|
||||
def __init__(self, server_name, file_id, url_cache=False,
|
||||
thumbnail=False, thumbnail_width=None, thumbnail_height=None,
|
||||
thumbnail_method=None, thumbnail_type=None):
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
server_name,
|
||||
file_id,
|
||||
url_cache=False,
|
||||
thumbnail=False,
|
||||
thumbnail_width=None,
|
||||
thumbnail_height=None,
|
||||
thumbnail_method=None,
|
||||
thumbnail_type=None,
|
||||
):
|
||||
self.server_name = server_name
|
||||
self.file_id = file_id
|
||||
self.url_cache = url_cache
|
||||
|
@ -197,3 +198,74 @@ class FileInfo(object):
|
|||
self.thumbnail_height = thumbnail_height
|
||||
self.thumbnail_method = thumbnail_method
|
||||
self.thumbnail_type = thumbnail_type
|
||||
|
||||
|
||||
def get_filename_from_headers(headers):
|
||||
"""
|
||||
Get the filename of the downloaded file by inspecting the
|
||||
Content-Disposition HTTP header.
|
||||
|
||||
Args:
|
||||
headers (twisted.web.http_headers.Headers): The HTTP
|
||||
request headers.
|
||||
|
||||
Returns:
|
||||
A Unicode string of the filename, or None.
|
||||
"""
|
||||
content_disposition = headers.get(b"Content-Disposition", [b''])
|
||||
|
||||
# No header, bail out.
|
||||
if not content_disposition[0]:
|
||||
return
|
||||
|
||||
# dict of unicode: bytes, corresponding to the key value sections of the
|
||||
# Content-Disposition header.
|
||||
params = {}
|
||||
parts = content_disposition[0].split(b";")
|
||||
for i in parts:
|
||||
# Split into key-value pairs, if able
|
||||
# We don't care about things like `inline`, so throw it out
|
||||
if b"=" not in i:
|
||||
continue
|
||||
|
||||
key, value = i.strip().split(b"=")
|
||||
params[key.decode('ascii')] = value
|
||||
|
||||
upload_name = None
|
||||
|
||||
# First check if there is a valid UTF-8 filename
|
||||
upload_name_utf8 = params.get("filename*", None)
|
||||
if upload_name_utf8:
|
||||
if upload_name_utf8.lower().startswith(b"utf-8''"):
|
||||
upload_name_utf8 = upload_name_utf8[7:]
|
||||
# We have a filename*= section. This MUST be ASCII, and any UTF-8
|
||||
# bytes are %-quoted.
|
||||
if PY3:
|
||||
try:
|
||||
# Once it is decoded, we can then unquote the %-encoded
|
||||
# parts strictly into a unicode string.
|
||||
upload_name = urllib.parse.unquote(
|
||||
upload_name_utf8.decode('ascii'), errors="strict"
|
||||
)
|
||||
except UnicodeDecodeError:
|
||||
# Incorrect UTF-8.
|
||||
pass
|
||||
else:
|
||||
# On Python 2, we first unquote the %-encoded parts and then
|
||||
# decode it strictly using UTF-8.
|
||||
try:
|
||||
upload_name = urllib.parse.unquote(upload_name_utf8).decode('utf8')
|
||||
except UnicodeDecodeError:
|
||||
pass
|
||||
|
||||
# If there isn't check for an ascii name.
|
||||
if not upload_name:
|
||||
upload_name_ascii = params.get("filename", None)
|
||||
if upload_name_ascii and is_ascii(upload_name_ascii):
|
||||
# Make sure there's no %-quoted bytes. If there is, reject it as
|
||||
# non-valid ASCII.
|
||||
if b"%" not in upload_name_ascii:
|
||||
upload_name = upload_name_ascii.decode('ascii')
|
||||
|
||||
# This may be None here, indicating we did not find a matching name.
|
||||
return upload_name
|
||||
|
|
|
@ -14,14 +14,12 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import cgi
|
||||
import errno
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
|
||||
from six import PY3, iteritems
|
||||
from six.moves.urllib import parse as urlparse
|
||||
from six import iteritems
|
||||
|
||||
import twisted.internet.error
|
||||
import twisted.web.http
|
||||
|
@ -34,14 +32,18 @@ from synapse.api.errors import (
|
|||
NotFoundError,
|
||||
SynapseError,
|
||||
)
|
||||
from synapse.http.matrixfederationclient import MatrixFederationHttpClient
|
||||
from synapse.metrics.background_process_metrics import run_as_background_process
|
||||
from synapse.util import logcontext
|
||||
from synapse.util.async_helpers import Linearizer
|
||||
from synapse.util.retryutils import NotRetryingDestination
|
||||
from synapse.util.stringutils import is_ascii, random_string
|
||||
from synapse.util.stringutils import random_string
|
||||
|
||||
from ._base import FileInfo, respond_404, respond_with_responder
|
||||
from ._base import (
|
||||
FileInfo,
|
||||
get_filename_from_headers,
|
||||
respond_404,
|
||||
respond_with_responder,
|
||||
)
|
||||
from .config_resource import MediaConfigResource
|
||||
from .download_resource import DownloadResource
|
||||
from .filepath import MediaFilePaths
|
||||
|
@ -62,7 +64,7 @@ class MediaRepository(object):
|
|||
def __init__(self, hs):
|
||||
self.hs = hs
|
||||
self.auth = hs.get_auth()
|
||||
self.client = MatrixFederationHttpClient(hs)
|
||||
self.client = hs.get_http_client()
|
||||
self.clock = hs.get_clock()
|
||||
self.server_name = hs.hostname
|
||||
self.store = hs.get_datastore()
|
||||
|
@ -397,39 +399,9 @@ class MediaRepository(object):
|
|||
yield finish()
|
||||
|
||||
media_type = headers[b"Content-Type"][0].decode('ascii')
|
||||
|
||||
upload_name = get_filename_from_headers(headers)
|
||||
time_now_ms = self.clock.time_msec()
|
||||
|
||||
content_disposition = headers.get(b"Content-Disposition", None)
|
||||
if content_disposition:
|
||||
_, params = cgi.parse_header(content_disposition[0].decode('ascii'),)
|
||||
upload_name = None
|
||||
|
||||
# First check if there is a valid UTF-8 filename
|
||||
upload_name_utf8 = params.get("filename*", None)
|
||||
if upload_name_utf8:
|
||||
if upload_name_utf8.lower().startswith("utf-8''"):
|
||||
upload_name = upload_name_utf8[7:]
|
||||
|
||||
# If there isn't check for an ascii name.
|
||||
if not upload_name:
|
||||
upload_name_ascii = params.get("filename", None)
|
||||
if upload_name_ascii and is_ascii(upload_name_ascii):
|
||||
upload_name = upload_name_ascii
|
||||
|
||||
if upload_name:
|
||||
if PY3:
|
||||
upload_name = urlparse.unquote(upload_name)
|
||||
else:
|
||||
upload_name = urlparse.unquote(upload_name.encode('ascii'))
|
||||
try:
|
||||
if isinstance(upload_name, bytes):
|
||||
upload_name = upload_name.decode("utf-8")
|
||||
except UnicodeDecodeError:
|
||||
upload_name = None
|
||||
else:
|
||||
upload_name = None
|
||||
|
||||
logger.info("Stored remote media in file %r", fname)
|
||||
|
||||
yield self.store.store_cached_remote_media(
|
||||
|
|
|
@ -13,7 +13,6 @@
|
|||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
|
||||
import cgi
|
||||
import datetime
|
||||
import errno
|
||||
import fnmatch
|
||||
|
@ -44,10 +43,11 @@ from synapse.http.server import (
|
|||
)
|
||||
from synapse.http.servlet import parse_integer, parse_string
|
||||
from synapse.metrics.background_process_metrics import run_as_background_process
|
||||
from synapse.rest.media.v1._base import get_filename_from_headers
|
||||
from synapse.util.async_helpers import ObservableDeferred
|
||||
from synapse.util.caches.expiringcache import ExpiringCache
|
||||
from synapse.util.logcontext import make_deferred_yieldable, run_in_background
|
||||
from synapse.util.stringutils import is_ascii, random_string
|
||||
from synapse.util.stringutils import random_string
|
||||
|
||||
from ._base import FileInfo
|
||||
|
||||
|
@ -336,31 +336,7 @@ class PreviewUrlResource(Resource):
|
|||
media_type = "application/octet-stream"
|
||||
time_now_ms = self.clock.time_msec()
|
||||
|
||||
content_disposition = headers.get(b"Content-Disposition", None)
|
||||
if content_disposition:
|
||||
_, params = cgi.parse_header(content_disposition[0],)
|
||||
download_name = None
|
||||
|
||||
# First check if there is a valid UTF-8 filename
|
||||
download_name_utf8 = params.get("filename*", None)
|
||||
if download_name_utf8:
|
||||
if download_name_utf8.lower().startswith("utf-8''"):
|
||||
download_name = download_name_utf8[7:]
|
||||
|
||||
# If there isn't check for an ascii name.
|
||||
if not download_name:
|
||||
download_name_ascii = params.get("filename", None)
|
||||
if download_name_ascii and is_ascii(download_name_ascii):
|
||||
download_name = download_name_ascii
|
||||
|
||||
if download_name:
|
||||
download_name = urlparse.unquote(download_name)
|
||||
try:
|
||||
download_name = download_name.decode("utf-8")
|
||||
except UnicodeDecodeError:
|
||||
download_name = None
|
||||
else:
|
||||
download_name = None
|
||||
download_name = get_filename_from_headers(headers)
|
||||
|
||||
yield self.store.store_local_media(
|
||||
media_id=file_id,
|
||||
|
|
|
@ -17,15 +17,20 @@
|
|||
import os
|
||||
import shutil
|
||||
import tempfile
|
||||
from binascii import unhexlify
|
||||
|
||||
from mock import Mock
|
||||
from six.moves.urllib import parse
|
||||
|
||||
from twisted.internet import defer, reactor
|
||||
from twisted.internet.defer import Deferred
|
||||
|
||||
from synapse.config.repository import MediaStorageProviderConfig
|
||||
from synapse.rest.media.v1._base import FileInfo
|
||||
from synapse.rest.media.v1.filepath import MediaFilePaths
|
||||
from synapse.rest.media.v1.media_storage import MediaStorage
|
||||
from synapse.rest.media.v1.storage_provider import FileStorageProviderBackend
|
||||
from synapse.util.module_loader import load_module
|
||||
|
||||
from tests import unittest
|
||||
|
||||
|
@ -83,3 +88,143 @@ class MediaStorageTests(unittest.TestCase):
|
|||
body = f.read()
|
||||
|
||||
self.assertEqual(test_body, body)
|
||||
|
||||
|
||||
class MediaRepoTests(unittest.HomeserverTestCase):
|
||||
|
||||
hijack_auth = True
|
||||
user_id = "@test:user"
|
||||
|
||||
def make_homeserver(self, reactor, clock):
|
||||
|
||||
self.fetches = []
|
||||
|
||||
def get_file(destination, path, output_stream, args=None, max_size=None):
|
||||
"""
|
||||
Returns tuple[int,dict,str,int] of file length, response headers,
|
||||
absolute URI, and response code.
|
||||
"""
|
||||
|
||||
def write_to(r):
|
||||
data, response = r
|
||||
output_stream.write(data)
|
||||
return response
|
||||
|
||||
d = Deferred()
|
||||
d.addCallback(write_to)
|
||||
self.fetches.append((d, destination, path, args))
|
||||
return d
|
||||
|
||||
client = Mock()
|
||||
client.get_file = get_file
|
||||
|
||||
self.storage_path = self.mktemp()
|
||||
os.mkdir(self.storage_path)
|
||||
|
||||
config = self.default_config()
|
||||
config.media_store_path = self.storage_path
|
||||
config.thumbnail_requirements = {}
|
||||
config.max_image_pixels = 2000000
|
||||
|
||||
provider_config = {
|
||||
"module": "synapse.rest.media.v1.storage_provider.FileStorageProviderBackend",
|
||||
"store_local": True,
|
||||
"store_synchronous": False,
|
||||
"store_remote": True,
|
||||
"config": {"directory": self.storage_path},
|
||||
}
|
||||
|
||||
loaded = list(load_module(provider_config)) + [
|
||||
MediaStorageProviderConfig(False, False, False)
|
||||
]
|
||||
|
||||
config.media_storage_providers = [loaded]
|
||||
|
||||
hs = self.setup_test_homeserver(config=config, http_client=client)
|
||||
|
||||
return hs
|
||||
|
||||
def prepare(self, reactor, clock, hs):
|
||||
|
||||
self.media_repo = hs.get_media_repository_resource()
|
||||
self.download_resource = self.media_repo.children[b'download']
|
||||
|
||||
# smol png
|
||||
self.end_content = unhexlify(
|
||||
b"89504e470d0a1a0a0000000d4948445200000001000000010806"
|
||||
b"0000001f15c4890000000a49444154789c63000100000500010d"
|
||||
b"0a2db40000000049454e44ae426082"
|
||||
)
|
||||
|
||||
def _req(self, content_disposition):
|
||||
|
||||
request, channel = self.make_request(
|
||||
"GET", "example.com/12345", shorthand=False
|
||||
)
|
||||
request.render(self.download_resource)
|
||||
self.pump()
|
||||
|
||||
# We've made one fetch, to example.com, using the media URL, and asking
|
||||
# the other server not to do a remote fetch
|
||||
self.assertEqual(len(self.fetches), 1)
|
||||
self.assertEqual(self.fetches[0][1], "example.com")
|
||||
self.assertEqual(
|
||||
self.fetches[0][2], "/_matrix/media/v1/download/example.com/12345"
|
||||
)
|
||||
self.assertEqual(self.fetches[0][3], {"allow_remote": "false"})
|
||||
|
||||
headers = {
|
||||
b"Content-Length": [b"%d" % (len(self.end_content))],
|
||||
b"Content-Type": [b'image/png'],
|
||||
}
|
||||
if content_disposition:
|
||||
headers[b"Content-Disposition"] = [content_disposition]
|
||||
|
||||
self.fetches[0][0].callback(
|
||||
(self.end_content, (len(self.end_content), headers))
|
||||
)
|
||||
|
||||
self.pump()
|
||||
self.assertEqual(channel.code, 200)
|
||||
|
||||
return channel
|
||||
|
||||
def test_disposition_filename_ascii(self):
|
||||
"""
|
||||
If the filename is filename=<ascii> then Synapse will decode it as an
|
||||
ASCII string, and use filename= in the response.
|
||||
"""
|
||||
channel = self._req(b"inline; filename=out.png")
|
||||
|
||||
headers = channel.headers
|
||||
self.assertEqual(headers.getRawHeaders(b"Content-Type"), [b"image/png"])
|
||||
self.assertEqual(
|
||||
headers.getRawHeaders(b"Content-Disposition"), [b"inline; filename=out.png"]
|
||||
)
|
||||
|
||||
def test_disposition_filenamestar_utf8escaped(self):
|
||||
"""
|
||||
If the filename is filename=*utf8''<utf8 escaped> then Synapse will
|
||||
correctly decode it as the UTF-8 string, and use filename* in the
|
||||
response.
|
||||
"""
|
||||
filename = parse.quote(u"\u2603".encode('utf8')).encode('ascii')
|
||||
channel = self._req(b"inline; filename*=utf-8''" + filename + b".png")
|
||||
|
||||
headers = channel.headers
|
||||
self.assertEqual(headers.getRawHeaders(b"Content-Type"), [b"image/png"])
|
||||
self.assertEqual(
|
||||
headers.getRawHeaders(b"Content-Disposition"),
|
||||
[b"inline; filename*=utf-8''" + filename + b".png"],
|
||||
)
|
||||
|
||||
def test_disposition_none(self):
|
||||
"""
|
||||
If there is no filename, one isn't passed on in the Content-Disposition
|
||||
of the request.
|
||||
"""
|
||||
channel = self._req(None)
|
||||
|
||||
headers = channel.headers
|
||||
self.assertEqual(headers.getRawHeaders(b"Content-Type"), [b"image/png"])
|
||||
self.assertEqual(headers.getRawHeaders(b"Content-Disposition"), None)
|
||||
|
|
|
@ -14,6 +14,8 @@ from twisted.internet.error import DNSLookupError
|
|||
from twisted.internet.interfaces import IReactorPluggableNameResolver
|
||||
from twisted.python.failure import Failure
|
||||
from twisted.test.proto_helpers import MemoryReactorClock
|
||||
from twisted.web.http import unquote
|
||||
from twisted.web.http_headers import Headers
|
||||
|
||||
from synapse.http.site import SynapseRequest
|
||||
from synapse.util import Clock
|
||||
|
@ -50,6 +52,15 @@ class FakeChannel(object):
|
|||
raise Exception("No result yet.")
|
||||
return int(self.result["code"])
|
||||
|
||||
@property
|
||||
def headers(self):
|
||||
if not self.result:
|
||||
raise Exception("No result yet.")
|
||||
h = Headers()
|
||||
for i in self.result["headers"]:
|
||||
h.addRawHeader(*i)
|
||||
return h
|
||||
|
||||
def writeHeaders(self, version, code, reason, headers):
|
||||
self.result["version"] = version
|
||||
self.result["code"] = code
|
||||
|
@ -152,6 +163,9 @@ def make_request(
|
|||
path = b"/_matrix/client/r0/" + path
|
||||
path = path.replace(b"//", b"/")
|
||||
|
||||
if not path.startswith(b"/"):
|
||||
path = b"/" + path
|
||||
|
||||
if isinstance(content, text_type):
|
||||
content = content.encode('utf8')
|
||||
|
||||
|
@ -161,6 +175,7 @@ def make_request(
|
|||
req = request(site, channel)
|
||||
req.process = lambda: b""
|
||||
req.content = BytesIO(content)
|
||||
req.postpath = list(map(unquote, path[1:].split(b'/')))
|
||||
|
||||
if access_token:
|
||||
req.requestHeaders.addRawHeader(
|
||||
|
|
Loading…
Reference in New Issue