213 lines
		
	
	
		
			7.9 KiB
		
	
	
	
		
			Python
		
	
	
			
		
		
	
	
			213 lines
		
	
	
		
			7.9 KiB
		
	
	
	
		
			Python
		
	
	
| # -*- coding: utf-8 -*-
 | |
| # Copyright 2014 OpenMarket Ltd
 | |
| #
 | |
| # Licensed under the Apache License, Version 2.0 (the "License");
 | |
| # you may not use this file except in compliance with the License.
 | |
| # You may obtain a copy of the License at
 | |
| #
 | |
| #     http://www.apache.org/licenses/LICENSE-2.0
 | |
| #
 | |
| # Unless required by applicable law or agreed to in writing, software
 | |
| # distributed under the License is distributed on an "AS IS" BASIS,
 | |
| # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 | |
| # See the License for the specific language governing permissions and
 | |
| # limitations under the License.
 | |
| 
 | |
| from .server import respond_with_json_bytes
 | |
| 
 | |
| from synapse.util.stringutils import random_string
 | |
| from synapse.api.errors import (
 | |
|     cs_exception, SynapseError, CodeMessageException, Codes, cs_error
 | |
| )
 | |
| 
 | |
| from twisted.protocols.basic import FileSender
 | |
| from twisted.web import server, resource
 | |
| from twisted.internet import defer
 | |
| 
 | |
| import base64
 | |
| import json
 | |
| import logging
 | |
| import os
 | |
| import re
 | |
| 
 | |
| logger = logging.getLogger(__name__)
 | |
| 
 | |
| 
 | |
| class ContentRepoResource(resource.Resource):
 | |
|     """Provides file uploading and downloading.
 | |
| 
 | |
|     Uploads are POSTed to wherever this Resource is linked to. This resource
 | |
|     returns a "content token" which can be used to GET this content again. The
 | |
|     token is typically a path, but it may not be. Tokens can expire, be
 | |
|     one-time uses, etc.
 | |
| 
 | |
|     In this case, the token is a path to the file and contains 3 interesting
 | |
|     sections:
 | |
|         - User ID base64d (for namespacing content to each user)
 | |
|         - random 24 char string
 | |
|         - Content type base64d (so we can return it when clients GET it)
 | |
| 
 | |
|     """
 | |
|     isLeaf = True
 | |
| 
 | |
|     def __init__(self, hs, directory, auth, external_addr):
 | |
|         resource.Resource.__init__(self)
 | |
|         self.hs = hs
 | |
|         self.directory = directory
 | |
|         self.auth = auth
 | |
|         self.external_addr = external_addr.rstrip('/')
 | |
|         self.max_upload_size = hs.config.max_upload_size
 | |
| 
 | |
|         if not os.path.isdir(self.directory):
 | |
|             os.mkdir(self.directory)
 | |
|             logger.info("ContentRepoResource : Created %s directory.",
 | |
|                         self.directory)
 | |
| 
 | |
|     @defer.inlineCallbacks
 | |
|     def map_request_to_name(self, request):
 | |
|         # auth the user
 | |
|         auth_user = yield self.auth.get_user_by_req(request)
 | |
| 
 | |
|         # namespace all file uploads on the user
 | |
|         prefix = base64.urlsafe_b64encode(
 | |
|             auth_user.to_string()
 | |
|         ).replace('=', '')
 | |
| 
 | |
|         # use a random string for the main portion
 | |
|         main_part = random_string(24)
 | |
| 
 | |
|         # suffix with a file extension if we can make one. This is nice to
 | |
|         # provide a hint to clients on the file information. We will also reuse
 | |
|         # this info to spit back the content type to the client.
 | |
|         suffix = ""
 | |
|         if request.requestHeaders.hasHeader("Content-Type"):
 | |
|             content_type = request.requestHeaders.getRawHeaders(
 | |
|                 "Content-Type")[0]
 | |
|             suffix = "." + base64.urlsafe_b64encode(content_type)
 | |
|             if (content_type.split("/")[0].lower() in
 | |
|                     ["image", "video", "audio"]):
 | |
|                 file_ext = content_type.split("/")[-1]
 | |
|                 # be a little paranoid and only allow a-z
 | |
|                 file_ext = re.sub("[^a-z]", "", file_ext)
 | |
|                 suffix += "." + file_ext
 | |
| 
 | |
|         file_name = prefix + main_part + suffix
 | |
|         file_path = os.path.join(self.directory, file_name)
 | |
|         logger.info("User %s is uploading a file to path %s",
 | |
|                     auth_user.to_string(),
 | |
|                     file_path)
 | |
| 
 | |
|         # keep trying to make a non-clashing file, with a sensible max attempts
 | |
|         attempts = 0
 | |
|         while os.path.exists(file_path):
 | |
|             main_part = random_string(24)
 | |
|             file_name = prefix + main_part + suffix
 | |
|             file_path = os.path.join(self.directory, file_name)
 | |
|             attempts += 1
 | |
|             if attempts > 25:  # really? Really?
 | |
|                 raise SynapseError(500, "Unable to create file.")
 | |
| 
 | |
|         defer.returnValue(file_path)
 | |
| 
 | |
|     def render_GET(self, request):
 | |
|         # no auth here on purpose, to allow anyone to view, even across home
 | |
|         # servers.
 | |
| 
 | |
|         # TODO: A little crude here, we could do this better.
 | |
|         filename = request.path.split('/')[-1]
 | |
|         # be paranoid
 | |
|         filename = re.sub("[^0-9A-z.-_]", "", filename)
 | |
| 
 | |
|         file_path = self.directory + "/" + filename
 | |
| 
 | |
|         logger.debug("Searching for %s", file_path)
 | |
| 
 | |
|         if os.path.isfile(file_path):
 | |
|             # filename has the content type
 | |
|             base64_contentype = filename.split(".")[1]
 | |
|             content_type = base64.urlsafe_b64decode(base64_contentype)
 | |
|             logger.info("Sending file %s", file_path)
 | |
|             f = open(file_path, 'rb')
 | |
|             request.setHeader('Content-Type', content_type)
 | |
| 
 | |
|             # cache for at least a day.
 | |
|             # XXX: we might want to turn this off for data we don't want to
 | |
|             # recommend caching as it's sensitive or private - or at least
 | |
|             # select private. don't bother setting Expires as all our matrix
 | |
|             # clients are smart enough to be happy with Cache-Control (right?)
 | |
|             request.setHeader(
 | |
|                 "Cache-Control", "public,max-age=86400,s-maxage=86400"
 | |
|             )
 | |
| 
 | |
|             d = FileSender().beginFileTransfer(f, request)
 | |
| 
 | |
|             # after the file has been sent, clean up and finish the request
 | |
|             def cbFinished(ignored):
 | |
|                 f.close()
 | |
|                 request.finish()
 | |
|             d.addCallback(cbFinished)
 | |
|         else:
 | |
|             respond_with_json_bytes(
 | |
|                 request,
 | |
|                 404,
 | |
|                 json.dumps(cs_error("Not found", code=Codes.NOT_FOUND)),
 | |
|                 send_cors=True)
 | |
| 
 | |
|         return server.NOT_DONE_YET
 | |
| 
 | |
|     def render_POST(self, request):
 | |
|         self._async_render(request)
 | |
|         return server.NOT_DONE_YET
 | |
| 
 | |
|     def render_OPTIONS(self, request):
 | |
|         respond_with_json_bytes(request, 200, {}, send_cors=True)
 | |
|         return server.NOT_DONE_YET
 | |
| 
 | |
|     @defer.inlineCallbacks
 | |
|     def _async_render(self, request):
 | |
|         try:
 | |
|             # TODO: The checks here are a bit late. The content will have
 | |
|             # already been uploaded to a tmp file at this point
 | |
|             content_length = request.getHeader("Content-Length")
 | |
|             if content_length is None:
 | |
|                 raise SynapseError(
 | |
|                     msg="Request must specify a Content-Length", code=400
 | |
|                 )
 | |
|             if int(content_length) > self.max_upload_size:
 | |
|                 raise SynapseError(
 | |
|                     msg="Upload request body is too large",
 | |
|                     code=413,
 | |
|                 )
 | |
| 
 | |
|             fname = yield self.map_request_to_name(request)
 | |
| 
 | |
|             # TODO I have a suspicious feeling this is just going to block
 | |
|             with open(fname, "wb") as f:
 | |
|                 f.write(request.content.read())
 | |
| 
 | |
|             # FIXME (erikj): These should use constants.
 | |
|             file_name = os.path.basename(fname)
 | |
|             # FIXME: we can't assume what the repo's public mounted path is
 | |
|             # ...plus self-signed SSL won't work to remote clients anyway
 | |
|             # ...and we can't assume that it's SSL anyway, as we might want to
 | |
|             # serve it via the non-SSL listener...
 | |
|             url = "%s/_matrix/content/%s" % (
 | |
|                 self.external_addr, file_name
 | |
|             )
 | |
| 
 | |
|             respond_with_json_bytes(request, 200,
 | |
|                                     json.dumps({"content_token": url}),
 | |
|                                     send_cors=True)
 | |
| 
 | |
|         except CodeMessageException as e:
 | |
|             logger.exception(e)
 | |
|             respond_with_json_bytes(request, e.code,
 | |
|                                     json.dumps(cs_exception(e)))
 | |
|         except Exception as e:
 | |
|             logger.error("Failed to store file: %s" % e)
 | |
|             respond_with_json_bytes(
 | |
|                 request,
 | |
|                 500,
 | |
|                 json.dumps({"error": "Internal server error"}),
 | |
|                 send_cors=True)
 |