Merge pull request #902 from matrix-org/erikj/expire_media

Feature: Implement purge_media_cache admin API
pull/903/head
Erik Johnston 2016-06-29 15:50:57 +01:00 committed by GitHub
commit aac546c978
7 changed files with 190 additions and 21 deletions

View File

@ -46,5 +46,37 @@ class WhoisRestServlet(ClientV1RestServlet):
defer.returnValue((200, ret)) defer.returnValue((200, ret))
class PurgeMediaCacheRestServlet(ClientV1RestServlet):
PATTERNS = client_path_patterns("/admin/purge_media_cache")
def __init__(self, hs):
self.media_repository = hs.get_media_repository()
super(PurgeMediaCacheRestServlet, self).__init__(hs)
@defer.inlineCallbacks
def on_POST(self, request):
requester = yield self.auth.get_user_by_req(request)
is_admin = yield self.auth.is_server_admin(requester.user)
if not is_admin:
raise AuthError(403, "You are not a server admin")
before_ts = request.args.get("before_ts", None)
if not before_ts:
raise SynapseError(400, "Missing 'before_ts' arg")
logger.info("before_ts: %r", before_ts[0])
try:
before_ts = int(before_ts[0])
except Exception:
raise SynapseError(400, "Invalid 'before_ts' arg")
ret = yield self.media_repository.delete_old_remote_media(before_ts)
defer.returnValue((200, ret))
def register_servlets(hs, http_server): def register_servlets(hs, http_server):
WhoisRestServlet(hs).register(http_server) WhoisRestServlet(hs).register(http_server)
PurgeMediaCacheRestServlet(hs).register(http_server)

View File

@ -65,3 +65,9 @@ class MediaFilePaths(object):
file_id[0:2], file_id[2:4], file_id[4:], file_id[0:2], file_id[2:4], file_id[4:],
file_name file_name
) )
def remote_media_thumbnail_dir(self, server_name, file_id):
return os.path.join(
self.base_path, "remote_thumbnail", server_name,
file_id[0:2], file_id[2:4], file_id[4:],
)

View File

@ -30,11 +30,13 @@ from synapse.api.errors import SynapseError
from twisted.internet import defer, threads from twisted.internet import defer, threads
from synapse.util.async import ObservableDeferred from synapse.util.async import Linearizer
from synapse.util.stringutils import is_ascii from synapse.util.stringutils import is_ascii
from synapse.util.logcontext import preserve_context_over_fn from synapse.util.logcontext import preserve_context_over_fn
import os import os
import errno
import shutil
import cgi import cgi
import logging import logging
@ -43,8 +45,11 @@ import urlparse
logger = logging.getLogger(__name__) logger = logging.getLogger(__name__)
UPDATE_RECENTLY_ACCESSED_REMOTES_TS = 60 * 1000
class MediaRepository(object): class MediaRepository(object):
def __init__(self, hs, filepaths): def __init__(self, hs):
self.auth = hs.get_auth() self.auth = hs.get_auth()
self.client = MatrixFederationHttpClient(hs) self.client = MatrixFederationHttpClient(hs)
self.clock = hs.get_clock() self.clock = hs.get_clock()
@ -52,11 +57,28 @@ class MediaRepository(object):
self.store = hs.get_datastore() self.store = hs.get_datastore()
self.max_upload_size = hs.config.max_upload_size self.max_upload_size = hs.config.max_upload_size
self.max_image_pixels = hs.config.max_image_pixels self.max_image_pixels = hs.config.max_image_pixels
self.filepaths = filepaths self.filepaths = MediaFilePaths(hs.config.media_store_path)
self.downloads = {}
self.dynamic_thumbnails = hs.config.dynamic_thumbnails self.dynamic_thumbnails = hs.config.dynamic_thumbnails
self.thumbnail_requirements = hs.config.thumbnail_requirements self.thumbnail_requirements = hs.config.thumbnail_requirements
self.remote_media_linearizer = Linearizer()
self.recently_accessed_remotes = set()
self.clock.looping_call(
self._update_recently_accessed_remotes,
UPDATE_RECENTLY_ACCESSED_REMOTES_TS
)
@defer.inlineCallbacks
def _update_recently_accessed_remotes(self):
media = self.recently_accessed_remotes
self.recently_accessed_remotes = set()
yield self.store.update_cached_last_access_time(
media, self.clock.time_msec()
)
@staticmethod @staticmethod
def _makedirs(filepath): def _makedirs(filepath):
dirname = os.path.dirname(filepath) dirname = os.path.dirname(filepath)
@ -93,22 +115,12 @@ class MediaRepository(object):
defer.returnValue("mxc://%s/%s" % (self.server_name, media_id)) defer.returnValue("mxc://%s/%s" % (self.server_name, media_id))
@defer.inlineCallbacks
def get_remote_media(self, server_name, media_id): def get_remote_media(self, server_name, media_id):
key = (server_name, media_id) key = (server_name, media_id)
download = self.downloads.get(key) with (yield self.remote_media_linearizer.queue(key)):
if download is None: media_info = yield self._get_remote_media_impl(server_name, media_id)
download = self._get_remote_media_impl(server_name, media_id) defer.returnValue(media_info)
download = ObservableDeferred(
download,
consumeErrors=True
)
self.downloads[key] = download
@download.addBoth
def callback(media_info):
del self.downloads[key]
return media_info
return download.observe()
@defer.inlineCallbacks @defer.inlineCallbacks
def _get_remote_media_impl(self, server_name, media_id): def _get_remote_media_impl(self, server_name, media_id):
@ -119,6 +131,11 @@ class MediaRepository(object):
media_info = yield self._download_remote_file( media_info = yield self._download_remote_file(
server_name, media_id server_name, media_id
) )
else:
self.recently_accessed_remotes.add((server_name, media_id))
yield self.store.update_cached_last_access_time(
[(server_name, media_id)], self.clock.time_msec()
)
defer.returnValue(media_info) defer.returnValue(media_info)
@defer.inlineCallbacks @defer.inlineCallbacks
@ -416,6 +433,41 @@ class MediaRepository(object):
"height": m_height, "height": m_height,
}) })
@defer.inlineCallbacks
def delete_old_remote_media(self, before_ts):
old_media = yield self.store.get_remote_media_before(before_ts)
deleted = 0
for media in old_media:
origin = media["media_origin"]
media_id = media["media_id"]
file_id = media["filesystem_id"]
key = (origin, media_id)
logger.info("Deleting: %r", key)
with (yield self.remote_media_linearizer.queue(key)):
full_path = self.filepaths.remote_media_filepath(origin, file_id)
try:
os.remove(full_path)
except OSError as e:
logger.warn("Failed to remove file: %r", full_path)
if e.errno == errno.ENOENT:
pass
else:
continue
thumbnail_dir = self.filepaths.remote_media_thumbnail_dir(
origin, file_id
)
shutil.rmtree(thumbnail_dir, ignore_errors=True)
yield self.store.delete_remote_media(origin, media_id)
deleted += 1
defer.returnValue({"deleted": deleted})
class MediaRepositoryResource(Resource): class MediaRepositoryResource(Resource):
"""File uploading and downloading. """File uploading and downloading.
@ -464,9 +516,8 @@ class MediaRepositoryResource(Resource):
def __init__(self, hs): def __init__(self, hs):
Resource.__init__(self) Resource.__init__(self)
filepaths = MediaFilePaths(hs.config.media_store_path)
media_repo = MediaRepository(hs, filepaths) media_repo = hs.get_media_repository()
self.putChild("upload", UploadResource(hs, media_repo)) self.putChild("upload", UploadResource(hs, media_repo))
self.putChild("download", DownloadResource(hs, media_repo)) self.putChild("download", DownloadResource(hs, media_repo))

View File

@ -45,6 +45,7 @@ from synapse.crypto.keyring import Keyring
from synapse.push.pusherpool import PusherPool from synapse.push.pusherpool import PusherPool
from synapse.events.builder import EventBuilderFactory from synapse.events.builder import EventBuilderFactory
from synapse.api.filtering import Filtering from synapse.api.filtering import Filtering
from synapse.rest.media.v1.media_repository import MediaRepository
from synapse.http.matrixfederationclient import MatrixFederationHttpClient from synapse.http.matrixfederationclient import MatrixFederationHttpClient
@ -113,6 +114,7 @@ class HomeServer(object):
'filtering', 'filtering',
'http_client_context_factory', 'http_client_context_factory',
'simple_http_client', 'simple_http_client',
'media_repository',
] ]
def __init__(self, hostname, **kwargs): def __init__(self, hostname, **kwargs):
@ -233,6 +235,9 @@ class HomeServer(object):
**self.db_config.get("args", {}) **self.db_config.get("args", {})
) )
def build_media_repository(self):
return MediaRepository(self)
def remove_pusher(self, app_id, push_key, user_id): def remove_pusher(self, app_id, push_key, user_id):
return self.get_pusherpool().remove_pusher(app_id, push_key, user_id) return self.get_pusherpool().remove_pusher(app_id, push_key, user_id)

View File

@ -157,10 +157,25 @@ class MediaRepositoryStore(SQLBaseStore):
"created_ts": time_now_ms, "created_ts": time_now_ms,
"upload_name": upload_name, "upload_name": upload_name,
"filesystem_id": filesystem_id, "filesystem_id": filesystem_id,
"last_access_ts": time_now_ms,
}, },
desc="store_cached_remote_media", desc="store_cached_remote_media",
) )
def update_cached_last_access_time(self, origin_id_tuples, time_ts):
def update_cache_txn(txn):
sql = (
"UPDATE remote_media_cache SET last_access_ts = ?"
" WHERE media_origin = ? AND media_id = ?"
)
txn.executemany(sql, (
(time_ts, media_origin, media_id)
for media_origin, media_id in origin_id_tuples
))
return self.runInteraction("update_cached_last_access_time", update_cache_txn)
def get_remote_media_thumbnails(self, origin, media_id): def get_remote_media_thumbnails(self, origin, media_id):
return self._simple_select_list( return self._simple_select_list(
"remote_media_cache_thumbnails", "remote_media_cache_thumbnails",
@ -190,3 +205,32 @@ class MediaRepositoryStore(SQLBaseStore):
}, },
desc="store_remote_media_thumbnail", desc="store_remote_media_thumbnail",
) )
def get_remote_media_before(self, before_ts):
sql = (
"SELECT media_origin, media_id, filesystem_id"
" FROM remote_media_cache"
" WHERE last_access_ts < ?"
)
return self._execute(
"get_remote_media_before", self.cursor_to_dict, sql, before_ts
)
def delete_remote_media(self, media_origin, media_id):
def delete_remote_media_txn(txn):
self._simple_delete_txn(
txn,
"remote_media_cache",
keyvalues={
"media_origin": media_origin, "media_id": media_id
},
)
self._simple_delete_txn(
txn,
"remote_media_cache_thumbnails",
keyvalues={
"media_origin": media_origin, "media_id": media_id
},
)
return self.runInteraction("delete_remote_media", delete_remote_media_txn)

View File

@ -25,7 +25,7 @@ logger = logging.getLogger(__name__)
# Remember to update this number every time a change is made to database # Remember to update this number every time a change is made to database
# schema files, so the users will be informed on server restarts. # schema files, so the users will be informed on server restarts.
SCHEMA_VERSION = 32 SCHEMA_VERSION = 33
dir_path = os.path.abspath(os.path.dirname(__file__)) dir_path = os.path.abspath(os.path.dirname(__file__))

View File

@ -0,0 +1,31 @@
# Copyright 2016 OpenMarket Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
ALTER_TABLE = "ALTER TABLE remote_media_cache ADD COLUMN last_access_ts BIGINT"
def run_create(cur, database_engine, *args, **kwargs):
cur.execute(ALTER_TABLE)
def run_upgrade(cur, database_engine, *args, **kwargs):
cur.execute(
database_engine.convert_param_style(
"UPDATE remote_media_cache SET last_access_ts = ?"
),
(int(time.time() * 1000),)
)