Fix potential thumbnail memory leaks. (#12932)

pull/12936/head
Erik Johnston 2022-06-01 11:57:49 +01:00 committed by GitHub
parent 2e8763ec96
commit 5949ab86f8
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 204 additions and 137 deletions

1
changelog.d/12932.bugfix Normal file
View File

@ -0,0 +1 @@
Fix potential memory leak when generating thumbnails.

View File

@ -587,15 +587,16 @@ class MediaRepository:
) )
return None return None
t_byte_source = await defer_to_thread( with thumbnailer:
self.hs.get_reactor(), t_byte_source = await defer_to_thread(
self._generate_thumbnail, self.hs.get_reactor(),
thumbnailer, self._generate_thumbnail,
t_width, thumbnailer,
t_height, t_width,
t_method, t_height,
t_type, t_method,
) t_type,
)
if t_byte_source: if t_byte_source:
try: try:
@ -657,15 +658,16 @@ class MediaRepository:
) )
return None return None
t_byte_source = await defer_to_thread( with thumbnailer:
self.hs.get_reactor(), t_byte_source = await defer_to_thread(
self._generate_thumbnail, self.hs.get_reactor(),
thumbnailer, self._generate_thumbnail,
t_width, thumbnailer,
t_height, t_width,
t_method, t_height,
t_type, t_method,
) t_type,
)
if t_byte_source: if t_byte_source:
try: try:
@ -749,119 +751,134 @@ class MediaRepository:
) )
return None return None
m_width = thumbnailer.width with thumbnailer:
m_height = thumbnailer.height m_width = thumbnailer.width
m_height = thumbnailer.height
if m_width * m_height >= self.max_image_pixels: if m_width * m_height >= self.max_image_pixels:
logger.info( logger.info(
"Image too large to thumbnail %r x %r > %r", "Image too large to thumbnail %r x %r > %r",
m_width, m_width,
m_height, m_height,
self.max_image_pixels, self.max_image_pixels,
)
return None
if thumbnailer.transpose_method is not None:
m_width, m_height = await defer_to_thread(
self.hs.get_reactor(), thumbnailer.transpose
)
# We deduplicate the thumbnail sizes by ignoring the cropped versions if
# they have the same dimensions of a scaled one.
thumbnails: Dict[Tuple[int, int, str], str] = {}
for requirement in requirements:
if requirement.method == "crop":
thumbnails.setdefault(
(requirement.width, requirement.height, requirement.media_type),
requirement.method,
) )
elif requirement.method == "scale": return None
t_width, t_height = thumbnailer.aspect(
requirement.width, requirement.height if thumbnailer.transpose_method is not None:
m_width, m_height = await defer_to_thread(
self.hs.get_reactor(), thumbnailer.transpose
) )
t_width = min(m_width, t_width)
t_height = min(m_height, t_height)
thumbnails[
(t_width, t_height, requirement.media_type)
] = requirement.method
# Now we generate the thumbnails for each dimension, store it # We deduplicate the thumbnail sizes by ignoring the cropped versions if
for (t_width, t_height, t_type), t_method in thumbnails.items(): # they have the same dimensions of a scaled one.
# Generate the thumbnail thumbnails: Dict[Tuple[int, int, str], str] = {}
if t_method == "crop": for requirement in requirements:
t_byte_source = await defer_to_thread( if requirement.method == "crop":
self.hs.get_reactor(), thumbnailer.crop, t_width, t_height, t_type thumbnails.setdefault(
) (requirement.width, requirement.height, requirement.media_type),
elif t_method == "scale": requirement.method,
t_byte_source = await defer_to_thread(
self.hs.get_reactor(), thumbnailer.scale, t_width, t_height, t_type
)
else:
logger.error("Unrecognized method: %r", t_method)
continue
if not t_byte_source:
continue
file_info = FileInfo(
server_name=server_name,
file_id=file_id,
url_cache=url_cache,
thumbnail=ThumbnailInfo(
width=t_width,
height=t_height,
method=t_method,
type=t_type,
),
)
with self.media_storage.store_into_file(file_info) as (f, fname, finish):
try:
await self.media_storage.write_to_file(t_byte_source, f)
await finish()
finally:
t_byte_source.close()
t_len = os.path.getsize(fname)
# Write to database
if server_name:
# Multiple remote media download requests can race (when
# using multiple media repos), so this may throw a violation
# constraint exception. If it does we'll delete the newly
# generated thumbnail from disk (as we're in the ctx
# manager).
#
# However: we've already called `finish()` so we may have
# also written to the storage providers. This is preferable
# to the alternative where we call `finish()` *after* this,
# where we could end up having an entry in the DB but fail
# to write the files to the storage providers.
try:
await self.store.store_remote_media_thumbnail(
server_name,
media_id,
file_id,
t_width,
t_height,
t_type,
t_method,
t_len,
)
except Exception as e:
thumbnail_exists = await self.store.get_remote_media_thumbnail(
server_name,
media_id,
t_width,
t_height,
t_type,
)
if not thumbnail_exists:
raise e
else:
await self.store.store_local_thumbnail(
media_id, t_width, t_height, t_type, t_method, t_len
) )
elif requirement.method == "scale":
t_width, t_height = thumbnailer.aspect(
requirement.width, requirement.height
)
t_width = min(m_width, t_width)
t_height = min(m_height, t_height)
thumbnails[
(t_width, t_height, requirement.media_type)
] = requirement.method
# Now we generate the thumbnails for each dimension, store it
for (t_width, t_height, t_type), t_method in thumbnails.items():
# Generate the thumbnail
if t_method == "crop":
t_byte_source = await defer_to_thread(
self.hs.get_reactor(),
thumbnailer.crop,
t_width,
t_height,
t_type,
)
elif t_method == "scale":
t_byte_source = await defer_to_thread(
self.hs.get_reactor(),
thumbnailer.scale,
t_width,
t_height,
t_type,
)
else:
logger.error("Unrecognized method: %r", t_method)
continue
if not t_byte_source:
continue
file_info = FileInfo(
server_name=server_name,
file_id=file_id,
url_cache=url_cache,
thumbnail=ThumbnailInfo(
width=t_width,
height=t_height,
method=t_method,
type=t_type,
),
)
with self.media_storage.store_into_file(file_info) as (
f,
fname,
finish,
):
try:
await self.media_storage.write_to_file(t_byte_source, f)
await finish()
finally:
t_byte_source.close()
t_len = os.path.getsize(fname)
# Write to database
if server_name:
# Multiple remote media download requests can race (when
# using multiple media repos), so this may throw a violation
# constraint exception. If it does we'll delete the newly
# generated thumbnail from disk (as we're in the ctx
# manager).
#
# However: we've already called `finish()` so we may have
# also written to the storage providers. This is preferable
# to the alternative where we call `finish()` *after* this,
# where we could end up having an entry in the DB but fail
# to write the files to the storage providers.
try:
await self.store.store_remote_media_thumbnail(
server_name,
media_id,
file_id,
t_width,
t_height,
t_type,
t_method,
t_len,
)
except Exception as e:
thumbnail_exists = (
await self.store.get_remote_media_thumbnail(
server_name,
media_id,
t_width,
t_height,
t_type,
)
)
if not thumbnail_exists:
raise e
else:
await self.store.store_local_thumbnail(
media_id, t_width, t_height, t_type, t_method, t_len
)
return {"width": m_width, "height": m_height} return {"width": m_width, "height": m_height}

View File

@ -14,7 +14,8 @@
# limitations under the License. # limitations under the License.
import logging import logging
from io import BytesIO from io import BytesIO
from typing import Tuple from types import TracebackType
from typing import Optional, Tuple, Type
from PIL import Image from PIL import Image
@ -45,6 +46,9 @@ class Thumbnailer:
Image.MAX_IMAGE_PIXELS = max_image_pixels Image.MAX_IMAGE_PIXELS = max_image_pixels
def __init__(self, input_path: str): def __init__(self, input_path: str):
# Have we closed the image?
self._closed = False
try: try:
self.image = Image.open(input_path) self.image = Image.open(input_path)
except OSError as e: except OSError as e:
@ -89,7 +93,8 @@ class Thumbnailer:
# Safety: `transpose` takes an int rather than e.g. an IntEnum. # Safety: `transpose` takes an int rather than e.g. an IntEnum.
# self.transpose_method is set above to be a value in # self.transpose_method is set above to be a value in
# EXIF_TRANSPOSE_MAPPINGS, and that only contains correct values. # EXIF_TRANSPOSE_MAPPINGS, and that only contains correct values.
self.image = self.image.transpose(self.transpose_method) # type: ignore[arg-type] with self.image:
self.image = self.image.transpose(self.transpose_method) # type: ignore[arg-type]
self.width, self.height = self.image.size self.width, self.height = self.image.size
self.transpose_method = None self.transpose_method = None
# We don't need EXIF any more # We don't need EXIF any more
@ -122,9 +127,11 @@ class Thumbnailer:
# If the image has transparency, use RGBA instead. # If the image has transparency, use RGBA instead.
if self.image.mode in ["1", "L", "P"]: if self.image.mode in ["1", "L", "P"]:
if self.image.info.get("transparency", None) is not None: if self.image.info.get("transparency", None) is not None:
self.image = self.image.convert("RGBA") with self.image:
self.image = self.image.convert("RGBA")
else: else:
self.image = self.image.convert("RGB") with self.image:
self.image = self.image.convert("RGB")
return self.image.resize((width, height), Image.ANTIALIAS) return self.image.resize((width, height), Image.ANTIALIAS)
def scale(self, width: int, height: int, output_type: str) -> BytesIO: def scale(self, width: int, height: int, output_type: str) -> BytesIO:
@ -133,8 +140,8 @@ class Thumbnailer:
Returns: Returns:
BytesIO: the bytes of the encoded image ready to be written to disk BytesIO: the bytes of the encoded image ready to be written to disk
""" """
scaled = self._resize(width, height) with self._resize(width, height) as scaled:
return self._encode_image(scaled, output_type) return self._encode_image(scaled, output_type)
def crop(self, width: int, height: int, output_type: str) -> BytesIO: def crop(self, width: int, height: int, output_type: str) -> BytesIO:
"""Rescales and crops the image to the given dimensions preserving """Rescales and crops the image to the given dimensions preserving
@ -151,18 +158,21 @@ class Thumbnailer:
BytesIO: the bytes of the encoded image ready to be written to disk BytesIO: the bytes of the encoded image ready to be written to disk
""" """
if width * self.height > height * self.width: if width * self.height > height * self.width:
scaled_width = width
scaled_height = (width * self.height) // self.width scaled_height = (width * self.height) // self.width
scaled_image = self._resize(width, scaled_height)
crop_top = (scaled_height - height) // 2 crop_top = (scaled_height - height) // 2
crop_bottom = height + crop_top crop_bottom = height + crop_top
cropped = scaled_image.crop((0, crop_top, width, crop_bottom)) crop = (0, crop_top, width, crop_bottom)
else: else:
scaled_width = (height * self.width) // self.height scaled_width = (height * self.width) // self.height
scaled_image = self._resize(scaled_width, height) scaled_height = height
crop_left = (scaled_width - width) // 2 crop_left = (scaled_width - width) // 2
crop_right = width + crop_left crop_right = width + crop_left
cropped = scaled_image.crop((crop_left, 0, crop_right, height)) crop = (crop_left, 0, crop_right, height)
return self._encode_image(cropped, output_type)
with self._resize(scaled_width, scaled_height) as scaled_image:
with scaled_image.crop(crop) as cropped:
return self._encode_image(cropped, output_type)
def _encode_image(self, output_image: Image.Image, output_type: str) -> BytesIO: def _encode_image(self, output_image: Image.Image, output_type: str) -> BytesIO:
output_bytes_io = BytesIO() output_bytes_io = BytesIO()
@ -171,3 +181,42 @@ class Thumbnailer:
output_image = output_image.convert("RGB") output_image = output_image.convert("RGB")
output_image.save(output_bytes_io, fmt, quality=80) output_image.save(output_bytes_io, fmt, quality=80)
return output_bytes_io return output_bytes_io
def close(self) -> None:
"""Closes the underlying image file.
Once closed no other functions can be called.
Can be called multiple times.
"""
if self._closed:
return
self._closed = True
# Since we run this on the finalizer then we need to handle `__init__`
# raising an exception before it can define `self.image`.
image = getattr(self, "image", None)
if image is None:
return
image.close()
def __enter__(self) -> "Thumbnailer":
"""Make `Thumbnailer` a context manager that calls `close` on
`__exit__`.
"""
return self
def __exit__(
self,
type: Optional[Type[BaseException]],
value: Optional[BaseException],
traceback: Optional[TracebackType],
) -> None:
self.close()
def __del__(self) -> None:
# Make sure we actually do close the image, rather than leak data.
self.close()