From c93ca023a7c7893aca6575576278aad6152696d5 Mon Sep 17 00:00:00 2001
From: Andrew Morgan
Date: Sun, 12 Apr 2020 16:18:47 +0100
Subject: [PATCH] Allow specifying the value of Accept-Language header for URL preview

Add a `url_preview_accept_language` config option. Its values are sent as
the Accept-Language header when Synapse downloads a page to generate a URL
preview. If the option is unset or empty it falls back to ["en"].

---
 docs/sample_config.yaml                       | 25 +++++++++++++++++
 synapse/config/repository.py                  | 27 +++++++++++++++++++
 synapse/rest/media/v1/preview_url_resource.py |  8 ++++--
 3 files changed, 58 insertions(+), 2 deletions(-)

diff --git a/docs/sample_config.yaml b/docs/sample_config.yaml
index 3417813750..81dccbd997 100644
--- a/docs/sample_config.yaml
+++ b/docs/sample_config.yaml
@@ -859,6 +859,31 @@ media_store_path: "DATADIR/media_store"
 #
 #max_spider_size: 10M
 
+# A list of values for the Accept-Language HTTP header used when
+# downloading webpages during URL preview generation. This allows
+# Synapse to specify the preferred languages that URL previews should
+# be in when communicating with remote servers.
+#
+# Each value is an IETF language tag; a 2-3 letter identifier for a
+# language, optionally followed by subtags separated by '-', specifying
+# a country or region variant.
+#
+# Multiple values can be provided, and a weight can be added to each by
+# using quality value syntax (;q=). '*' translates to any language.
+#
+# Defaults to "en".
+#
+# Example:
+#
+# url_preview_accept_language:
+#   - en-GB
+#   - en-US;q=0.9
+#   - fr;q=0.8
+#   - '*;q=0.7'
+#
+url_preview_accept_language:
+#   - en
+
 
 ## Captcha ##
 # See docs/CAPTCHA_SETUP for full details of configuring this.
diff --git a/synapse/config/repository.py b/synapse/config/repository.py
index 7d2dd27fd0..0454c609d6 100644
--- a/synapse/config/repository.py
+++ b/synapse/config/repository.py
@@ -192,6 +192,8 @@ class ContentRepositoryConfig(Config):
 
             self.url_preview_url_blacklist = config.get("url_preview_url_blacklist", ())
 
+            self.url_preview_accept_language = config.get("url_preview_accept_language") or ["en"]
+
     def generate_config_section(self, data_dir_path, **kwargs):
         media_store = os.path.join(data_dir_path, "media_store")
         uploads_path = os.path.join(data_dir_path, "uploads")
@@ -329,6 +331,31 @@ class ContentRepositoryConfig(Config):
         # The largest allowed URL preview spidering size in bytes
         #
         #max_spider_size: 10M
+
+        # A list of values for the Accept-Language HTTP header used when
+        # downloading webpages during URL preview generation. This allows
+        # Synapse to specify the preferred languages that URL previews should
+        # be in when communicating with remote servers.
+        #
+        # Each value is an IETF language tag; a 2-3 letter identifier for a
+        # language, optionally followed by subtags separated by '-', specifying
+        # a country or region variant.
+        #
+        # Multiple values can be provided, and a weight can be added to each by
+        # using quality value syntax (;q=). '*' translates to any language.
+        #
+        # Defaults to "en".
+        #
+        # Example:
+        #
+        # url_preview_accept_language:
+        #   - en-GB
+        #   - en-US;q=0.9
+        #   - fr;q=0.8
+        #   - '*;q=0.7'
+        #
+        url_preview_accept_language:
+        #   - en
         """
             % locals()
         )
diff --git a/synapse/rest/media/v1/preview_url_resource.py b/synapse/rest/media/v1/preview_url_resource.py
index c46676f8fc..e72cc1670c 100644
--- a/synapse/rest/media/v1/preview_url_resource.py
+++ b/synapse/rest/media/v1/preview_url_resource.py
@@ -86,6 +86,7 @@ class PreviewUrlResource(DirectServeResource):
         self.media_storage = media_storage
 
         self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
+        self.url_preview_accept_language = hs.config.url_preview_accept_language
 
         # memory cache mapping urls to an ObservableDeferred returning
         # JSON-encoded OG metadata
@@ -315,9 +316,12 @@ class PreviewUrlResource(DirectServeResource):
 
         with self.media_storage.store_into_file(file_info) as (f, fname, finish):
             try:
-                logger.debug("Trying to get url '%s'", url)
+                logger.debug("Trying to get preview for url '%s'", url)
                 length, headers, uri, code = await self.client.get_file(
-                    url, output_stream=f, max_size=self.max_spider_size
+                    url,
+                    output_stream=f,
+                    max_size=self.max_spider_size,
+                    headers={"Accept-Language": self.url_preview_accept_language},
                 )
             except SynapseError:
                 # Pass SynapseErrors through directly, so that the servlet