197 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			Python
		
	
	
			
		
		
	
	
			197 lines
		
	
	
		
			6.9 KiB
		
	
	
	
		
			Python
		
	
	
# Copyright 2021 The Matrix.org Foundation C.I.C.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
 | 
						|
import json
 | 
						|
import re
 | 
						|
from typing import Any, Dict, Iterable, List, Optional, Pattern
 | 
						|
from urllib import parse as urlparse
 | 
						|
 | 
						|
import attr
 | 
						|
import pkg_resources
 | 
						|
 | 
						|
from synapse.types import JsonDict
 | 
						|
 | 
						|
from ._base import Config, ConfigError
 | 
						|
from ._util import validate_config
 | 
						|
 | 
						|
 | 
						|
@attr.s(auto_attribs=True, frozen=True, slots=True)
class OEmbedEndpointConfig:
    """One oEmbed API endpoint together with the URL patterns it serves."""

    # URL of the oEmbed API endpoint to query.
    api_endpoint: str
    # Compiled regular expressions; a URL matching one of these is handled
    # by this endpoint.
    url_patterns: List[Pattern]
    # Formats listed for the endpoint in the provider file, or None when the
    # provider file does not list any.
    formats: Optional[List[str]]
 | 
						|
 | 
						|
 | 
						|
class OembedConfig(Config):
    """oEmbed Configuration"""

    section = "oembed"

    def read_config(self, config: JsonDict, **kwargs: Any) -> None:
        """Read the ``oembed`` section and populate ``self.oembed_patterns``.

        Args:
            config: The parsed configuration; only its ``oembed`` key is read.
                A missing or empty section is treated as ``{}``.
            **kwargs: Unused by this section.

        Raises:
            ConfigError: If a provider file cannot be loaded, or contains an
                endpoint or pattern with an unsupported scheme.
        """
        oembed_config: Dict[str, Any] = config.get("oembed") or {}

        # A list of patterns which will be used. The generator is consumed
        # eagerly so that any configuration problem surfaces here.
        self.oembed_patterns: List[OEmbedEndpointConfig] = list(
            self._parse_and_validate_providers(oembed_config)
        )

    def _parse_and_validate_providers(
        self, oembed_config: dict
    ) -> Iterable[OEmbedEndpointConfig]:
        """Extract and parse the oEmbed providers from the configured JSON files.

        The packaged ``res/providers.json`` is used unless
        ``disable_default_providers`` is truthy; every file listed under
        ``additional_providers`` is then loaded in order.

        Args:
            oembed_config: The ``oembed`` section of the configuration.

        Returns:
            A generator which yields the OEmbedEndpointConfig objects.

        Raises:
            ConfigError: If an additional provider file cannot be read or is
                not valid JSON, or if a provider entry is rejected by
                ``_parse_and_validate_provider``.
        """
        # Whether to use the packaged providers.json file.
        if not oembed_config.get("disable_default_providers"):
            # Close the resource stream once the JSON has been read.
            with pkg_resources.resource_stream(
                "synapse", "res/providers.json"
            ) as stream:
                providers = json.load(stream)
            yield from self._parse_and_validate_provider(
                providers, config_path=("oembed",)
            )

        # The JSON files which includes additional provider information.
        for i, file in enumerate(oembed_config.get("additional_providers") or []):
            config_path = ("oembed", "additional_providers", f"<item {i}>")

            # Report unreadable or malformed files as configuration errors
            # (with the offending config path) rather than raw I/O or JSON
            # exceptions. ValueError covers both JSON and decoding failures.
            try:
                with open(file, encoding="utf-8") as f:
                    providers = json.load(f)
            except (OSError, ValueError) as e:
                raise ConfigError(
                    f"Unable to load oEmbed provider file {file}: {e}",
                    config_path,
                ) from e

            yield from self._parse_and_validate_provider(
                providers, config_path=config_path
            )

    def _parse_and_validate_provider(
        self, providers: List[JsonDict], config_path: Iterable[str]
    ) -> Iterable[OEmbedEndpointConfig]:
        """Validate a parsed provider list and yield its endpoints.

        Args:
            providers: The decoded contents of a providers JSON file.
            config_path: Location in the configuration to report in errors.

        Returns:
            A generator yielding one OEmbedEndpointConfig per endpoint of
            each provider.

        Raises:
            ConfigError: If an endpoint URL or a scheme glob is not HTTP(S).
                Schema violations are reported by ``validate_config``.
        """
        # Ensure it is the proper form.
        validate_config(
            _OEMBED_PROVIDER_SCHEMA,
            providers,
            config_path=config_path,
        )

        # Parse it and yield each result.
        for provider in providers:
            # Each provider might have multiple API endpoints, each which
            # might have multiple patterns to match.
            for endpoint in provider["endpoints"]:
                api_endpoint = endpoint["url"]

                # The API endpoint must be an HTTP(S) URL.
                results = urlparse.urlparse(api_endpoint)
                if results.scheme not in {"http", "https"}:
                    raise ConfigError(
                        f"Unsupported oEmbed scheme ({results.scheme}) for endpoint {api_endpoint}",
                        config_path,
                    )

                patterns = [
                    self._glob_to_pattern(glob, config_path)
                    for glob in endpoint["schemes"]
                ]
                yield OEmbedEndpointConfig(
                    api_endpoint, patterns, endpoint.get("formats")
                )

    def _glob_to_pattern(self, glob: str, config_path: Iterable[str]) -> Pattern:
        """
        Convert the glob into a sane regular expression to match against. The
        rules followed will be slightly different for the domain portion vs.
        the rest.

        1. The scheme must be one of HTTP / HTTPS (and have no globs).
        2. The domain can have globs, but we limit it to characters that can
           reasonably be a domain part.
           TODO: This does not attempt to handle Unicode domain names.
           TODO: The domain should not allow wildcard TLDs.
        3. Other parts allow a glob to be any one, or more, characters.

        Args:
            glob: The URL glob, e.g. ``https://*.example.com/watch?v=*``.
            config_path: Location in the configuration to report in errors.

        Returns:
            The compiled (unanchored) regular expression.

        Raises:
            ConfigError: If the glob's scheme is not ``http`` or ``https``.
        """
        results = urlparse.urlparse(glob)

        # The scheme must be HTTP(S) (and cannot contain wildcards).
        if results.scheme not in {"http", "https"}:
            raise ConfigError(
                f"Unsupported oEmbed scheme ({results.scheme}) for pattern: {glob}",
                config_path,
            )

        netloc = re.escape(results.netloc).replace("\\*", "[a-zA-Z0-9_-]+")
        path, params, query, fragment = (
            re.escape(part).replace("\\*", ".+") for part in results[2:]
        )

        # Assemble the pattern by hand instead of with urlunparse: urlunparse
        # inserts a bare "?" before the query, which a regular expression
        # reads as a quantifier (making the previous character optional)
        # rather than as a literal question mark.
        pattern = f"{results.scheme}://{netloc}{path}"
        if params:
            pattern += ";" + params
        if query:
            pattern += "\\?" + query
        if fragment:
            pattern += "#" + fragment
        return re.compile(pattern)

    def generate_config_section(self, **kwargs: Any) -> str:
        """Return the commented sample YAML for the ``oembed`` section."""
        return """\
        # oEmbed allows for easier embedding content from a website. It can be
        # used for generating URLs previews of services which support it.
        #
        oembed:
          # A default list of oEmbed providers is included with Synapse.
          #
          # Uncomment the following to disable using these default oEmbed URLs.
          # Defaults to 'false'.
          #
          #disable_default_providers: true

          # Additional files with oEmbed configuration (each should be in the
          # form of providers.json).
          #
          # By default, this list is empty (so only the default providers.json
          # is used).
          #
          #additional_providers:
          #  - oembed/my_providers.json
        """
 | 
						|
 | 
						|
 | 
						|
# JSON schema for a single entry of a provider's "endpoints" list: the URL
# globs it serves ("schemes") and the API endpoint ("url") are mandatory.
_OEMBED_ENDPOINT_SCHEMA = {
    "type": "object",
    "properties": {
        "schemes": {"type": "array", "items": {"type": "string"}},
        "url": {"type": "string"},
        "formats": {"type": "array", "items": {"type": "string"}},
        "discovery": {"type": "boolean"},
    },
    "required": ["schemes", "url"],
}

# JSON schema for a providers.json-style document: an array of providers,
# each carrying a name, a URL and a list of endpoints.
_OEMBED_PROVIDER_SCHEMA = {
    "type": "array",
    "items": {
        "type": "object",
        "properties": {
            "provider_name": {"type": "string"},
            "provider_url": {"type": "string"},
            "endpoints": {"type": "array", "items": _OEMBED_ENDPOINT_SCHEMA},
        },
        "required": ["provider_name", "provider_url", "endpoints"],
    },
}
 |