Merge pull request #760 from matrix-org/matthew/preview_url_ip_whitelist
add a url_preview_ip_range_whitelist config param
pull/788/head
						commit
						2d98c960ec
					
				|  | @ -100,8 +100,13 @@ class ContentRepositoryConfig(Config): | |||
|                     "to work" | ||||
|                 ) | ||||
| 
 | ||||
|             if "url_preview_url_blacklist" in config: | ||||
|                 self.url_preview_url_blacklist = config["url_preview_url_blacklist"] | ||||
|             self.url_preview_ip_range_whitelist = IPSet( | ||||
|                 config.get("url_preview_ip_range_whitelist", ()) | ||||
|             ) | ||||
| 
 | ||||
|             self.url_preview_url_blacklist = config.get( | ||||
|                 "url_preview_url_blacklist", () | ||||
|             ) | ||||
| 
 | ||||
|     def default_config(self, **kwargs): | ||||
|         media_store = self.default_path("media_store") | ||||
|  | @ -162,6 +167,15 @@ class ContentRepositoryConfig(Config): | |||
|         # - '10.0.0.0/8' | ||||
|         # - '172.16.0.0/12' | ||||
|         # - '192.168.0.0/16' | ||||
|         # | ||||
|         # List of IP address CIDR ranges that the URL preview spider is allowed | ||||
|         # to access even if they are specified in url_preview_ip_range_blacklist. | ||||
|         # This is useful for specifying exceptions to wide-ranging blacklisted | ||||
|         # target IP ranges - e.g. for enabling URL previews for a specific private | ||||
|         # website only visible in your network. | ||||
|         # | ||||
|         # url_preview_ip_range_whitelist: | ||||
|         # - '192.168.1.1' | ||||
| 
 | ||||
|         # Optional list of URL matches that the URL preview spider is | ||||
|         # denied from accessing.  You should use url_preview_ip_range_blacklist | ||||
|  |  | |||
|  | @ -380,13 +380,14 @@ class CaptchaServerHttpClient(SimpleHttpClient): | |||
| class SpiderEndpointFactory(object): | ||||
|     def __init__(self, hs): | ||||
|         self.blacklist = hs.config.url_preview_ip_range_blacklist | ||||
|         self.whitelist = hs.config.url_preview_ip_range_whitelist | ||||
|         self.policyForHTTPS = hs.get_http_client_context_factory() | ||||
| 
 | ||||
|     def endpointForURI(self, uri): | ||||
|         logger.info("Getting endpoint for %s", uri.toBytes()) | ||||
|         if uri.scheme == "http": | ||||
|             return SpiderEndpoint( | ||||
|                 reactor, uri.host, uri.port, self.blacklist, | ||||
|                 reactor, uri.host, uri.port, self.blacklist, self.whitelist, | ||||
|                 endpoint=TCP4ClientEndpoint, | ||||
|                 endpoint_kw_args={ | ||||
|                     'timeout': 15 | ||||
|  | @ -395,7 +396,7 @@ class SpiderEndpointFactory(object): | |||
|         elif uri.scheme == "https": | ||||
|             tlsPolicy = self.policyForHTTPS.creatorForNetloc(uri.host, uri.port) | ||||
|             return SpiderEndpoint( | ||||
|                 reactor, uri.host, uri.port, self.blacklist, | ||||
|                 reactor, uri.host, uri.port, self.blacklist, self.whitelist, | ||||
|                 endpoint=SSL4ClientEndpoint, | ||||
|                 endpoint_kw_args={ | ||||
|                     'sslContextFactory': tlsPolicy, | ||||
|  |  | |||
|  | @ -79,12 +79,13 @@ class SpiderEndpoint(object): | |||
|     """An endpoint which refuses to connect to blacklisted IP addresses | ||||
|     Implements twisted.internet.interfaces.IStreamClientEndpoint. | ||||
|     """ | ||||
|     def __init__(self, reactor, host, port, blacklist, | ||||
|     def __init__(self, reactor, host, port, blacklist, whitelist, | ||||
|                  endpoint=TCP4ClientEndpoint, endpoint_kw_args={}): | ||||
|         self.reactor = reactor | ||||
|         self.host = host | ||||
|         self.port = port | ||||
|         self.blacklist = blacklist | ||||
|         self.whitelist = whitelist | ||||
|         self.endpoint = endpoint | ||||
|         self.endpoint_kw_args = endpoint_kw_args | ||||
| 
 | ||||
|  | @ -93,10 +94,13 @@ class SpiderEndpoint(object): | |||
|         address = yield self.reactor.resolve(self.host) | ||||
| 
 | ||||
|         from netaddr import IPAddress | ||||
|         if IPAddress(address) in self.blacklist: | ||||
|             raise ConnectError( | ||||
|                 "Refusing to spider blacklisted IP address %s" % address | ||||
|             ) | ||||
|         ip_address = IPAddress(address) | ||||
| 
 | ||||
|         if ip_address in self.blacklist: | ||||
|             if self.whitelist is None or ip_address not in self.whitelist: | ||||
|                 raise ConnectError( | ||||
|                     "Refusing to spider blacklisted IP address %s" % address | ||||
|                 ) | ||||
| 
 | ||||
|         logger.info("Connecting to %s:%s", address, self.port) | ||||
|         endpoint = self.endpoint( | ||||
|  |  | |||
|  | @ -56,8 +56,7 @@ class PreviewUrlResource(Resource): | |||
|         self.client = SpiderHttpClient(hs) | ||||
|         self.media_repo = media_repo | ||||
| 
 | ||||
|         if hasattr(hs.config, "url_preview_url_blacklist"): | ||||
|             self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist | ||||
|         self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist | ||||
| 
 | ||||
|         # simple memory cache mapping urls to OG metadata | ||||
|         self.cache = ExpiringCache( | ||||
|  | @ -86,39 +85,37 @@ class PreviewUrlResource(Resource): | |||
|         else: | ||||
|             ts = self.clock.time_msec() | ||||
| 
 | ||||
|         # impose the URL pattern blacklist | ||||
|         if hasattr(self, "url_preview_url_blacklist"): | ||||
|             url_tuple = urlparse.urlsplit(url) | ||||
|             for entry in self.url_preview_url_blacklist: | ||||
|                 match = True | ||||
|                 for attrib in entry: | ||||
|                     pattern = entry[attrib] | ||||
|                     value = getattr(url_tuple, attrib) | ||||
|                     logger.debug(( | ||||
|                         "Matching attrib '%s' with value '%s' against" | ||||
|                         " pattern '%s'" | ||||
|                     ) % (attrib, value, pattern)) | ||||
|         url_tuple = urlparse.urlsplit(url) | ||||
|         for entry in self.url_preview_url_blacklist: | ||||
|             match = True | ||||
|             for attrib in entry: | ||||
|                 pattern = entry[attrib] | ||||
|                 value = getattr(url_tuple, attrib) | ||||
|                 logger.debug(( | ||||
|                     "Matching attrib '%s' with value '%s' against" | ||||
|                     " pattern '%s'" | ||||
|                 ) % (attrib, value, pattern)) | ||||
| 
 | ||||
|                     if value is None: | ||||
|                 if value is None: | ||||
|                     match = False | ||||
|                     continue | ||||
| 
 | ||||
|                 if pattern.startswith('^'): | ||||
|                     if not re.match(pattern, getattr(url_tuple, attrib)): | ||||
|                         match = False | ||||
|                         continue | ||||
| 
 | ||||
|                     if pattern.startswith('^'): | ||||
|                         if not re.match(pattern, getattr(url_tuple, attrib)): | ||||
|                             match = False | ||||
|                             continue | ||||
|                     else: | ||||
|                         if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern): | ||||
|                             match = False | ||||
|                             continue | ||||
|                 if match: | ||||
|                     logger.warn( | ||||
|                         "URL %s blocked by url_blacklist entry %s", url, entry | ||||
|                     ) | ||||
|                     raise SynapseError( | ||||
|                         403, "URL blocked by url pattern blacklist entry", | ||||
|                         Codes.UNKNOWN | ||||
|                     ) | ||||
|                 else: | ||||
|                     if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern): | ||||
|                         match = False | ||||
|                         continue | ||||
|             if match: | ||||
|                 logger.warn( | ||||
|                     "URL %s blocked by url_blacklist entry %s", url, entry | ||||
|                 ) | ||||
|                 raise SynapseError( | ||||
|                     403, "URL blocked by url pattern blacklist entry", | ||||
|                     Codes.UNKNOWN | ||||
|                 ) | ||||
| 
 | ||||
|         # first check the memory cache - good to handle all the clients on this | ||||
|         # HS thundering away to preview the same URL at the same time. | ||||
|  |  | |||
		Loading…
	
		Reference in New Issue
	
	 Matthew Hodgson
						Matthew Hodgson