Merge pull request #760 from matrix-org/matthew/preview_url_ip_whitelist
add a url_preview_ip_range_whitelist config param
pull/788/head
commit
2d98c960ec
|
@ -100,8 +100,13 @@ class ContentRepositoryConfig(Config):
|
||||||
"to work"
|
"to work"
|
||||||
)
|
)
|
||||||
|
|
||||||
if "url_preview_url_blacklist" in config:
|
self.url_preview_ip_range_whitelist = IPSet(
|
||||||
self.url_preview_url_blacklist = config["url_preview_url_blacklist"]
|
config.get("url_preview_ip_range_whitelist", ())
|
||||||
|
)
|
||||||
|
|
||||||
|
self.url_preview_url_blacklist = config.get(
|
||||||
|
"url_preview_url_blacklist", ()
|
||||||
|
)
|
||||||
|
|
||||||
def default_config(self, **kwargs):
|
def default_config(self, **kwargs):
|
||||||
media_store = self.default_path("media_store")
|
media_store = self.default_path("media_store")
|
||||||
|
@ -162,6 +167,15 @@ class ContentRepositoryConfig(Config):
|
||||||
# - '10.0.0.0/8'
|
# - '10.0.0.0/8'
|
||||||
# - '172.16.0.0/12'
|
# - '172.16.0.0/12'
|
||||||
# - '192.168.0.0/16'
|
# - '192.168.0.0/16'
|
||||||
|
#
|
||||||
|
# List of IP address CIDR ranges that the URL preview spider is allowed
|
||||||
|
# to access even if they are specified in url_preview_ip_range_blacklist.
|
||||||
|
# This is useful for specifying exceptions to wide-ranging blacklisted
|
||||||
|
# target IP ranges - e.g. for enabling URL previews for a specific private
|
||||||
|
# website only visible in your network.
|
||||||
|
#
|
||||||
|
# url_preview_ip_range_whitelist:
|
||||||
|
# - '192.168.1.1'
|
||||||
|
|
||||||
# Optional list of URL matches that the URL preview spider is
|
# Optional list of URL matches that the URL preview spider is
|
||||||
# denied from accessing. You should use url_preview_ip_range_blacklist
|
# denied from accessing. You should use url_preview_ip_range_blacklist
|
||||||
|
|
|
@ -380,13 +380,14 @@ class CaptchaServerHttpClient(SimpleHttpClient):
|
||||||
class SpiderEndpointFactory(object):
|
class SpiderEndpointFactory(object):
|
||||||
def __init__(self, hs):
|
def __init__(self, hs):
|
||||||
self.blacklist = hs.config.url_preview_ip_range_blacklist
|
self.blacklist = hs.config.url_preview_ip_range_blacklist
|
||||||
|
self.whitelist = hs.config.url_preview_ip_range_whitelist
|
||||||
self.policyForHTTPS = hs.get_http_client_context_factory()
|
self.policyForHTTPS = hs.get_http_client_context_factory()
|
||||||
|
|
||||||
def endpointForURI(self, uri):
|
def endpointForURI(self, uri):
|
||||||
logger.info("Getting endpoint for %s", uri.toBytes())
|
logger.info("Getting endpoint for %s", uri.toBytes())
|
||||||
if uri.scheme == "http":
|
if uri.scheme == "http":
|
||||||
return SpiderEndpoint(
|
return SpiderEndpoint(
|
||||||
reactor, uri.host, uri.port, self.blacklist,
|
reactor, uri.host, uri.port, self.blacklist, self.whitelist,
|
||||||
endpoint=TCP4ClientEndpoint,
|
endpoint=TCP4ClientEndpoint,
|
||||||
endpoint_kw_args={
|
endpoint_kw_args={
|
||||||
'timeout': 15
|
'timeout': 15
|
||||||
|
@ -395,7 +396,7 @@ class SpiderEndpointFactory(object):
|
||||||
elif uri.scheme == "https":
|
elif uri.scheme == "https":
|
||||||
tlsPolicy = self.policyForHTTPS.creatorForNetloc(uri.host, uri.port)
|
tlsPolicy = self.policyForHTTPS.creatorForNetloc(uri.host, uri.port)
|
||||||
return SpiderEndpoint(
|
return SpiderEndpoint(
|
||||||
reactor, uri.host, uri.port, self.blacklist,
|
reactor, uri.host, uri.port, self.blacklist, self.whitelist,
|
||||||
endpoint=SSL4ClientEndpoint,
|
endpoint=SSL4ClientEndpoint,
|
||||||
endpoint_kw_args={
|
endpoint_kw_args={
|
||||||
'sslContextFactory': tlsPolicy,
|
'sslContextFactory': tlsPolicy,
|
||||||
|
|
|
@ -79,12 +79,13 @@ class SpiderEndpoint(object):
|
||||||
"""An endpoint which refuses to connect to blacklisted IP addresses
|
"""An endpoint which refuses to connect to blacklisted IP addresses
|
||||||
Implements twisted.internet.interfaces.IStreamClientEndpoint.
|
Implements twisted.internet.interfaces.IStreamClientEndpoint.
|
||||||
"""
|
"""
|
||||||
def __init__(self, reactor, host, port, blacklist,
|
def __init__(self, reactor, host, port, blacklist, whitelist,
|
||||||
endpoint=TCP4ClientEndpoint, endpoint_kw_args={}):
|
endpoint=TCP4ClientEndpoint, endpoint_kw_args={}):
|
||||||
self.reactor = reactor
|
self.reactor = reactor
|
||||||
self.host = host
|
self.host = host
|
||||||
self.port = port
|
self.port = port
|
||||||
self.blacklist = blacklist
|
self.blacklist = blacklist
|
||||||
|
self.whitelist = whitelist
|
||||||
self.endpoint = endpoint
|
self.endpoint = endpoint
|
||||||
self.endpoint_kw_args = endpoint_kw_args
|
self.endpoint_kw_args = endpoint_kw_args
|
||||||
|
|
||||||
|
@ -93,10 +94,13 @@ class SpiderEndpoint(object):
|
||||||
address = yield self.reactor.resolve(self.host)
|
address = yield self.reactor.resolve(self.host)
|
||||||
|
|
||||||
from netaddr import IPAddress
|
from netaddr import IPAddress
|
||||||
if IPAddress(address) in self.blacklist:
|
ip_address = IPAddress(address)
|
||||||
raise ConnectError(
|
|
||||||
"Refusing to spider blacklisted IP address %s" % address
|
if ip_address in self.blacklist:
|
||||||
)
|
if self.whitelist is None or ip_address not in self.whitelist:
|
||||||
|
raise ConnectError(
|
||||||
|
"Refusing to spider blacklisted IP address %s" % address
|
||||||
|
)
|
||||||
|
|
||||||
logger.info("Connecting to %s:%s", address, self.port)
|
logger.info("Connecting to %s:%s", address, self.port)
|
||||||
endpoint = self.endpoint(
|
endpoint = self.endpoint(
|
||||||
|
|
|
@ -56,8 +56,7 @@ class PreviewUrlResource(Resource):
|
||||||
self.client = SpiderHttpClient(hs)
|
self.client = SpiderHttpClient(hs)
|
||||||
self.media_repo = media_repo
|
self.media_repo = media_repo
|
||||||
|
|
||||||
if hasattr(hs.config, "url_preview_url_blacklist"):
|
self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
|
||||||
self.url_preview_url_blacklist = hs.config.url_preview_url_blacklist
|
|
||||||
|
|
||||||
# simple memory cache mapping urls to OG metadata
|
# simple memory cache mapping urls to OG metadata
|
||||||
self.cache = ExpiringCache(
|
self.cache = ExpiringCache(
|
||||||
|
@ -86,39 +85,37 @@ class PreviewUrlResource(Resource):
|
||||||
else:
|
else:
|
||||||
ts = self.clock.time_msec()
|
ts = self.clock.time_msec()
|
||||||
|
|
||||||
# impose the URL pattern blacklist
|
url_tuple = urlparse.urlsplit(url)
|
||||||
if hasattr(self, "url_preview_url_blacklist"):
|
for entry in self.url_preview_url_blacklist:
|
||||||
url_tuple = urlparse.urlsplit(url)
|
match = True
|
||||||
for entry in self.url_preview_url_blacklist:
|
for attrib in entry:
|
||||||
match = True
|
pattern = entry[attrib]
|
||||||
for attrib in entry:
|
value = getattr(url_tuple, attrib)
|
||||||
pattern = entry[attrib]
|
logger.debug((
|
||||||
value = getattr(url_tuple, attrib)
|
"Matching attrib '%s' with value '%s' against"
|
||||||
logger.debug((
|
" pattern '%s'"
|
||||||
"Matching attrib '%s' with value '%s' against"
|
) % (attrib, value, pattern))
|
||||||
" pattern '%s'"
|
|
||||||
) % (attrib, value, pattern))
|
|
||||||
|
|
||||||
if value is None:
|
if value is None:
|
||||||
|
match = False
|
||||||
|
continue
|
||||||
|
|
||||||
|
if pattern.startswith('^'):
|
||||||
|
if not re.match(pattern, getattr(url_tuple, attrib)):
|
||||||
match = False
|
match = False
|
||||||
continue
|
continue
|
||||||
|
else:
|
||||||
if pattern.startswith('^'):
|
if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern):
|
||||||
if not re.match(pattern, getattr(url_tuple, attrib)):
|
match = False
|
||||||
match = False
|
continue
|
||||||
continue
|
if match:
|
||||||
else:
|
logger.warn(
|
||||||
if not fnmatch.fnmatch(getattr(url_tuple, attrib), pattern):
|
"URL %s blocked by url_blacklist entry %s", url, entry
|
||||||
match = False
|
)
|
||||||
continue
|
raise SynapseError(
|
||||||
if match:
|
403, "URL blocked by url pattern blacklist entry",
|
||||||
logger.warn(
|
Codes.UNKNOWN
|
||||||
"URL %s blocked by url_blacklist entry %s", url, entry
|
)
|
||||||
)
|
|
||||||
raise SynapseError(
|
|
||||||
403, "URL blocked by url pattern blacklist entry",
|
|
||||||
Codes.UNKNOWN
|
|
||||||
)
|
|
||||||
|
|
||||||
# first check the memory cache - good to handle all the clients on this
|
# first check the memory cache - good to handle all the clients on this
|
||||||
# HS thundering away to preview the same URL at the same time.
|
# HS thundering away to preview the same URL at the same time.
|
||||||
|
|
Loading…
Reference in New Issue