Add X-Robots-Tag header to stop crawlers from indexing media (#8887)
Fixes / related to: https://github.com/matrix-org/synapse/issues/6533
This should do essentially the same thing as a robots.txt file telling robots not to index the media repo.
See: https://developers.google.com/search/reference/robots_meta_tag
Signed-off-by: Aaron Raimist <aaron@raim.ist>
pull/8909/head
parent
ab7a24cc6b
commit
cd9e72b185
|
@ -0,0 +1 @@
|
||||||
|
Add `X-Robots-Tag` header to stop web crawlers from indexing media.
|
|
@ -155,6 +155,11 @@ def add_file_headers(request, media_type, file_size, upload_name):
|
||||||
request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
|
request.setHeader(b"Cache-Control", b"public,max-age=86400,s-maxage=86400")
|
||||||
request.setHeader(b"Content-Length", b"%d" % (file_size,))
|
request.setHeader(b"Content-Length", b"%d" % (file_size,))
|
||||||
|
|
||||||
|
# Tell web crawlers to not index, archive, or follow links in media. This
|
||||||
|
# should help to prevent things in the media repo from showing up in web
|
||||||
|
# search results.
|
||||||
|
request.setHeader(b"X-Robots-Tag", "noindex, nofollow, noarchive, noimageindex")
|
||||||
|
|
||||||
|
|
||||||
# separators as defined in RFC2616. SP and HT are handled separately.
|
# separators as defined in RFC2616. SP and HT are handled separately.
|
||||||
# see _can_encode_filename_as_token.
|
# see _can_encode_filename_as_token.
|
||||||
|
|
|
@ -362,3 +362,16 @@ class MediaRepoTests(unittest.HomeserverTestCase):
|
||||||
"error": "Not found [b'example.com', b'12345']",
|
"error": "Not found [b'example.com', b'12345']",
|
||||||
},
|
},
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def test_x_robots_tag_header(self):
    """
    Check that a media response carries the `X-Robots-Tag` header, which
    asks web crawlers not to index, archive, or follow links in media.
    """
    # Request the test image with an inline Content-Disposition.
    disposition = b"inline; filename=out" + self.test_image.extension
    response = self._req(disposition)

    robots_values = response.headers.getRawHeaders(b"X-Robots-Tag")
    expected_values = [b"noindex, nofollow, noarchive, noimageindex"]
    self.assertEqual(robots_values, expected_values)
|
||||||
|
|
Loading…
Reference in New Issue