commit
4da3fc0ea0
|
@ -381,7 +381,10 @@ def _calc_og(tree, media_uri):
|
|||
if 'og:title' not in og:
|
||||
# do some basic spidering of the HTML
|
||||
title = tree.xpath("(//title)[1] | (//h1)[1] | (//h2)[1] | (//h3)[1]")
|
||||
og['og:title'] = title[0].text.strip() if title else None
|
||||
if title and title[0].text is not None:
|
||||
og['og:title'] = title[0].text.strip()
|
||||
else:
|
||||
og['og:title'] = None
|
||||
|
||||
if 'og:image' not in og:
|
||||
# TODO: extract a favicon failing all else
|
||||
|
|
|
@ -215,3 +215,53 @@ class PreviewUrlTestCase(unittest.TestCase):
|
|||
u"og:title": u"Foo",
|
||||
u"og:description": u"Some text."
|
||||
})
|
||||
|
||||
def test_missing_title(self):
    """A page with no title or heading element yields an og:title of None.

    The document below has neither Open Graph meta tags nor a
    <title>/<h1>/<h2>/<h3> element, so there is nothing to use as a title.
    The expected description equals the only text in the body.
    """
    html = u"""
    <html>
    <body>
    Some text.
    </body>
    </html>
    """

    og = decode_and_calc_og(html, "http://example.com/test.html")

    # assertEqual rather than assertEquals: the latter is a deprecated alias
    # that was removed from unittest.TestCase in Python 3.12.
    self.assertEqual(og, {
        u"og:title": None,
        u"og:description": u"Some text.",
    })
|
||||
|
||||
def test_h1_as_title(self):
    """The text of an <h1> is used as og:title when no title is present.

    The document supplies og:description through a meta tag but has no
    og:title and no <title> element; the expected title equals the text of
    its <h1>.
    """
    html = u"""
    <html>
    <meta property="og:description" content="Some text."/>
    <body>
    <h1>Title</h1>
    </body>
    </html>
    """

    og = decode_and_calc_og(html, "http://example.com/test.html")

    # assertEqual rather than assertEquals: the latter is a deprecated alias
    # that was removed from unittest.TestCase in Python 3.12.
    self.assertEqual(og, {
        u"og:title": u"Title",
        u"og:description": u"Some text.",
    })
|
||||
|
||||
def test_missing_title_and_broken_h1(self):
    """An <h1> with no text of its own must not break title extraction.

    The only heading in the document wraps an empty <a> element and so
    carries no direct text. The expected result is an og:title of None
    (rather than an error) and a description equal to the body text.
    """
    html = u"""
    <html>
    <body>
    <h1><a href="foo"/></h1>
    Some text.
    </body>
    </html>
    """

    og = decode_and_calc_og(html, "http://example.com/test.html")

    # assertEqual rather than assertEquals: the latter is a deprecated alias
    # that was removed from unittest.TestCase in Python 3.12.
    self.assertEqual(og, {
        u"og:title": None,
        u"og:description": u"Some text.",
    })
|
||||
|
|
Loading…
Reference in New Issue