fix: [crawler] debug signal timeout

master
terrtia 2025-01-08 15:25:41 +01:00
parent 0287a1380b
commit 9425e01c85
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
1 changed file with 13 additions and 13 deletions

View File

@@ -326,20 +326,20 @@ def extract_favicon_from_html(html, url):
# # # # # # # # # # # # # # # #
def extract_title_from_html(html, item_id): def extract_title_from_html(html, item_id):
signal.alarm(60) # signal.alarm(60)
try: # try:
soup = BeautifulSoup(html, 'html.parser') soup = BeautifulSoup(html, 'html.parser')
title = soup.title title = soup.title
if title:
title = title.string
if title: if title:
title = title.string return str(title)
if title: # except TimeoutException:
return str(title) # signal.alarm(0)
except TimeoutException: # logger_crawler.warning(f'BeautifulSoup HTML parser timeout: {item_id}')
signal.alarm(0) # else:
logger_crawler.warning(f'BeautifulSoup HTML parser timeout: {item_id}') # signal.alarm(0)
else: # signal.alarm(0)
signal.alarm(0)
signal.alarm(0)
return '' return ''
def extract_description_from_html(html): def extract_description_from_html(html):