mirror of https://github.com/CIRCL/AIL-framework
fix: [PgpDump] catch bs4 error
parent
3ec72b0430
commit
0ec56cf1ed
|
@ -41,16 +41,19 @@ def save_in_file(message, pgp_content):
|
||||||
r_serv_db.sadd('pgpdumb:uuid', '{};{}'.format(UUID, message))
|
r_serv_db.sadd('pgpdumb:uuid', '{};{}'.format(UUID, message))
|
||||||
|
|
||||||
def remove_html(item_content):
|
def remove_html(item_content):
|
||||||
if bool(BeautifulSoup(item_content, "html.parser").find()):
|
try:
|
||||||
soup = BeautifulSoup(item_content, 'html.parser')
|
if bool(BeautifulSoup(item_content, "html.parser").find()):
|
||||||
# kill all script and style elements
|
soup = BeautifulSoup(item_content, 'html.parser')
|
||||||
for script in soup(["script", "style"]):
|
# kill all script and style elements
|
||||||
script.extract() # remove
|
for script in soup(["script", "style"]):
|
||||||
|
script.extract() # remove
|
||||||
|
|
||||||
# get text
|
# get text
|
||||||
text = soup.get_text()
|
text = soup.get_text()
|
||||||
return text
|
return text
|
||||||
else:
|
else:
|
||||||
|
return item_content
|
||||||
|
except TypeError:
|
||||||
return item_content
|
return item_content
|
||||||
|
|
||||||
def extract_all_id(message, item_content, regex=None, is_file=False):
|
def extract_all_id(message, item_content, regex=None, is_file=False):
|
||||||
|
|
Loading…
Reference in New Issue