fix: Somewhat broken emails needed some love

pull/360/head
Raphaël Vinot 2019-12-05 19:11:01 +01:00
parent 6fcd9c9b8d
commit b70c32af7b
1 changed file with 24 additions and 18 deletions

View File

@@ -111,6 +111,7 @@ def handler(q=False):
mail_body = email_object.email.get_body(preferencelist=('html', 'plain'))
if extract_urls:
if mail_body:
charset = mail_body.get_content_charset()
if mail_body.get_content_type() == 'text/html':
url_parser = HTMLURLParser()
@@ -213,16 +214,21 @@ def get_zip_passwords(message):
body = []
for part in message.walk():
charset = part.get_content_charset()
if not charset:
charset = "utf-8"
if part.get_content_type() == 'text/plain':
body.append(part.get_payload(decode=True).decode(charset, errors='ignore'))
elif part.get_content_type() == 'text/html':
html_parser = HTMLTextParser()
html_parser.feed(part.get_payload(decode=True).decode(charset, errors='ignore'))
payload = part.get_payload(decode=True)
if payload:
html_parser.feed(payload.decode(charset, errors='ignore'))
for text in html_parser.text_data:
body.append(text)
raw_text = "\n".join(body).strip()
# Add subject to text corpus to parse
if "Subject" in message:
subject = " " + message.get('Subject')
raw_text += subject