mirror of https://github.com/MISP/misp-modules
fix: Somewhat broken emails needed some love
parent
6fcd9c9b8d
commit
b70c32af7b
|
@ -111,19 +111,20 @@ def handler(q=False):
|
||||||
|
|
||||||
mail_body = email_object.email.get_body(preferencelist=('html', 'plain'))
|
mail_body = email_object.email.get_body(preferencelist=('html', 'plain'))
|
||||||
if extract_urls:
|
if extract_urls:
|
||||||
charset = mail_body.get_content_charset()
|
if mail_body:
|
||||||
if mail_body.get_content_type() == 'text/html':
|
charset = mail_body.get_content_charset()
|
||||||
url_parser = HTMLURLParser()
|
if mail_body.get_content_type() == 'text/html':
|
||||||
url_parser.feed(mail_body.get_payload(decode=True).decode(charset, errors='ignore'))
|
url_parser = HTMLURLParser()
|
||||||
urls = url_parser.urls
|
url_parser.feed(mail_body.get_payload(decode=True).decode(charset, errors='ignore'))
|
||||||
else:
|
urls = url_parser.urls
|
||||||
urls = re.findall(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', mail_body.get_payload(decode=True).decode(charset, errors='ignore'))
|
else:
|
||||||
for url in urls:
|
urls = re.findall(r'https?://(?:[-\w.]|(?:%[\da-fA-F]{2}))+', mail_body.get_payload(decode=True).decode(charset, errors='ignore'))
|
||||||
if not url:
|
for url in urls:
|
||||||
continue
|
if not url:
|
||||||
url_object = URLObject(url, standalone=False)
|
continue
|
||||||
file_objects.append(url_object)
|
url_object = URLObject(url, standalone=False)
|
||||||
email_object.add_reference(url_object.uuid, 'includes', 'URL in email body')
|
file_objects.append(url_object)
|
||||||
|
email_object.add_reference(url_object.uuid, 'includes', 'URL in email body')
|
||||||
|
|
||||||
objects = [email_object.to_json()]
|
objects = [email_object.to_json()]
|
||||||
if file_objects:
|
if file_objects:
|
||||||
|
@ -213,18 +214,23 @@ def get_zip_passwords(message):
|
||||||
body = []
|
body = []
|
||||||
for part in message.walk():
|
for part in message.walk():
|
||||||
charset = part.get_content_charset()
|
charset = part.get_content_charset()
|
||||||
|
if not charset:
|
||||||
|
charset = "utf-8"
|
||||||
if part.get_content_type() == 'text/plain':
|
if part.get_content_type() == 'text/plain':
|
||||||
body.append(part.get_payload(decode=True).decode(charset, errors='ignore'))
|
body.append(part.get_payload(decode=True).decode(charset, errors='ignore'))
|
||||||
elif part.get_content_type() == 'text/html':
|
elif part.get_content_type() == 'text/html':
|
||||||
html_parser = HTMLTextParser()
|
html_parser = HTMLTextParser()
|
||||||
html_parser.feed(part.get_payload(decode=True).decode(charset, errors='ignore'))
|
payload = part.get_payload(decode=True)
|
||||||
for text in html_parser.text_data:
|
if payload:
|
||||||
body.append(text)
|
html_parser.feed(payload.decode(charset, errors='ignore'))
|
||||||
|
for text in html_parser.text_data:
|
||||||
|
body.append(text)
|
||||||
raw_text = "\n".join(body).strip()
|
raw_text = "\n".join(body).strip()
|
||||||
|
|
||||||
# Add subject to text corpus to parse
|
# Add subject to text corpus to parse
|
||||||
subject = " " + message.get('Subject')
|
if "Subject" in message:
|
||||||
raw_text += subject
|
subject = " " + message.get('Subject')
|
||||||
|
raw_text += subject
|
||||||
|
|
||||||
# Grab any strings that are marked off by special chars
|
# Grab any strings that are marked off by special chars
|
||||||
marking_chars = [["\'", "\'"], ['"', '"'], ['[', ']'], ['(', ')']]
|
marking_chars = [["\'", "\'"], ['"', '"'], ['[', ']'], ['(', ')']]
|
||||||
|
|
Loading…
Reference in New Issue