fix: Better decoding, update sample config

tests
Raphaël Vinot 2018-05-11 10:15:16 -04:00
parent b7d582927c
commit 9d7e992219
2 changed files with 8 additions and 7 deletions

View File

@ -49,7 +49,7 @@ class Mail2MISP():
self.subject = self.original_mail.get('Subject') self.subject = self.original_mail.get('Subject')
# Remove words from subject # Remove words from subject
for removeword in self.config.removelist: for removeword in self.config.removelist:
self.subject = re.sub(removeword, "", self.subject) self.subject = re.sub(removeword, "", self.subject).strip()
# Initialize the MISP event # Initialize the MISP event
self.misp_event = MISPEvent() self.misp_event = MISPEvent()
@ -97,7 +97,7 @@ class Mail2MISP():
'''The email comes from a spamtrap and should be attached as-is.''' '''The email comes from a spamtrap and should be attached as-is.'''
raw_body = self.original_mail.get_body(preferencelist=('html', 'plain')) raw_body = self.original_mail.get_body(preferencelist=('html', 'plain'))
if raw_body: if raw_body:
self.clean_email_body = html.unescape(raw_body.get_payload(decode=True).decode()) self.clean_email_body = html.unescape(raw_body.get_payload(decode=True).decode('utf8', 'surrogateescape'))
else: else:
self.clean_email_body = '' self.clean_email_body = ''
return self.forwarded_email(self.pseudofile) return self.forwarded_email(self.pseudofile)
@ -129,14 +129,14 @@ class Mail2MISP():
def process_email_body(self): def process_email_body(self):
mail_as_bytes = self.original_mail.get_body(preferencelist=('html', 'plain')).get_payload(decode=True) mail_as_bytes = self.original_mail.get_body(preferencelist=('html', 'plain')).get_payload(decode=True)
if mail_as_bytes: if mail_as_bytes:
self.clean_email_body = html.unescape(mail_as_bytes.decode()) self.clean_email_body = html.unescape(mail_as_bytes.decode('utf8', 'surrogateescape'))
# Check if there are config lines in the body & convert them to a python dictionary: # Check if there are config lines in the body & convert them to a python dictionary:
# <config.body_config_prefix>:<key>:<value> => {<key>: <value>} # <config.body_config_prefix>:<key>:<value> => {<key>: <value>}
self.config_from_email_body = {k: v for k, v in re.findall(f'{config.body_config_prefix}:(.*):(.*)', self.clean_email_body)} self.config_from_email_body = {k: v for k, v in re.findall(f'{config.body_config_prefix}:(.*):(.*)', self.clean_email_body)}
if self.config_from_email_body: if self.config_from_email_body:
# ... remove the config lines from the body # ... remove the config lines from the body
self.clean_email_body = re.sub(rf'^{config.body_config_prefix}.*\n?', '', self.clean_email_body = re.sub(rf'^{config.body_config_prefix}.*\n?', '',
html.unescape(self.original_mail.get_body(preferencelist=('html', 'plain')).get_payload(decode=True).decode()), flags=re.MULTILINE) html.unescape(self.original_mail.get_body(preferencelist=('html', 'plain')).get_payload(decode=True).decode('utf8', 'surrogateescape')), flags=re.MULTILINE)
# Check if autopublish key is present and valid # Check if autopublish key is present and valid
if self.config_from_email_body.get('m2mkey') == self.config.m2m_key: if self.config_from_email_body.get('m2mkey') == self.config.m2m_key:
@ -149,7 +149,7 @@ class Mail2MISP():
def process_body_iocs(self, email_object=None): def process_body_iocs(self, email_object=None):
if email_object: if email_object:
body = html.unescape(email_object.email.get_body(preferencelist=('html', 'plain')).get_payload(decode=True).decode()) body = html.unescape(email_object.email.get_body(preferencelist=('html', 'plain')).get_payload(decode=True).decode('utf8', 'surrogateescape'))
else: else:
body = self.clean_email_body body = self.clean_email_body

View File

@ -39,8 +39,9 @@ enforcewarninglist = True
sighting = True sighting = True
sighting_source = "YOUR_MAIL_TO_MISP_IDENTIFIER" sighting_source = "YOUR_MAIL_TO_MISP_IDENTIFIER"
# Remove "[tags]", "Re: ", "Fwd: " from subject # Remove "Re:", "Fwd:" and {Spam?} from subject
removelist = ("[\(\[].*?[\)\]]", "Re: ", "Fwd: ", "{Spam?} ") # add: "[\(\[].*?[\)\]]" to remove everything between [] and (): i.e. [tag]
removelist = ("Re:", "Fwd:", "\{Spam\?\} ")
# TLP tag setup # TLP tag setup
# Tuples contain different variations of spelling # Tuples contain different variations of spelling