From 86ae72c444bee64228654950f44755517967d86c Mon Sep 17 00:00:00 2001 From: seamus tuohy Date: Mon, 26 Dec 2016 13:55:54 -0800 Subject: [PATCH] Added attachment and url support --- README.md | 41 +++ .../modules/import_mod/email_import.py | 303 ++++++++++++++++-- 2 files changed, 323 insertions(+), 21 deletions(-) diff --git a/README.md b/README.md index 1575aae..b40b828 100644 --- a/README.md +++ b/README.md @@ -87,6 +87,9 @@ def introspection(): The function that returns a dict with the version and the associated meta-data including potential configurations required of the module. + +### Additional Configuration Values + If your module requires additional configuration (to be exposed via the MISP user-interface), you can define those in the moduleconfig value returned by the version function. ~~~python @@ -98,6 +101,7 @@ def version(): return moduleinfo ~~~ + When you do this a config array is added to the meta-data output containing all the potential configuration values: ~~~ @@ -115,6 +119,20 @@ When you do this a config array is added to the meta-data output containing all ... ~~~ + +If you want to use the configuration values set in the web interface they are stored in the key `config` in the JSON object passed to the handler. + +~~~ +def handler(q=False): + + # Check if we were given a configuration + config = q.get("config", {}) + + # Find out if there is a username field + username = config.get("username", None) +~~~ + + ### handler The function which accepts a JSON document to expand the values and return a dictionary of the expanded values. @@ -134,6 +152,29 @@ def handler(q=False): codecs.encode(src, "rot-13")} ~~~ +### Returning Binary Data + +If you want to return a file or other data you need to add a data attribute. 
+ +~~~python +{"results": {"values": "filename.txt", + "types": "attachment", + "data" : base64.b64encode(), # base64 encode your data first + "comment": "This is an attachment"}} +~~~ + +If the binary file is malware you can use 'malware-sample' as the type. If you do this the malware sample will be automatically zipped and password protected ('infected') after being uploaded. + + +~~~python +{"results": {"values": "filename.txt", + "types": "malware-sample", + "data" : base64.b64encode(), # base64 encode your data first + "comment": "This is an attachment"}} +~~~ + + + ### Module type diff --git a/misp_modules/modules/import_mod/email_import.py b/misp_modules/modules/import_mod/email_import.py index 0d66330..8165e0b 100644 --- a/misp_modules/modules/import_mod/email_import.py +++ b/misp_modules/modules/import_mod/email_import.py @@ -3,9 +3,13 @@ import json import base64 +import io +import zipfile +import re from email import message_from_bytes from email.utils import parseaddr -import re +from email.iterators import typed_subpart_iterator +from html.parser import HTMLParser misperrors = {'error': 'Error'} userConfig = { } @@ -17,7 +21,14 @@ moduleinfo = {'version': '0.1', 'description': 'Email import module for MISP', 'module-type': ['import']} -moduleconfig = [] +# treat_attachments_as_malware : This treats all attachments as malware. 
This will zip all attachments and password protect using the password 'infected' +# unzip_attachments : Unzip all zip files that are not password protected +# guess_zip_attachment_passwords : This attempts to unzip all password protected zip files using all the strings found in the email body and subject +# extract_urls : This attempts to extract all URL's from text/html parts of the email +moduleconfig = ["treat_attachments_as_malware", + "unzip_attachments", + "guess_zip_attachment_passwords", + "extract_urls"] def handler(q=False): @@ -31,7 +42,32 @@ def handler(q=False): data = base64.b64decode(request["data"]) message = message_from_bytes(data) - # Extract header information + # Extract all header information + all_headers = "" + for k, v in message.items(): + all_headers += "\n{0}: {1}".format(k, v) + results.append({"values": all_headers, + "types": ['email-header']}) + + # E-Mail MIME Boundry + results.append({"values": message.get_boundary(), + "types": ['email-mime-boundary']}) + + # E-Mail Reply To + results.append({"values": message.get('In-Reply-To'), + "types": ['email-reply-to']}) + + # X-Mailer + results.append({"values": message.get('X-Mailer'), + "types": ['email-x-mailer']}) + + # Thread Index + results.append({"values": message.get('Thread-Index'), + "types": ['email-thread-index']}) + + ## Email Message ID + results.append({"values": message.get('Message-ID'), + "types": ['email-message-id']}) # Subject results.append({"values": message.get('Subject'), @@ -41,12 +77,25 @@ def handler(q=False): from_addr = message.get('From') results.append({"values": parseaddr(from_addr)[1], "types": ['email-src'], - "comment": "From: {0}".format(from_addr)}) + "comment": "From: {0}".format(re.sub('["\']', + '', + from_addr))}) + results.append({"values": parseaddr(from_addr)[1], + "types": ['email-src-display-name'], + "comment": "From: {0}".format(re.sub('["\']', + '', + from_addr))}) + # Return Path return_path = message.get('Return-Path') + # E-Mail Source 
results.append({"values": parseaddr(return_path)[1], "types": ['email-src'], "comment": "Return Path: {0}".format(return_path)}) + # E-Mail Source Name + results.append({"values": parseaddr(return_path)[0], + "types": ['email-src-display-name'], + "comment": "Return Path: {0}".format(return_path)}) # Destinations ## Split and sort destination header values @@ -62,17 +111,20 @@ def handler(q=False): results.append({"values": parsed_addr[1], "types": ["email-dst"], "comment": "{0}: {1}".format(hdr_val, - addr)}) + re.sub('["\']', + '', + addr))}) + results.append({"values": parsed_addr[0], + "types": ["email-dst-display-name"], + "comment": "{0}: {1}".format(hdr_val, + re.sub('["\']', + '', + addr))}) + except AttributeError: continue - # # TODO add 'email-dst-realname' value - # results.append({"values":parsed_addr[1], - # "types":["email-dst-realname"], - # "comment":"{0}: {1}".format(dst_type, - # addr)}) - - # Targets + # Get E-Mail Targets # Get the addresses that received the email. # As pulled from the Received header received = message.get_all('received') @@ -89,32 +141,241 @@ def handler(q=False): "types": ["target-email"], "comment": "Extracted from email 'Received' header"}) - ## TODO add 'email-received-path' value - # received_path = '\n'.join(received) - # results.append({"values":received_path, - # "types":["email-received-path"]}) + # Check if we were given a configuration + config = request.get("config", {}) + # Don't be picky about how the user chooses to say yes to these + acceptable_config_yes = ['y', 'yes', 'true', 't'] - # Attachments + # Do we treat all attachments as malware + treat_attachments_as_malware = config.get("treat_attachments_as_malware", + False) + if treat_attachments_as_malware.lower() in acceptable_config_yes: + treat_attachments_as_malware = True + + # Do we unzip attachments we find? 
+ unzip = config.get("unzip_attachments", False) + if unzip.lower() in acceptable_config_yes: + unzip = True + + # Do we try to find passwords for protected zip files? + zip_pass_crack = config.get("guess_zip_attachment_passwords", False) + if zip_pass_crack.lower() in acceptable_config_yes: + zip_pass_crack = True + password_list = None # Only want to collect password list once + + # Do we extract URL's from the email. + extract_urls = config.get("extract_urls", False) + if extract_urls.lower() in acceptable_config_yes: + extract_urls = True + + # Get Attachments # Get file names of attachments for part in message.walk(): filename = part.get_filename() if filename is not None: - results.append({"values": filename, - "types": ["email-attachment"]}) + attachment_data = part.get_payload(decode=True) + if unzip is True: # Attempt to unzip the attachment and return its files + try: + attachment_files = get_zipped_contents(filename, + attachment_data) + except RuntimeError: # File is encrypted with a password + if zip_pass_crack is True: + if password_list is None: + password_list = get_zip_passwords(message) + password = test_zip_passwords(attachment_data, password_list) + # If we don't guess the password just use the zip + if password is None: + attachment_files = [{"values": filename, + "data" : base64.b64encode(attachment_data), + "comment":"Password could not be cracked from message"}] + else: + attachment_files = get_zipped_contents(filename, + attachment_data, + password=password) + except zipfile.BadZipFile: # Attachment is not a zipfile + attachment_files = [{"values": filename, + "data" : base64.b64encode(attachment_data)}] + else: + attachment_files = [{"values": filename, + "data" : base64.b64encode(attachment_data)}] + for attch_item in attachment_files: + if treat_attachments_as_malware is True: # Malware-samples are encrypted by server + attch_item["types"] = ['malware-sample'] + else: + attch_item["types"] = ['attachment'] + results.append(attch_item) + 
else: # Check email body part for urls + if (extract_urls is True and part.get_content_type() == 'text/html'): + url_parser = HTMLURLParser() + charset = get_charset(i, get_charset(message)) + url_parser.feed(part.get_payload(decode=True).decode(charset)) + urls = url_parser.urls + for url in urls: + results.append({"values": url, + "types": "url"}) r = {'results': results} return r +def get_zipped_contents(filename, data, password=None): + """Extract the contents of a zipfile. + + Args: + filename (str): A string containing the name of the zip file. + data (decoded attachment data): Data object decoded from an e-mail part. + + Returns: + Returns an array containing a dict for each file + Example Dict {"values":"name_of_file.txt", + "data":, + "comment":"string here"} + + """ + with zipfile.ZipFile(io.BytesIO(data), "r") as zf: + unzipped_files = [] + if password is not None: + password = str.encode(password) # Byte encoded password required + for zip_file_name in zf: # Get all files in the zip file + unzipped_files.append({"values": zip_file_name, + "data" : base64.b64encode(zf.open(zip_file_name, + mode='rU', + pwd=password)), # Any password works when not encrypted + "comment": "Extracted from {0}".format(filename)}) + return unzipped_files + + +def test_zip_passwords(data, test_passwords): + """Test passwords until one is found to be correct. + + Args: + data (decoded attachment data): Data object decoded from an e-mail part. + test_passwords (array): List of strings to test as passwords + + Returns: + Returns a byte string containing a found password and None if password is not found. + + """ + with zipfile.ZipFile(io.BytesIO(data), "r") as zf: + for pw_test in test_passwords: + byte_pwd = str.encode(pw_test) + try: + zf.testzip() + return byte_pwd + except RuntimeError: # Incorrect Password + continue + return None + +def get_zip_passwords(message): + """ Parse message for possible zip password combinations. 
+ + Args: + message (email.message) Email message object to parse. + """ + possible_passwords = [] + # Passwords commonly used for malware + malware_passwords = ["infected", "malware"] + possible_passwords += malware_passwords + # Commonly used passwords + common_passwords = ["123456", "password", "12345678", "qwerty", + "abc123", "123456789", "111111", "1234567", + "iloveyou", "adobe123", "123123", "sunshine", + "1234567890", "letmein", "1234", "monkey", + "shadow", "sunshine", "12345", "password1", + "princess", "azerty", "trustno1", "000000"] + + possible_passwords += common_passwords + + # Not checking for multi-part message because by having an + # encrypted zip file it must be multi-part. + text_parts = [part for part in typed_subpart_iterator(message, + 'text', + 'plain')] + html_parts = [part for part in typed_subpart_iterator(message, + 'text', + 'html')] + body = [] + # Get full message character set once + # Language example reference (using python2) + # http://ginstrom.com/scribbles/2007/11/19/parsing-multilingual-email-with-python/ + message_charset = get_charset(message) + for part in text_parts: + charset = get_charset(part, message_charset) + body.append(part.get_payload(decode=True).decode(charset)) + for part in html_parts: + charset = get_charset(part, message_charset) + html_part = part.get_payload(decode=True).decode(charset) + html_parser = HTMLTextParser() + html_parser.feed(html_part) + for text in html_parser.text_data: + body.append(text) + raw_text = "\n".join(body).strip() + + # Add subject to text corpus to parse + subject = " " + message.get('Subject') + raw_text += subject + + # Grab any strings that are marked off by special chars + marking_chars = [["'", "'"], ['"', '"'], ['[', ']'], ['(', ')']] + for char_set in marking_chars: + regex = re.compile("'{0}([^{1}]*){1}'".format(char_set[0], + char_set[1])) + marked_off = re.findall(regex, raw_text) + possible_passwords += marked_off + + # Create a list of unique words to test as 
passwords + individual_words = re.split(r"\s", raw_text) + # Also get words with basic punctuation stripped out + # just in case someone places a password in a proper sentence + stripped_words = [i.strip('.,;:?!') for i in individual_words] + unique_words = list(set(individual_words + stripped_words)) + possible_passwords += unique_words + + return possible_passwords + +class HTMLTextParser(HTMLParser): + """ Parse all text and data from HTML strings.""" + def __init__(self, text_data=None): + HTMLParser.__init__(self) + if text_data is None: + self.text_data = [] + else: + self.text_data = text_data + def handle_data(self, data): + self.text_data.append(data) + +class HTMLURLParser(HTMLParser): + """ Parse all href targets from HTML strings.""" + def __init__(self, urls=None): + HTMLParser.__init__(self) + if urls is None: + self.urls = [] + else: + self.urls = output_list + def handle_starttag(self, tag, attrs): + if tag == 'a': + self.urls.append(dict(attrs).get('href')) + +def get_charset(message, default="ascii"): + """Get a message objects charset + + Args: + message (email.message): Email message object to parse. + default (string): String containing default charset to return. + """ + if message.get_content_charset(): + return message.get_content_charset() + if message.get_charset(): + return message.get_charset() + return default + def introspection(): modulesetup = {} try: - userConfig modulesetup['userConfig'] = userConfig except NameError: pass try: - inputSource modulesetup['inputSource'] = inputSource except NameError: pass