diff --git a/misp_modules/modules/import_mod/email_import.py b/misp_modules/modules/import_mod/email_import.py index 2027303..e7564f1 100644 --- a/misp_modules/modules/import_mod/email_import.py +++ b/misp_modules/modules/import_mod/email_import.py @@ -144,20 +144,33 @@ def handler(q=False): # Base attachment data is default attachment_files = [{"values": filename, "data": base64.b64encode(attachment_data).decode()}] if unzip is True: # Attempt to unzip the attachment and return its files - try: - attachment_files += get_zipped_contents(filename, attachment_data) - except RuntimeError: # File is encrypted with a password - if zip_pass_crack is True: - if password_list is None: - password_list = get_zip_passwords(message) - password = test_zip_passwords(attachment_data, password_list) - if password is None: # Inform the analyst that we could not crack password - attachment_files[0]['comment'] = "Encrypted Zip: Password could not be cracked from message" - else: - attachment_files[0]['comment'] = """Original Zipped Attachment with Password {0}""".format(password) - attachment_files += get_zipped_contents(filename, attachment_data, password=password) - except zipfile.BadZipFile: # Attachment is not a zipfile - attachment_files += [{"values": filename, "data": base64.b64encode(attachment_data).decode()}] + zipped_files = ["doc", "docx", "dot", "dotx", "xls", + "xlsx", "xlm", "xla", "xlc", "xlt", + "xltx", "xlw", "ppt", "pptx", "pps", + "ppsx", "pot", "potx", "potx", "sldx", + "odt", "ods", "odp", "odg", "odf", + "fodt", "fods", "fodp", "fodg", "ott", + "uot"] + + zipped_filetype = False + for ext in zipped_files: + if filename.endswith(ext) is True: + zipped_filetype = True + if zipped_filetype == False: + try: + attachment_files += get_zipped_contents(filename, attachment_data) + except RuntimeError: # File is encrypted with a password + if zip_pass_crack is True: + if password_list is None: + password_list = get_zip_passwords(message) + password = test_zip_passwords(attachment_data, password_list) + if password is None: # Inform the analyst that we could not crack password + attachment_files[0]['comment'] = "Encrypted Zip: Password could not be cracked from message" + else: + attachment_files[0]['comment'] = """Original Zipped Attachment with Password {0}""".format(password) + attachment_files += get_zipped_contents(filename, attachment_data, password=password) + except zipfile.BadZipFile: # Attachment is not a zipfile + pass for attch_item in attachment_files: attch_item["type"] = 'malware-sample' results.append(attch_item) diff --git a/tests/test.py b/tests/test.py index d15144d..a94bbdf 100644 --- a/tests/test.py +++ b/tests/test.py @@ -7,6 +7,7 @@ import base64 import json import io import zipfile +from hashlib import sha256 from email.mime.application import MIMEApplication from email.mime.text import MIMEText from email.mime.multipart import MIMEMultipart @@ -101,11 +102,7 @@ class TestModules(unittest.TestCase): self.assertEqual(types['email-x-mailer'], 1) self.assertIn("mlx 5.1.7", values) self.assertEqual(types['email-reply-to'], 1) - # The parser inserts a newline that I can't diagnose. - # It does not impact analysis since the interface strips it. - # But, I'm leaving this test failing self.assertIn("", values) - #self.assertIn("\n ", values) def test_email_attachment_basic(self): query = {"module":"email_import"} @@ -162,6 +159,39 @@ class TestModules(unittest.TestCase): self.assertEqual(attch_data, b'X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-') + def test_email_dont_unpack_compressed_doc_attachments(self): + """Ensures that compressed + """ + query = {"module":"email_import"} + query["config"] = {"unzip_attachments": "true", + "guess_zip_attachment_passwords": None, + "extract_urls": None} + message = get_base_email() + text = """I am a test e-mail""" + message.attach(MIMEText(text, 'plain')) + with open("tests/test_files/test.docx", "rb") as fp: + eicar_mime = MIMEApplication(fp.read(), 'zip') + eicar_mime.add_header('Content-Disposition', 'attachment', filename="test.docx") + message.attach(eicar_mime) + query['data'] = decode_email(message) + data = json.dumps(query) + response = requests.post(self.url + "query", data=data) + values = [x["values"] for x in response.json()["results"]] + self.assertIn('test.docx', values) + types = {} + for i in response.json()['results']: + types.setdefault(i["type"], 0) + types[i["type"]] += 1 + # Check that there is only one attachment in the bundle + self.assertEqual(types['malware-sample'], 1) + for i in response.json()['results']: + if i['type'] == 'malware-sample' and i["values"] == 'test.docx': + attch_data = base64.b64decode(i["data"]) + filesum = sha256() + filesum.update(attch_data) + self.assertEqual(filesum.hexdigest(), + '098da5381a90d4a51e6b844c18a0fecf2e364813c2f8b317cfdc51c21f2506a5') + def test_email_attachment_unpack_with_password(self): query = {"module":"email_import"} diff --git a/tests/test_files/test.docx b/tests/test_files/test.docx new file mode 100644 index 0000000..df59d12 Binary files /dev/null and b/tests/test_files/test.docx differ