diff --git a/tests/test.py b/tests/test.py index a94bbdf6..7b7ce8e2 100644 --- a/tests/test.py +++ b/tests/test.py @@ -5,13 +5,14 @@ import unittest import requests import base64 import json +import os import io import zipfile from hashlib import sha256 from email.mime.application import MIMEApplication from email.mime.text import MIMEText from email.mime.multipart import MIMEMultipart - +from email.header import Header class TestModules(unittest.TestCase): @@ -314,6 +315,46 @@ class TestModules(unittest.TestCase): self.assertEqual(attch_data, 'X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-') + def test_email_body_encoding(self): + query = {"module":"email_import"} + query["config"] = {"unzip_attachments": None, + "guess_zip_attachment_passwords": None, + "extract_urls": None} + filenames = os.listdir("tests/test_files/encodings") + for fn in filenames: + message = get_base_email() + encoding = os.path.splitext(fn) + with open("tests/test_files/encodings/{0}".format(fn), "r", encoding=encoding[0]) as fp: + # Encoding is used as the name of the file + text = fp.read() + message.attach(MIMEText(text, 'html', encoding[0])) + query['data'] = decode_email(message) + data = json.dumps(query) + response = requests.post(self.url + "query", data=data) + + + def test_email_header_encoding(self): + query = {"module":"email_import"} + query["config"] = {"unzip_attachments": None, + "guess_zip_attachment_passwords": None, + "extract_urls": None} + filenames = os.listdir("tests/test_files/encodings") + for encoding in ['utf-8', 'utf-16', 'utf-32']: + message = get_base_email() + text = """I am a test e-mail + the password is NOT "this string". + That is all. + """ + message.attach(MIMEText(text, 'plain')) + for hdr, hdr_val in message.items(): + # Encoding is used as the name of the file + msg = message + hdr_encoded = MIMEText(hdr_val.encode(encoding), 'plain', encoding) + msg[hdr] = Header(hdr_val, encoding) + query['data'] = decode_email(msg) + data = json.dumps(query) + response = requests.post(self.url + "query", data=data) + def test_email_attachment_password_in_subject(self): query = {"module":"email_import"} query["config"] = {"unzip_attachments": "true", diff --git a/tests/test_files/encodings/utf-16.html b/tests/test_files/encodings/utf-16.html new file mode 100644 index 00000000..765eae03 Binary files /dev/null and b/tests/test_files/encodings/utf-16.html differ diff --git a/tests/test_files/encodings/utf-32.html b/tests/test_files/encodings/utf-32.html new file mode 100644 index 00000000..824cdf15 Binary files /dev/null and b/tests/test_files/encodings/utf-32.html differ diff --git a/tests/test_files/encodings/utf8.html b/tests/test_files/encodings/utf8.html new file mode 100644 index 00000000..c9d9b02e --- /dev/null +++ b/tests/test_files/encodings/utf8.html @@ -0,0 +1,169 @@ +๏ปฟ + +
+ + + + + + +The table on this page is identical to the table on +Unicode Plane 1 Characters Encoded as Numeric Character References (NCR). +except this table uses +UTF-8 encoding for the plane 1 characters, and the other uses NCRs +(Numeric Character References +of the form &#dddd; (decimal) or &#xhhhh; (hexadecimal)). +
+The NCR page also +has a discussion of how to set up browsers to view these characters and which browsers work. +If you find browsers or configurations that work let me know. +Note that as of version 6, IE does not support Supplementary characters encoded in UTF-8. +Netscape and Opera do support them. Also Ximian Desktop 2 (XD2) displays this page correctly.
+ +Script (links to Unicode code charts) |
+Origin (in English) |
+Name (English transliteration) |
+Origin (in native language) |
+Name (in native language) + |
+Submitters |
Etruscan | +Rasna (Etruria) | +Aulus Metellus (Aules'i Metelis' ) |
+๐๐๐๐๐ | +๐๐๐๐๐๐ยท๐๐๐๐๐๐๐ | +Marco Cimarosti, James Kass, Andrew "Bass" +Shcheglov, Michka Kaplan Font: CODE2001 |
Deseret | +Utah | +Brigham Young | +๐๐ญ๐ป๐ซ | +๐๐๐ฎ๐๐ฒ๐ ๐๐ฒ๐ | +John Jenkins Font: CODE2001 |
Gothic | +Gothland (Kingdom of the Goths) (thizai +thiudangardjai thize Gutane) |
+Wulfila (also Ulfilas) |
+๐ธ๐น๐ถ๐ฐ๐น ๐ธ๐น๐ฟ๐ณ๐ฐ๐ฝ๐ฒ๐ฐ๐๐ณ๐พ๐ฐ๐น ๐ธ๐น๐ถ๐ด + ๐ฒ๐ฟ๐๐ฐ๐ฝ๐ด |
+๐ ๐ฟ๐ป๐๐น๐ป๐ฐ | +James Kass Font: CODE2001 |
Osmanya | +Somalia | +Cismaan Yuusuf Keenadiid (inventor of Osmanya script) |
+๐๐๐๐๐๐๐๐ + | ++๐๐๐๐๐๐ ๐๐ +๐๐๐ ๐๐๐๐๐ +๐๐ + | +Mark Williamson
+ Font: ANDAGII + |
+
Linear B Syllabary | +Tulisos | +Minos | +๐ถ๐ช๐ฐ | +(Unknown). | +Mark Williamson
+ Font: PENUTURESU + |
+
Shavian | +Great Britain or United Kingdom | +George Bernard Shaw | +ยท๐๐ฎ๐ฑ๐ ยท๐๐ฎ๐ฆ๐๐ฉ๐ฏ
+or ยท๐ฟ๐ฏ๐ฒ๐๐ง๐ ยท๐๐ฆ๐๐๐ณ๐ฅ |
+๐ก๐น๐ก ยท๐๐ป๐ฏ๐ธ๐ ยท๐๐ท | +Doug Ewell based this entry on information from Simon Barne's (now defunct) web site.
+ Font: CODE2001 |