diff --git a/tests/test.py b/tests/test.py
index a94bbdf..7b7ce8e 100644
--- a/tests/test.py
+++ b/tests/test.py
@@ -5,13 +5,14 @@ import unittest
 import requests
 import base64
 import json
+import os
 import io
 import zipfile
 from hashlib import sha256
 from email.mime.application import MIMEApplication
 from email.mime.text import MIMEText
 from email.mime.multipart import MIMEMultipart
-
+from email.header import Header
 
 class TestModules(unittest.TestCase):
 
@@ -314,6 +315,63 @@ class TestModules(unittest.TestCase):
         self.assertEqual(attch_data,
                          'X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-')
 
+    def test_email_body_encoding(self):
+        """Post e-mails whose HTML body uses each fixture encoding.
+
+        Each file in tests/test_files/encodings is named after the codec it
+        is stored in (e.g. utf-16.html); that name is used both to read the
+        file and as the charset of the attached MIME part.
+        """
+        query = {"module":"email_import"}
+        query["config"] = {"unzip_attachments": None,
+                           "guess_zip_attachment_passwords": None,
+                           "extract_urls": None}
+        fixture_dir = "tests/test_files/encodings"
+        for fn in sorted(os.listdir(fixture_dir)):
+            message = get_base_email()
+            # The file name minus its extension is the codec name.
+            encoding = os.path.splitext(fn)[0]
+            with open(os.path.join(fixture_dir, fn), "r", encoding=encoding) as fp:
+                text = fp.read()
+            message.attach(MIMEText(text, 'html', encoding))
+            query['data'] = decode_email(message)
+            data = json.dumps(query)
+            response = requests.post(self.url + "query", data=data)
+            self.assertTrue(response.ok,
+                            "{0} body: HTTP {1}".format(encoding, response.status_code))
+
+    def test_email_header_encoding(self):
+        """Post e-mails that carry headers encoded as UTF-8, UTF-16 and UTF-32.
+
+        For every header of the base message an encoded copy is added and the
+        message is posted again, once per header and per encoding.
+        """
+        query = {"module":"email_import"}
+        query["config"] = {"unzip_attachments": None,
+                           "guess_zip_attachment_passwords": None,
+                           "extract_urls": None}
+        for encoding in ['utf-8', 'utf-16', 'utf-32']:
+            message = get_base_email()
+            text = """I am a test e-mail
+            the password is NOT "this string".
+            That is all.
+            """
+            message.attach(MIMEText(text, 'plain'))
+            # items() returns a new list, so adding headers below does not
+            # disturb this iteration.
+            for hdr, hdr_val in message.items():
+                # NOTE(review): item assignment appends a second `hdr` field
+                # rather than replacing the plain one, and the additions pile
+                # up on `message` from one request to the next. Confirm that
+                # is intended; replace_header() would swap the value instead.
+                message[hdr] = Header(hdr_val, encoding)
+                query['data'] = decode_email(message)
+                data = json.dumps(query)
+                response = requests.post(self.url + "query", data=data)
+                self.assertTrue(response.ok,
+                                "{0} {1} header: HTTP {2}".format(
+                                    encoding, hdr, response.status_code))
+
     def test_email_attachment_password_in_subject(self):
         query = {"module":"email_import"}
         query["config"] = {"unzip_attachments": "true",
diff --git a/tests/test_files/encodings/utf-16.html b/tests/test_files/encodings/utf-16.html
new file mode 100644
index 0000000..765eae0
Binary files /dev/null and b/tests/test_files/encodings/utf-16.html differ
diff --git a/tests/test_files/encodings/utf-32.html b/tests/test_files/encodings/utf-32.html
new file mode 100644
index 0000000..824cdf1
Binary files /dev/null and b/tests/test_files/encodings/utf-32.html differ
diff --git a/tests/test_files/encodings/utf8.html b/tests/test_files/encodings/utf8.html
new file mode 100644
index 0000000..c9d9b02
--- /dev/null
+++ b/tests/test_files/encodings/utf8.html
@@ -0,0 +1,169 @@
+๏ปฟ
+
+
+
+
+
+
+
+
+Unicode Plane 1 Supplementary Character Examples using UTF-8
+
+
+
+
+
+
+

Example Unicode Usage For Business Applications

+

Demonstrating Unicode Plane 1 (Supplementary) Characters Encoded in UTF-8

+ +
+Also see: Introduction to the Compelling Unicode Demo. +
The original Compelling Unicode Demo (BMP) page. +
I18nGuy Home Page +
+ +

The table on this page is identical to the table on +Unicode Plane 1 Characters Encoded as Numeric Character References (NCR). +except this table uses +UTF-8 encoding for the plane 1 characters, and the other uses NCRs +(Numeric Character References +of the form &#dddd; (decimal) or &#xhhhh; (hexadecimal)). +

+

The NCR page also +has a discussion of how to set up browsers to view these characters and which browsers work. +If you find browsers or configurations that work let me know. +Note that as of version 6, IE does not support Supplementary characters encoded in UTF-8. +Netscape and Opera do support them. Also Ximian Desktop 2 (XD2) displays this page correctly.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Example Plane 1 Unicode Data
Script
(links to Unicode code charts)
Origin
(in English)
Name
(English transliteration)
Origin
(in native language)
Name
(in native language) +
Submitters
EtruscanRasna (Etruria) Aulus Metellus
(Aules'i Metelis' )
๐Œ“๐Œ€๐Œ”๐Œ๐Œ€๐Œ€๐Œ–๐Œ‹๐Œ„๐Œ‘๐Œ‰ยท๐ŒŒ๐Œ„๐Œ•๐Œ„๐Œ‹๐Œ‰๐Œ‘Marco Cimarosti,
James Kass,
Andrew "Bass" +Shcheglov,
Michka Kaplan
Font: CODE2001
DeseretUtahBrigham Young𐐏𐐭𐐻𐐫 𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍 John Jenkins
Font: CODE2001
GothicGothland
(Kingdom of the Goths)
(thizai +thiudangardjai thize Gutane)
Wulfila
(also Ulfilas)
๐Œธ๐Œน๐Œถ๐Œฐ๐Œน
๐Œธ๐Œน๐Œฟ๐Œณ๐Œฐ๐Œฝ๐Œฒ๐Œฐ๐‚๐Œณ๐Œพ๐Œฐ๐Œน
๐Œธ๐Œน๐Œถ๐Œด +
๐Œฒ๐Œฟ๐„๐Œฐ๐Œฝ๐Œด
๐…๐Œฟ๐Œป๐†๐Œน๐Œป๐Œฐ James Kass
Font: CODE2001
OsmanyaSomaliaCismaan Yuusuf Keenadiid
(inventor of Osmanya script)
๐’ˆ๐’๐’‘๐’›๐’๐’˜๐’•๐’– + +๐’‹๐’˜๐’ˆ๐’‘๐’›๐’’ ๐’•๐’“ +๐’ˆ๐’š๐’ ๐’๐’œ๐’’๐’–๐’† +๐’•๐’† +Mark Williamson +
Font: ANDAGII +
Linear B SyllabaryTulisosMinos𐀶𐀪𐀰(Unknown).Mark Williamson +
Font: PENUTURESU +
ShavianGreat Britain or United KingdomGeorge Bernard Shaw·𐑜𐑮𐑱𐑑 ·𐑚𐑮𐑦𐑑𐑩𐑯 +or
·𐑿𐑯𐑲𐑑𐑧𐑛 ·𐑒𐑦𐑙𐑛𐑳𐑥
𐑡𐑹𐑡 ·𐑚𐑻𐑯𐑸𐑛 ·𐑖𐑷Doug Ewell based this entry on information from Simon Barne's (now defunct) web site. +
Font: CODE2001
+ +
+

Fonts

+ +
+ +
+Encoded in UTF-8! +Top of page +
This page last updated 2008-11-15 +
+ + + + + +