Added unit tests for UTF emails

pull/129/head
seamus tuohy 2017-01-11 17:53:54 -05:00
parent bf5ed3d032
commit 0566049c63
4 changed files with 211 additions and 1 deletions

View File

@ -5,13 +5,14 @@ import unittest
import requests
import base64
import json
import os
import io
import zipfile
from hashlib import sha256
from email.mime.application import MIMEApplication
from email.mime.text import MIMEText
from email.mime.multipart import MIMEMultipart
from email.header import Header
class TestModules(unittest.TestCase):
@ -314,6 +315,46 @@ class TestModules(unittest.TestCase):
self.assertEqual(attch_data,
'X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-')
def test_email_body_encoding(self):
    """Submit an e-mail with an HTML body in each fixture encoding.

    Every file in ``tests/test_files/encodings`` is named after the codec
    it is stored in. Each file is read with that codec, attached as an
    HTML part (declared with the same charset) to a message obtained from
    ``get_base_email()``, and posted to the ``email_import`` module.

    The test fails if reading, encoding or posting raises, or if the
    service answers with an HTTP error status.
    """
    query = {"module": "email_import",
             "config": {"unzip_attachments": None,
                        "guess_zip_attachment_passwords": None,
                        "extract_urls": None}}
    encodings_dir = "tests/test_files/encodings"
    for fn in os.listdir(encodings_dir):
        message = get_base_email()
        # The file name without its extension doubles as the codec name.
        encoding, _ext = os.path.splitext(fn)
        with open(os.path.join(encodings_dir, fn), "r", encoding=encoding) as fp:
            text = fp.read()
        message.attach(MIMEText(text, 'html', encoding))
        query['data'] = decode_email(message)
        response = requests.post(self.url + "query", data=json.dumps(query))
        # Without this check an HTTP error response would pass unnoticed.
        response.raise_for_status()
def test_email_header_encoding(self):
    """Submit e-mails whose headers are re-encoded as UTF-8, UTF-16 and UTF-32.

    For each charset a message is obtained from ``get_base_email()`` and
    given a plain-text body. Its headers are then re-encoded one at a
    time, and the message is posted to the ``email_import`` module after
    every change. The changes accumulate, so each post carries all of the
    headers re-encoded so far.

    The test fails if encoding or posting raises, or if the service
    answers with an HTTP error status.
    """
    query = {"module": "email_import",
             "config": {"unzip_attachments": None,
                        "guess_zip_attachment_passwords": None,
                        "extract_urls": None}}
    text = """I am a test e-mail
the password is NOT "this string".
That is all.
"""
    for encoding in ['utf-8', 'utf-16', 'utf-32']:
        message = get_base_email()
        message.attach(MIMEText(text, 'plain'))
        # items() returns a list snapshot, so the message can be modified
        # safely while looping over it.
        for hdr, hdr_val in message.items():
            # Assigning with message[hdr] = ... would append a second
            # header of the same name and leave the unencoded one first.
            # replace_header() swaps the value in place.
            # NOTE(review): this also rewrites any structural headers
            # (e.g. Content-Type) that get_base_email() sets - confirm
            # that is intended.
            message.replace_header(hdr, Header(hdr_val, encoding))
            query['data'] = decode_email(message)
            response = requests.post(self.url + "query", data=json.dumps(query))
            # Without this check an HTTP error response would pass unnoticed.
            response.raise_for_status()
def test_email_attachment_password_in_subject(self):
query = {"module":"email_import"}
query["config"] = {"unzip_attachments": "true",

Binary file not shown.

Binary file not shown.

View File

@ -0,0 +1,169 @@
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
<html lang="en-US">
<head>
<META http-equiv="Content-Type" content= "text/html; charset=UTF-8">
<META name="copyright" content="&copy; 2001-2008, Tex Texin">
<META http-equiv="Content-Language" content="en-US">
<META name="keywords" lang="en-US" content="Unicode, supplementary, business">
<META name="keywords" lang="en-US" content="UTF-8, Tex Texin, i18nGuy">
<META name="Author" content="Tex Texin">
<title>Unicode Plane 1 Supplementary Character Examples using UTF-8</title>
<meta http-equiv="Content-Style-Type" content="text/css" >
<link href="css/unicode-example.css" rel="stylesheet" type="text/css" >
<style type="text/css">
p {
width : 80%;
margin : 1em auto;
}
h3 {
text-align:center;
}
div.center {
font-size : 80%;
}
td.english {
font-size : 80%;
width : 12%;
padding : 2px 4px;
color : black;
background-color: #F8F8F8;
}
td.submitter {
font-size : 70%;
width : 20%;
padding : 2px 4px;
color : black;
background-color: #F8F8F8 ;
}
</style>
</head>
<body>
<h1 id="top">Example Unicode Usage For Business Applications</h1>
<h3>Demonstrating Unicode Plane 1 (Supplementary) Characters Encoded in UTF-8</h3>
<div class="center">
Also see: <a href="unicode/unicode-example-intro.html">Introduction to the Compelling Unicode Demo</a>.
<br>The original <a href="unicode-example.html">Compelling Unicode Demo</a> (BMP) page.
<br><a href="index.html">I18nGuy Home Page</a>
</div>
<p>The table on this page is identical to the table on
<a href="unicode-example-plane1.html">Unicode Plane 1 Characters Encoded as Numeric Character References (NCR).</a>
except this table uses
<a href="http://www.unicode.org/glossary/#UTF_8">UTF-8</a> encoding for the plane 1 characters, and the other uses NCRs
<b>(</b><a href="http://www.w3.org/TR/html401/charset.html#h-5.3.1">Numeric Character References</a>
of the form &amp;#dddd; (decimal) or &amp;#xhhhh; (hexadecimal)<b>)</b>.
</p>
<p style="BACKGROUND-COLOR: yellow">The NCR page also
has a discussion of how to set up browsers to view these characters and which browsers work.
If you find browsers or configurations that work let me know.
Note that as of version 6, IE does not support Supplementary characters encoded in UTF-8.
Netscape and Opera do support them. Also Ximian Desktop 2 (XD2) displays this page correctly.</p>
<TABLE class="ctr">
<CAPTION>Example Plane 1 Unicode Data</CAPTION>
<TBODY>
<TR>
<TD class="english"><B>Script</B><br><span class="small">(links to Unicode code charts)</span></TD>
<TD class="english"><B>Origin</B> <BR><span class="small">(in English)</span> </TD>
<TD class="english"><B>Name</B> <BR><span class="small">(English transliteration)</span> </TD>
<TD class="native"><B>Origin</B> <BR><span class="small">(in native language)</span> </TD>
<TD class="native"><B>Name</B> <BR><span class="small">(in native language)</span>
</TD>
<TD class="submitter"><B>Submitters</B> </TD></TR>
<TR>
<TD class="english"><a href="http://www.unicode.org/charts/PDF/U10300.pdf" target="_blank">Etruscan</a></TD>
<TD class="english">Rasna (Etruria) </TD>
<TD class="english">Aulus Metellus <BR>(Aules'i Metelis' )</TD>
<TD class="rtlplane1"><BDO dir=rtl>𐌓𐌀𐌔𐌍𐌀</BDO></TD>
<TD class="rtlplane1"><BDO dir=rtl>𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑</BDO></TD>
<TD class="submitter">Marco Cimarosti, <BR><A href="mailto:jameskass&#x40;worldnet.att.net">James Kass</A>, <BR>Andrew "Bass"
Shcheglov, <br>Michka Kaplan<br>Font: <a href="#code2001">CODE2001</a></TD></TR>
<TR>
<TD class="english"><a href="http://www.unicode.org/charts/PDF/U10400.pdf" target="_blank">Deseret</a></TD>
<TD class="english">Utah</TD>
<TD class="english">Brigham Young</TD>
<TD class="plane1">𐐏𐐭𐐻𐐫 </TD>
<TD class="plane1">𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍 </TD>
<TD class="submitter">John Jenkins<br>Font: <a href="#code2001">CODE2001</a></TD></TR>
<TR>
<TD class="english"><a href="http://www.unicode.org/charts/PDF/U10330.pdf" target="_blank">Gothic</a></TD>
<TD class="english">Gothland <BR>(Kingdom of the Goths)<BR>(thizai
thiudangardjai thize Gutane) </TD>
<TD class="english">Wulfila<BR>(also Ulfilas) </TD>
<TD class=plane1>𐌸𐌹𐌶𐌰𐌹<BR>𐌸𐌹𐌿𐌳𐌰𐌽𐌲𐌰𐍂𐌳𐌾𐌰𐌹 <BR>𐌸𐌹𐌶𐌴
<BR>𐌲𐌿𐍄𐌰𐌽𐌴 </TD>
<TD class="plane1">𐍅𐌿𐌻𐍆𐌹𐌻𐌰 </TD>
<TD class="submitter">James Kass<br>Font: <a href="#code2001">CODE2001</a></TD></TR>
<tr>
<td class="english"><a href="http://www.unicode.org/charts/PDF/U10480.pdf" target="_blank">Osmanya</a></td>
<td class="english">Somalia</td>
<td class="english">Cismaan Yuusuf Keenadiid<br><span class="small">(inventor of Osmanya script)</span></td>
<td class="osmanya">𐒈𐒝𐒑𐒛𐒐𐒘𐒕𐒖
</td>
<td class="osmanya">
𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓
𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆
𐒕𐒆
</td>
<td class="english">Mark Williamson
<br>Font: <a href="#andagii">ANDAGII</a>
</td>
</tr>
<tr>
<td class="english"><a href="http://www.unicode.org/charts/PDF/U10000.pdf" target="_blank">Linear B Syllabary</a></td>
<td class="english">Tulisos</td>
<td class="english">Minos</td>
<td class="linearb">𐀶𐀪𐀰</td>
<td class="linearb"><span class="small">(Unknown).</span></td>
<td class="english">Mark Williamson
<br>Font: <a href="#penuturesu">PENUTURESU</a>
</td>
</tr>
<TR>
<TD class="english"><a href="http://www.unicode.org/charts/PDF/U10450.pdf" target="_blank">Shavian</a></TD>
<TD class="english">Great Britain or United Kingdom</TD>
<TD class="english">George Bernard Shaw</TD>
<TD class=plane1>·𐑜𐑮𐑱𐑑 ·𐑚𐑮𐑦𐑑𐑩𐑯
or<br>·𐑿𐑯𐑲𐑑𐑧𐑛 ·𐑒𐑦𐑙𐑛𐑳𐑥</TD>
<TD class="plane1">𐑡𐑹𐑡 ·𐑚𐑻𐑯𐑸𐑛 ·𐑖𐑷</TD>
<TD class="submitter">Doug Ewell based this entry on information from Simon Barne's (now defunct) web site.
<br>Font: <a href="#code2001">CODE2001</a></TD></TR>
</TBODY>
</TABLE>
<div class="nottheothers" style="font-family:helvetica, arial, sans-serif">
<h2 id="fonts" style="margin-left:1in">Fonts</h2>
<ul>
<li id="code2001"><a href="http://www.code2000.net/code2001.htm">CODE2001</a></li>
<li id="andagii"><a href="unicode/unicode-font.html" target="_blank">ANDAGII</a></li>
<li id="penuturesu"><a href="unicode/unicode-font.html" target="_blank">PENUTURESU</a></li>
</ul>
</div>
<div class="center">
<a href="http://www.unicode.org" target="_blank" style="float:right;margin:1em 0 1em 1em;"><img border="0"
src="images/UniEncGreyBord.gif" width="88" height="31" alt="Encoded in UTF-8!"></a>
<a href="#top">Top of page</a>
<br>This page last updated 2008-11-15
</div>
<!-- WiredMinds eMetrics tracking with Enterprise Edition V5.4 START -->
<script type='text/javascript' src='https://count.carrierzone.com/app/count_server/count.js'></script>
<script type='text/javascript'><!--
wm_custnum='5e53965097060c7f';
wm_page_name='unicode-plane1-utf8.html';
wm_group_name='/services/webpages/i/1/i18nguy.com/public';
wm_campaign_key='campaign_id';
wm_track_alt='';
wiredminds.count();
// -->
</script>
<!-- WiredMinds eMetrics tracking with Enterprise Edition V5.4 END -->
</BODY>
</HTML>