mirror of https://github.com/MISP/misp-modules
Added unit tests for UTF emails
parent
bf5ed3d032
commit
0566049c63
|
@ -5,13 +5,14 @@ import unittest
|
||||||
import requests
|
import requests
|
||||||
import base64
|
import base64
|
||||||
import json
|
import json
|
||||||
|
import os
|
||||||
import io
|
import io
|
||||||
import zipfile
|
import zipfile
|
||||||
from hashlib import sha256
|
from hashlib import sha256
|
||||||
from email.mime.application import MIMEApplication
|
from email.mime.application import MIMEApplication
|
||||||
from email.mime.text import MIMEText
|
from email.mime.text import MIMEText
|
||||||
from email.mime.multipart import MIMEMultipart
|
from email.mime.multipart import MIMEMultipart
|
||||||
|
from email.header import Header
|
||||||
|
|
||||||
class TestModules(unittest.TestCase):
|
class TestModules(unittest.TestCase):
|
||||||
|
|
||||||
|
@ -314,6 +315,46 @@ class TestModules(unittest.TestCase):
|
||||||
self.assertEqual(attch_data,
|
self.assertEqual(attch_data,
|
||||||
'X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-')
|
'X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-')
|
||||||
|
|
||||||
|
def test_email_body_encoding(self):
    """Post e-mails with differently encoded HTML bodies to ``email_import``.

    Each file in ``tests/test_files/encodings`` is read using its own base
    name as the codec, attached to a fresh base e-mail as an HTML part with
    that charset, and submitted to the server's ``query`` endpoint. The
    test fails if the JSON reply carries an ``error`` key or lacks a
    ``results`` key.

    NOTE(review): ``get_base_email`` and ``decode_email`` are not visible in
    this hunk; they are presumably module-level helpers of this test file.
    """
    query = {"module": "email_import"}
    query["config"] = {"unzip_attachments": None,
                       "guess_zip_attachment_passwords": None,
                       "extract_urls": None}
    filenames = os.listdir("tests/test_files/encodings")
    for fn in filenames:
        # Encoding is used as the name of the file
        encoding = os.path.splitext(fn)[0]
        # subTest keeps the remaining encodings running when one of them
        # fails and names the failing codec in the report.
        with self.subTest(encoding=encoding):
            message = get_base_email()
            with open("tests/test_files/encodings/{0}".format(fn), "r", encoding=encoding) as fp:
                text = fp.read()
            # The file is closed before any network traffic happens.
            message.attach(MIMEText(text, 'html', encoding))
            query['data'] = decode_email(message)
            data = json.dumps(query)
            response = requests.post(self.url + "query", data=data).json()
            self.assertNotIn('error', response, response.get('error', ""))
            self.assertIn('results', response, "No server results found.")
|
||||||
|
|
||||||
|
|
||||||
|
def test_email_header_encoding(self):
    """Post e-mails whose headers are RFC 2047 encoded to ``email_import``.

    For each of utf-8, utf-16 and utf-32 a fresh base e-mail gets a plain
    text part; then every header is, one after another, replaced by an
    encoded ``Header`` in that charset and the message is submitted to the
    server's ``query`` endpoint. Replacements accumulate on the same
    message, so the last request of each charset has all headers encoded.
    The test fails if a JSON reply carries an ``error`` key or lacks a
    ``results`` key.

    NOTE(review): ``get_base_email`` and ``decode_email`` are not visible in
    this hunk; they are presumably module-level helpers of this test file.
    NOTE(review): structural headers of the message are encoded as well,
    exactly as the loop did before -- confirm that this is intended.
    """
    query = {"module": "email_import"}
    query["config"] = {"unzip_attachments": None,
                       "guess_zip_attachment_passwords": None,
                       "extract_urls": None}
    text = """I am a test e-mail
the password is NOT "this string".
That is all.
"""
    for encoding in ['utf-8', 'utf-16', 'utf-32']:
        message = get_base_email()
        message.attach(MIMEText(text, 'plain'))
        # items() returns a list, so replacing headers while looping over
        # it is safe and hdr_val is always the original, unencoded value.
        for hdr, hdr_val in message.items():
            with self.subTest(encoding=encoding, header=hdr):
                # Item assignment on a Message appends a second header of
                # the same name; replace_header swaps the existing one so
                # the server really sees the encoded value.
                message.replace_header(hdr, Header(hdr_val, encoding))
                query['data'] = decode_email(message)
                data = json.dumps(query)
                response = requests.post(self.url + "query", data=data).json()
                self.assertNotIn('error', response, response.get('error', ""))
                self.assertIn('results', response, "No server results found.")
|
||||||
|
|
||||||
def test_email_attachment_password_in_subject(self):
|
def test_email_attachment_password_in_subject(self):
|
||||||
query = {"module":"email_import"}
|
query = {"module":"email_import"}
|
||||||
query["config"] = {"unzip_attachments": "true",
|
query["config"] = {"unzip_attachments": "true",
|
||||||
|
|
Binary file not shown.
Binary file not shown.
|
@ -0,0 +1,169 @@
|
||||||
|
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/strict.dtd">
|
||||||
|
<html lang="en-US">
|
||||||
|
<head>
|
||||||
|
<META http-equiv="Content-Type" content= "text/html; charset=UTF-8">
|
||||||
|
<META name="copyright" content="© 2001-2008, Tex Texin">
|
||||||
|
<META http-equiv="Content-Language" content="en-US">
|
||||||
|
<META name="keywords" lang="en-US" content="Unicode, supplementary, business">
|
||||||
|
<META name="keywords" lang="en-US" content="UTF-8, Tex Texin, i18nGuy">
|
||||||
|
<META name="Author" content="Tex Texin">
|
||||||
|
<title>Unicode Plane 1 Supplementary Character Examples using UTF-8</title>
|
||||||
|
<meta http-equiv="Content-Style-Type" content="text/css" >
|
||||||
|
<link href="css/unicode-example.css" rel="stylesheet" type="text/css" >
|
||||||
|
<style type="text/css">
|
||||||
|
p {
|
||||||
|
width : 80%;
|
||||||
|
margin : 1em auto;
|
||||||
|
}
|
||||||
|
h3 {
|
||||||
|
text-align:center;
|
||||||
|
}
|
||||||
|
div.center {
|
||||||
|
font-size : 80%;
|
||||||
|
}
|
||||||
|
td.english {
|
||||||
|
font-size : 80%;
|
||||||
|
width : 12%;
|
||||||
|
padding : 2px 4px;
|
||||||
|
color : black;
|
||||||
|
background-color: #F8F8F8;
|
||||||
|
}
|
||||||
|
td.submitter {
|
||||||
|
font-size : 70%;
|
||||||
|
width : 20%;
|
||||||
|
padding : 2px 4px;
|
||||||
|
color : black;
|
||||||
|
background-color: #F8F8F8 ;
|
||||||
|
}
|
||||||
|
</style>
|
||||||
|
</head>
|
||||||
|
|
||||||
|
<body>
|
||||||
|
<h1 id="top">Example Unicode Usage For Business Applications</h1>
|
||||||
|
<h3>Demonstrating Unicode Plane 1 (Supplementary) Characters Encoded in UTF-8</h3>
|
||||||
|
|
||||||
|
<div class="center">
|
||||||
|
Also see: <a href="unicode/unicode-example-intro.html">Introduction to the Compelling Unicode Demo</a>.
|
||||||
|
<br>The original <a href="unicode-example.html">Compelling Unicode Demo</a> (BMP) page.
|
||||||
|
<br><a href="index.html">I18nGuy Home Page</a>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<p>The table on this page is identical to the table on
|
||||||
|
<a href="unicode-example-plane1.html">Unicode Plane 1 Characters Encoded as Numeric Character References (NCR).</a>
|
||||||
|
except this table uses
|
||||||
|
<a href="http://www.unicode.org/glossary/#UTF_8">UTF-8</a> encoding for the plane 1 characters, and the other uses NCRs
|
||||||
|
<b>(</b><a href="http://www.w3.org/TR/html401/charset.html#h-5.3.1">Numeric Character References</a>
|
||||||
|
of the form &#dddd; (decimal) or &#xhhhh; (hexadecimal)<b>)</b>.
|
||||||
|
</p>
|
||||||
|
<p style="BACKGROUND-COLOR: yellow">The NCR page also
|
||||||
|
has a discussion of how to set up browsers to view these characters and which browsers work.
|
||||||
|
If you find browsers or configurations that work let me know.
|
||||||
|
Note that as of version 6, IE does not support Supplementary characters encoded in UTF-8.
|
||||||
|
Netscape and Opera do support them. Also Ximian Desktop 2 (XD2) displays this page correctly.</p>
|
||||||
|
|
||||||
|
<TABLE class="ctr">
|
||||||
|
<CAPTION>Example Plane 1 Unicode Data</CAPTION>
|
||||||
|
<TBODY>
|
||||||
|
<TR>
|
||||||
|
<TD class="english"><B>Script</B><br><span class="small">(links to Unicode code charts)</span></TD>
|
||||||
|
<TD class="english"><B>Origin</B> <BR><span class="small">(in English)</span> </TD>
|
||||||
|
<TD class="english"><B>Name</B> <BR><span class="small">(English transliteration)</span> </TD>
|
||||||
|
<TD class="native"><B>Origin</B> <BR><span class="small">(in native language)</span> </TD>
|
||||||
|
<TD class="native"><B>Name</B> <BR><span class="small">(in native language)</span>
|
||||||
|
</TD>
|
||||||
|
<TD class="submitter"><B>Submitters</B> </TD></TR>
|
||||||
|
<TR>
|
||||||
|
<TD class="english"><a href="http://www.unicode.org/charts/PDF/U10300.pdf" target="_blank">Etruscan</a></TD>
|
||||||
|
<TD class="english">Rasna (Etruria) </TD>
|
||||||
|
<TD class="english">Aulus Metellus <BR>(Aules'i Metelis' )</TD>
|
||||||
|
<TD class="rtlplane1"><BDO dir=rtl>𐌓𐌀𐌔𐌍𐌀</BDO></TD>
|
||||||
|
<TD class="rtlplane1"><BDO dir=rtl>𐌀𐌖𐌋𐌄𐌑𐌉·𐌌𐌄𐌕𐌄𐌋𐌉𐌑</BDO></TD>
|
||||||
|
<TD class="submitter">Marco Cimarosti, <BR><A href="mailto:jameskass@worldnet.att.net">James Kass</A>, <BR>Andrew "Bass"
|
||||||
|
Shcheglov, <br>Michka Kaplan<br>Font: <a href="#code2001">CODE2001</a></TD></TR>
|
||||||
|
<TR>
|
||||||
|
<TD class="english"><a href="http://www.unicode.org/charts/PDF/U10400.pdf" target="_blank">Deseret</a></TD>
|
||||||
|
<TD class="english">Utah</TD>
|
||||||
|
<TD class="english">Brigham Young</TD>
|
||||||
|
<TD class="plane1">𐐏𐐭𐐻𐐫 </TD>
|
||||||
|
<TD class="plane1">𐐒𐑉𐐮𐑀𐐲𐑋 𐐏𐐲𐑍 </TD>
|
||||||
|
<TD class="submitter">John Jenkins<br>Font: <a href="#code2001">CODE2001</a></TD></TR>
|
||||||
|
<TR>
|
||||||
|
<TD class="english"><a href="http://www.unicode.org/charts/PDF/U10330.pdf" target="_blank">Gothic</a></TD>
|
||||||
|
<TD class="english">Gothland <BR>(Kingdom of the Goths)<BR>(thizai
|
||||||
|
thiudangardjai thize Gutane) </TD>
|
||||||
|
<TD class="english">Wulfila<BR>(also Ulfilas) </TD>
|
||||||
|
<TD class=plane1>𐌸𐌹𐌶𐌰𐌹<BR>𐌸𐌹𐌿𐌳𐌰𐌽𐌲𐌰𐍂𐌳𐌾𐌰𐌹 <BR>𐌸𐌹𐌶𐌴
|
||||||
|
<BR>𐌲𐌿𐍄𐌰𐌽𐌴 </TD>
|
||||||
|
<TD class="plane1">𐍅𐌿𐌻𐍆𐌹𐌻𐌰 </TD>
|
||||||
|
<TD class="submitter">James Kass<br>Font: <a href="#code2001">CODE2001</a></TD></TR>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td class="english"><a href="http://www.unicode.org/charts/PDF/U10480.pdf" target="_blank">Osmanya</a></td>
|
||||||
|
<td class="english">Somalia</td>
|
||||||
|
<td class="english">Cismaan Yuusuf Keenadiid<br><span class="small">(inventor of Osmanya script)</span></td>
|
||||||
|
<td class="osmanya">𐒈𐒝𐒑𐒛𐒐𐒘𐒕𐒖
|
||||||
|
</td>
|
||||||
|
<td class="osmanya">
|
||||||
|
𐒋𐒘𐒈𐒑𐒛𐒒 𐒕𐒓
|
||||||
|
𐒈𐒚𐒍 𐒏𐒜𐒒𐒖𐒆
|
||||||
|
𐒕𐒆
|
||||||
|
</td>
|
||||||
|
<td class="english">Mark Williamson
|
||||||
|
<br>Font: <a href="#andagii">ANDAGII</a>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
<tr>
|
||||||
|
<td class="english"><a href="http://www.unicode.org/charts/PDF/U10000.pdf" target="_blank">Linear B Syllabary</a></td>
|
||||||
|
<td class="english">Tulisos</td>
|
||||||
|
<td class="english">Minos</td>
|
||||||
|
<td class="linearb">𐀶𐀪𐀰</td>
|
||||||
|
<td class="linearb"><span class="small">(Unknown).</span></td>
|
||||||
|
<td class="english">Mark Williamson
|
||||||
|
<br>Font: <a href="#penuturesu">PENUTURESU</a>
|
||||||
|
</td>
|
||||||
|
</tr>
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
<TR>
|
||||||
|
<TD class="english"><a href="http://www.unicode.org/charts/PDF/U10450.pdf" target="_blank">Shavian</a></TD>
|
||||||
|
<TD class="english">Great Britain or United Kingdom</TD>
|
||||||
|
<TD class="english">George Bernard Shaw</TD>
|
||||||
|
<TD class=plane1>·𐑜𐑮𐑱𐑑 ·𐑚𐑮𐑦𐑑𐑩𐑯
|
||||||
|
or<br>·𐑿𐑯𐑲𐑑𐑧𐑛 ·𐑒𐑦𐑙𐑛𐑳𐑥</TD>
|
||||||
|
<TD class="plane1">𐑡𐑹𐑡 ·𐑚𐑻𐑯𐑸𐑛 ·𐑖𐑷</TD>
|
||||||
|
<TD class="submitter">Doug Ewell based this entry on information from Simon Barne's (now defunct) web site.
|
||||||
|
<br>Font: <a href="#code2001">CODE2001</a></TD></TR>
|
||||||
|
</TBODY>
|
||||||
|
</TABLE>
|
||||||
|
|
||||||
|
<div class="nottheothers" style="font-family:helvetica, arial, sans-serif">
|
||||||
|
<h2 id="fonts" style="margin-left:1in">Fonts</h2>
|
||||||
|
<ul>
|
||||||
|
<li id="code2001"><a href="http://www.code2000.net/code2001.htm">CODE2001</a></li>
|
||||||
|
<li id="andagii"><a href="unicode/unicode-font.html" target="_blank">ANDAGII</a></li>
|
||||||
|
<li id="penuturesu"><a href="unicode/unicode-font.html" target="_blank">PENUTURESU</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
|
||||||
|
<div class="center">
|
||||||
|
<a href="http://www.unicode.org" target="_blank" style="float:right;margin:1em 0 1em 1em;"><img border="0"
|
||||||
|
src="images/UniEncGreyBord.gif" width="88" height="31" alt="Encoded in UTF-8!"></a>
|
||||||
|
<a href="#top">Top of page</a>
|
||||||
|
<br>This page last updated 2008-11-15
|
||||||
|
</div>
|
||||||
|
<!-- WiredMinds eMetrics tracking with Enterprise Edition V5.4 START -->
|
||||||
|
<script type='text/javascript' src='https://count.carrierzone.com/app/count_server/count.js'></script>
|
||||||
|
<script type='text/javascript'><!--
|
||||||
|
wm_custnum='5e53965097060c7f';
|
||||||
|
wm_page_name='unicode-plane1-utf8.html';
|
||||||
|
wm_group_name='/services/webpages/i/1/i18nguy.com/public';
|
||||||
|
wm_campaign_key='campaign_id';
|
||||||
|
wm_track_alt='';
|
||||||
|
wiredminds.count();
|
||||||
|
// -->
|
||||||
|
</script>
|
||||||
|
<!-- WiredMinds eMetrics tracking with Enterprise Edition V5.4 END -->
|
||||||
|
</BODY>
|
||||||
|
</HTML>
|
Loading…
Reference in New Issue