From 0566049c630c0595b44984ace3fde4457ac6af60 Mon Sep 17 00:00:00 2001 From: seamus tuohy Date: Wed, 11 Jan 2017 17:53:54 -0500 Subject: [PATCH] Added unit tests for UTF emails --- tests/test.py | 43 ++++++- tests/test_files/encodings/utf-16.html | Bin 0 -> 12506 bytes tests/test_files/encodings/utf-32.html | Bin 0 -> 15596 bytes tests/test_files/encodings/utf8.html | 169 +++++++++++++++++++++++++ 4 files changed, 211 insertions(+), 1 deletion(-) create mode 100644 tests/test_files/encodings/utf-16.html create mode 100644 tests/test_files/encodings/utf-32.html create mode 100644 tests/test_files/encodings/utf8.html diff --git a/tests/test.py b/tests/test.py index a94bbdf6..7b7ce8e2 100644 --- a/tests/test.py +++ b/tests/test.py @@ -5,13 +5,14 @@ import unittest import requests import base64 import json +import os import io import zipfile from hashlib import sha256 from email.mime.application import MIMEApplication from email.mime.text import MIMEText from email.mime.multipart import MIMEMultipart - +from email.header import Header class TestModules(unittest.TestCase): @@ -314,6 +315,46 @@ class TestModules(unittest.TestCase): self.assertEqual(attch_data, 'X5O!P%@AP[4\\PZX54(P^)7CC)7}$EICAR-STANDARD-ANTIVIRUS-TEST-') + def test_email_body_encoding(self): + query = {"module":"email_import"} + query["config"] = {"unzip_attachments": None, + "guess_zip_attachment_passwords": None, + "extract_urls": None} + filenames = os.listdir("tests/test_files/encodings") + for fn in filenames: + message = get_base_email() + encoding = os.path.splitext(fn) + with open("tests/test_files/encodings/{0}".format(fn), "r", encoding=encoding[0]) as fp: + # Encoding is used as the name of the file + text = fp.read() + message.attach(MIMEText(text, 'html', encoding[0])) + query['data'] = decode_email(message) + data = json.dumps(query) + response = requests.post(self.url + "query", data=data) + + + def test_email_header_encoding(self): + query = {"module":"email_import"} + query["config"] = {"unzip_attachments": None, + "guess_zip_attachment_passwords": None, + "extract_urls": None} + filenames = os.listdir("tests/test_files/encodings") + for encoding in ['utf-8', 'utf-16', 'utf-32']: + message = get_base_email() + text = """I am a test e-mail + the password is NOT "this string". + That is all. + """ + message.attach(MIMEText(text, 'plain')) + for hdr, hdr_val in message.items(): + # Encoding is used as the name of the file + msg = message + hdr_encoded = MIMEText(hdr_val.encode(encoding), 'plain', encoding) + msg[hdr] = Header(hdr_val, encoding) + query['data'] = decode_email(msg) + data = json.dumps(query) + response = requests.post(self.url + "query", data=data) + def test_email_attachment_password_in_subject(self): query = {"module":"email_import"} query["config"] = {"unzip_attachments": "true", diff --git a/tests/test_files/encodings/utf-16.html b/tests/test_files/encodings/utf-16.html new file mode 100644 index 0000000000000000000000000000000000000000..765eae03e23700206938164884536a9da213fd4c GIT binary patch literal 12506 zcmeI3`)^!F701UCzZ9rYdHuY&qIGD!YbU01631Z2PH+<^X}oEaf`r!g`hnMv?yj4- z5%3#ALI^0LLOlMC{ww=Kr2Hqq_cQ0t&fL4#@p>D?LzOkzyK`sGIdk6U%;ul}m~-E8 zOYW|_?&|K5yXTf&#r;UntM0a*?zsnUQBNy+UvcAZ($)0$&|T7Zo|d$7DYs+BRo#?k z>aOh??!a~2uIp+X>bc=se%G?QBUtn9Rrh5#mtbr89oz1j-Zr&ru)zx3*Sc`%5{&o>!Ee^w(d@}swtQiw~>2yO*7+y(R5GrZ_Pv8 z7rj^g${kVG)>)c*cTH5)-3>vTcJFxVDq3S_i4<|#eXZv#U^)`!w&>dSkjwCoyEpnP z!Fo}AoDz=7>}kfmt=YP#FnsQcOK`G`f?;`bJLCGXKDPzEoVL6?%J8Kp-V>Y?_gLp{ zyG`*8n5Rh>SVdWki^m7z7W12e#mPI8>yiFi`VMCgBnzZ+(tq35w?mzw;pNDlb?rd| z5466cKjx4yvN1@^r1DI^&-(S@?Ip>9w}TR3c2|;`6`q^Icyg*tQXVa6Wmnn&kIH#I zh$$~??5gYeF<;;pmTUZOineKu<=m+`v+-qH@4^Yu=Q-(q3fJ_yqq!50sp6loAv8PM z%ZBG~uxfU#d)b``{!?!q$azb1$Zb+j$Y576Puvx~L6W=rx8-^Kk>H~_c#wvl+0jf( zR*eOs7dG4RD>{OQZ31CS77KQ;?dxq@`hff^9?M0o+Sl`p`);=G=YBOicEl$nxg$9T zn#1`f1*s)C+kHqIg2YL{Y8VX?I=Qa3@ZHW0A255+eRllnUBRrl*S(jxT%ch`Zw!0n_+w!U{>QKaza=P!t^BNkzT#<(Voqi&@^q~0 zd>d|0{K6uS#i`By+0#8Y} zKeP#};m@REI+Z^`S1YT6#9B0`BfT0tN5mK5%_ca9=i{unXd~sLQGpF%HCRueVR*BV(`Z;xc1B*rx}x8U zc*=BZ+T#em@5P)PRg$S^Fj^rQe_;p1N6hGtL*7BWA=2P?$a~g@k^&Xq9iIU{aQF4Rrf2-h z7qS>#7S+ra$ml}6jg}DoGX5})_A!o-g|Dg7c%<5UxnJ+rR4eBz;E-d7iWtjFYmBb4 zuOmf@d7sxe`jE(VN2?jEMyx2?usm+DP+?VM(N?8dmQLP54%hJ%7+#|MoX@kN0e)CU zmDV8@ot)~%>YGWOn;epONIqDsZq`Iad2NU$TlG%VA>vWT!(a0Jz&Cmv^gwR8_e9ZM zaXxbWNUQ2vP41Mh9Xpz_{FFM59IqlO0*A0c_VA8RY8C2MY8_Tu?PoQBK{37{!yUhO z%hy;{*>%n5(6Jt-&yiNKlfoNwj;i)|g{$QyPxgKOop1+D1h!wMH>?1Ku|Hr_(%mI5+$@X_-9B_-($)BoF`c z8nvP*6MJHKwZqH<4r)}Mzct@=KFuSiy_aCmsVCZycHm<>S@#uf9~U>+hyAWel0*aO zwr=yq-1~ca8!LT$!>oUw_G6fx)(H6X9gBJSp1eGY5vwSfCE?@@@SzCHyjNG&gr_{| zB;lm2WVZ>3Tg}dIGEUVJ_z_*Y!==bUD!DJ1wPKAbDj#T3*b8xO!%Gj zBh${v1y4Ec+}LLi{rs0f z>F47tx?*{0>_S<#g6}tfbUKxxdl0N(I6Xg>wa{NCgN!+!>6B?cT5C}y+}_mX)6qS; zIA`0^veyu@9_-cP1v~9F2C4(a!$7+)7#%?&@0pNwpnb#^GG612!9EX6ZWvk`z}H39`*#J2jIQl{v#rAxT<;st zUC(KJQ{z?tjGSyw@Cd_jcNE;<{kFzeG`^(qn#L>7KND47&1SxpeX`;ejn|%!MxEVk z#-1=|x4e}ckZNVuXE@|%cl!`3s)!5*r(MuHdIKpn3);6SX*QMp(zRU7))Q~n6kRqnn!X2qsQz`(;o#fqx!{+b{9T=NN19TRZjqJjN$&In%B389C!cePO>xJJ9%8WFz2J78dc`_yUFk|qy}2cIU24)8Zck@?hqJ2M zU{m@-R6ujthc~8*`l&KCv#UK{A>0)dBAVSKa6 z5!on$CcO5s{`fTt74|Ki1CM5XD(>XxeAfetLp@W>CCe%z?q=!BSjBB#B+m)@Rk3<2 z&zI4))6bT_qao6d^?oh?{&$T(X#7><_ZolJ_>;z`&kRp_+OghmGH7547(Vf_ z1^9pT@QsGQ==*Q|o8Rf_ZyKLye5&!QY!y4~c{E+HEQ?OkL+|IP>WIVmw|p&2X9QW$ zpC7c1ycG;~W<#ex?S-7J_4(Vdp!H?^M;gJxgG9pjVeMk@R4>os`7Otkyv6Vqe_N6W zw^kMTbTUVNZG=~#E6Z=Z0!K!ZeO=aST~usfG#KQn2hxn;_Jb{)Y2EuZMZIUP_dSiN zB%cZr9Q4zJ5Qg_CbeHx#wU6~L9dOPZ{L!f1^uODJ%5E})*8DHp(Tw~4IqisgtD!2( zZpp&gN7MG3vN~>GxWTZxg}9AQljG9cw+?2wmB{D8qvC9O*yILarDh|tk_THg*-##G zHtc-X`I+L-PZT>2G(OaLPf>=s53;!}KR4Ff)#ss)HJmR*aaR7)zZvVbvz6QW{BZ{5 zzMg^b$j^=SI{Iu5W#1{47K~SNNs>6$ zUOKh>au9FKeGc6KyN9xRg&NK>7rb=1okCBDgKeM5g_|9$a7%MmDXmGH_VtWL(KSHU zO?8@pIH70m)94lPdop4!H&RqPP_wR2?v2sDqHi->x32VyBY8gNh1-c@N2KO{;j6HG zPo*~S3yG@xK)++T=Y4(g*As)bXj&ZNcJNPiaf081J?iIi$)UzN3+XW zw~`)Neh+t>r}yBC_nvl3#&2@tZ(OfvzeyDvw7bs1oZ)rxKm5L+?w>;aTfE;%Wm5N5 uIZ_U*tY=NPt0GC0H{f?n%j`FCGoo%v_x1c1p8sJG_!V#D;xG0s1O68@f?hoU literal 0 HcmV?d00001 diff --git a/tests/test_files/encodings/utf-32.html b/tests/test_files/encodings/utf-32.html new file mode 100644 index 0000000000000000000000000000000000000000..824cdf15d33e239dea57b8d4afd840177833bc61 GIT binary patch literal 15596 zcmeI%?T=nn6$S7i@C9{ZeEobZ5h2h{Emc7TnYL3}X{V)~2JzdJ7ZoW~1r?2dm;ar* ze(|p4=H_{ZnRcdx$cL4C?m7GHz4qGsocnb8pMP&`{9(yTnNlv4eTGmso-?5HQ}v1UkI#sPE0;MN3bf!`OKdQ&ITi3|EFL- z_$c^u@cZDC>G^S<=B1SOYY)0_A0Drh2h#}q|gHHqb{Us- z)u8!&lD+a3UyKLslWW<#AK*eSz2@DR-=pZpe%<*PS%b~sL2CSm?Bid3{~G9Fb$&O{ zZ{>N^vpcdET5tSm+gyy>XZqN>+SOiswyilXG*lbF$9j9V{%MTAe7g5;#`f~j{;=13 zbDZBLUv@2>hvl76Ev(7?ptDn3S~-g^2K0SBpd)RMdcN#eo7Stf-_9JL`oS4&|F!Sh z53Mim9oNTUayp2w{Z|9eYr#&SAMONVt?J8>`J2IqfxOt^HUgS?@T*%su z!1q^-5&R=~GQii`Zh*t-z}$9lJ)kj8_We8q;#Q_HeFP}8q415FT+R=@!YD+i2b>usO>UP}xa4TNQnAX(-H+A@RU@T|a$XA_cLyzXH z-uUt-hx+k-kdNM(+p;mYP7iBr`2DhLBNrMt7ybQmkUsO=*Xh$cO3%=T_Px*AFPAg7 zJm>4qnZ0L%a{*rud(NDXj|27Rk6---Vm1H&*Pi~Weyi`y-bbDfdnX;&x$F1b*>M;8 zF1iPIf_~4pGQKj+xhH)mto5$v<8IJ1@maL4$eGXd=oDO-M^plLHo;l z$-{p7J{{nFH}G5Ie&BAI>+UYXc{fl$<6FV9K6tgB;@$|lXJpJjo&FxY7gVR`GIqa} z6aVTdH}@z`a4x z`in2m>e=t5{=mb!v*W2QJ)7pltu_bs6B}Aw3vL8#{g!+)z|&lNaM^!&nyY^0i3d*V zJ?|qL$Y%s>4x{DQ}B0IoCkNSb$Q_J4Dw-(U30Y7{7+{rr*(a29iL?%i7!@pi^b+t zpq7s^X8V)I^WJJ+^e~nOeaG~~ohCh7*7k#CzsZFSAGN)c@m`>YGdzwz>_SM1?a&Lg1v_B7W2VzbAonlJZ> zFBUD0*}Fqu3;3Sj&+0=r8tQ@O&1bdV%eX#9#`E{;+`hrJXIP#e1-Qt61p9%x*5zu( zBe34Qn>!BTvX_T-zPxV+?kIECM$k25bMyP+FgntKZ$7I7o;aMIo(Fx4>?>Dw!skqY z2R`zVTXnsf@zP#AJj7q-Jf=B*JD%sbllgW0*xT1Lb0_02G9>-eRz!=Q1~YXwJqu&h5=} z8K0eCtOvZGpT^G7&w``yz7=~h#G)7d)L{hX*41{-n?|>To56Ol8;qbip5(nuPnzEg ziym-6O92=lh#qztbb@OZ~;CzkJo=YM`cS*Lq%_5=ERw)dBuo7Yxsn%kJ!f1W06 zVW}lO_@f^#ZwGVj@nLgoF+cJSUANb}9IFS7dj{x22mNE69@g|+b8B9=Gslm&VsWs=ldvYKpPzJ!NYh2zEw-kxT^~r=XL~N6z}cCrlb3%dX9{* z2l|E9p4OK(IM2UFa-I9Lj~+h?Cxn+KihtNxc~ZNz2{dz~-)%&+^MG3RmIImzey z!PkQ)f^P*+2LB45Z%lLFo1Sc}v3Z!hPvYLtcl!aY==gCkf{THg-V3^JO+V@rYtIKG zkkh>1y>AC2@Oz$bJ@kIie;3v-KYEQ%{{0>JgFs9+JuCh`YfgN4E%5JEd-&%=AGTll zI}_Mr-1qtVsm$xSqyGQL_O!40Z|2?KcSo?)f~}aiRR_FBAU@spgXU#!1h)g)sfi~Y z{HBqooc(SS|Df2{vrgaosD>kR{{A8E@?2PJkM3pcj4jXQt!ZxL3GCS}=dHn#WBppg zO-{H}13e;Nx;1BcSf>R(c(pe?tF1b(;y*G!M>+Fj{y}-%$zCz>$L%n&PeKoUL<2g} z3>Td41tak9czxqrR$Whu>$0cRr{9lnKJnPb>EPwyxnN}Ni{SC?#J(Dw30@1%1#blS z;OPDO^nMtAY>uO^-9_r8e(E#=b3G5vuo_!)9!9W=-+XR)E?4#6(uUvqK90Ozk3V@i zD{||-H0Rm7W@LSiReT@z+%Ea_4x;P3Q+(W_5S1f<`*$<3IFkf??aj@QVZR{>E|3R>BZ~vY&VuKrBeAgG=_>JI;s?%GE zjhFh+l?JWVD&26mPFuAY!JS~${_4->r1CrHE`0Iv$2MLHej1#e#><>)^UJKiFxkD5 z=ecQoVVZw+dgrhC%zL+f#5x}=Z~DZYb(G(VBlfE}&gXisEVZlF>USfcdH)VS-@l!CHrIlk dVE#Mp;xxCLr@C2hy!trE|CMRq^*s6K`@be)pW^@k literal 0 HcmV?d00001 diff --git a/tests/test_files/encodings/utf8.html b/tests/test_files/encodings/utf8.html new file mode 100644 index 00000000..c9d9b02e --- /dev/null +++ b/tests/test_files/encodings/utf8.html @@ -0,0 +1,169 @@ +๏ปฟ + + + + + + + + +Unicode Plane 1 Supplementary Character Examples using UTF-8 + + + + + + +

Example Unicode Usage For Business Applications

+

Demonstrating Unicode Plane 1 (Supplementary) Characters Encoded in UTF-8

+ + + +

The table on this page is identical to the table on +Unicode Plane 1 Characters Encoded as Numeric Character References (NCR). +except this table uses +UTF-8 encoding for the plane 1 characters, and the other uses NCRs +(Numeric Character References +of the form &#dddd; (decimal) or &#xhhhh; (hexadecimal)). +

+

The NCR page also +has a discussion of how to set up browsers to view these characters and which browsers work. +If you find browsers or configurations that work let me know. +Note that as of version 6, IE does not support Supplementary characters encoded in UTF-8. +Netscape and Opera do support them. Also Ximian Desktop 2 (XD2) displays this page correctly.

+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
Example Plane 1 Unicode Data
Script
(links to Unicode code charts)
Origin
(in English)
Name
(English transliteration)
Origin
(in native language)
Name
(in native language) +
Submitters
EtruscanRasna (Etruria) Aulus Metellus
(Aules'i Metelis' )
๐Œ“๐Œ€๐Œ”๐Œ๐Œ€๐Œ€๐Œ–๐Œ‹๐Œ„๐Œ‘๐Œ‰ยท๐ŒŒ๐Œ„๐Œ•๐Œ„๐Œ‹๐Œ‰๐Œ‘Marco Cimarosti,
James Kass,
Andrew "Bass" +Shcheglov,
Michka Kaplan
Font: CODE2001
DeseretUtahBrigham Young๐๐ญ๐ป๐ซ ๐’๐‘‰๐ฎ๐‘€๐ฒ๐‘‹ ๐๐ฒ๐‘ John Jenkins
Font: CODE2001
GothicGothland
(Kingdom of the Goths)
(thizai +thiudangardjai thize Gutane)
Wulfila
(also Ulfilas)
๐Œธ๐Œน๐Œถ๐Œฐ๐Œน
๐Œธ๐Œน๐Œฟ๐Œณ๐Œฐ๐Œฝ๐Œฒ๐Œฐ๐‚๐Œณ๐Œพ๐Œฐ๐Œน
๐Œธ๐Œน๐Œถ๐Œด +
๐Œฒ๐Œฟ๐„๐Œฐ๐Œฝ๐Œด
๐…๐Œฟ๐Œป๐†๐Œน๐Œป๐Œฐ James Kass
Font: CODE2001
OsmanyaSomaliaCismaan Yuusuf Keenadiid
(inventor of Osmanya script)
๐’ˆ๐’๐’‘๐’›๐’๐’˜๐’•๐’– + +๐’‹๐’˜๐’ˆ๐’‘๐’›๐’’ ๐’•๐’“ +๐’ˆ๐’š๐’ ๐’๐’œ๐’’๐’–๐’† +๐’•๐’† +Mark Williamson +
Font: ANDAGII +
Linear B SyllabaryTulisosMinos๐€ถ๐€ช๐€ฐ(Unknown).Mark Williamson +
Font: PENUTURESU +
ShavianGreat Britain or United KingdomGeorge Bernard Shawยท๐‘œ๐‘ฎ๐‘ฑ๐‘‘ ยท๐‘š๐‘ฎ๐‘ฆ๐‘‘๐‘ฉ๐‘ฏ +or
ยท๐‘ฟ๐‘ฏ๐‘ฒ๐‘‘๐‘ง๐‘› ยท๐‘’๐‘ฆ๐‘™๐‘›๐‘ณ๐‘ฅ
๐‘ก๐‘น๐‘ก ยท๐‘š๐‘ป๐‘ฏ๐‘ธ๐‘› ยท๐‘–๐‘ทDoug Ewell based this entry on information from Simon Barne's (now defunct) web site. +
Font: CODE2001
+ +
+

Fonts

+ +
+ +
+Encoded in UTF-8! +Top of page +
This page last updated 2008-11-15 +
+ + + + + +