From 0436118747d4a199e8b42e0b19840767dc824859 Mon Sep 17 00:00:00 2001 From: "x41\\x43" Date: Tue, 6 Mar 2018 18:12:36 +0100 Subject: [PATCH] Improving regex (validating e-mail) Line 48: The previous regex ` ^[\w\.\+\-]+\@[\w]+\.[a-z]{2,3}$ ` matched only a small subset of valid e-mail address (e.g.: didn't match domain names longer than 3 chars or user@this-domain.de or user@multiple.level.dom) and needed to be with start (^) and end ($). This ` [a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+)*@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])? ` is not perfect (e.g: can't match oriental chars), but imho is much more complete. Regex tested with several e-mail addresses with Python 3.6.4 and Python 2.7.14 on Linux 4.14. --- misp_modules/modules/expansion/otx.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/misp_modules/modules/expansion/otx.py b/misp_modules/modules/expansion/otx.py index ad9da2f..214e7f0 100755 --- a/misp_modules/modules/expansion/otx.py +++ b/misp_modules/modules/expansion/otx.py @@ -45,7 +45,7 @@ def findAll(data, keys): return a def valid_email(email): - return bool(re.search(r"^[\w\.\+\-]+\@[\w]+\.[a-z]{2,3}$", email)) + return bool(re.search(r"[a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+(?:\.[a-zA-Z0-9!#$%&'*+\/=?^_`{|}~-]+)*@(?:[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?\.)+[a-zA-Z0-9](?:[a-zA-Z0-9-]*[a-zA-Z0-9])?", email)) def handler(q=False): if q is False: