fix: [Mails] add regex timeout

pull/497/head
Terrtia 2020-05-04 14:05:04 +02:00
parent c3d37f2bbf
commit 42d7a6cb78
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
2 changed files with 59 additions and 23 deletions

View File

@ -23,6 +23,20 @@ from pyfaup.faup import Faup
from Helper import Process from Helper import Process
sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages'))
import Item
import signal
class TimeoutException(Exception):
pass
def timeout_handler(signum, frame):
raise TimeoutException
signal.signal(signal.SIGALRM, timeout_handler)
max_execution_time = 30
if __name__ == "__main__": if __name__ == "__main__":
publisher.port = 6380 publisher.port = 6380
publisher.channel = "Script" publisher.channel = "Script"
@ -35,7 +49,7 @@ if __name__ == "__main__":
addr_dns = p.config.get("Mail", "dns") addr_dns = p.config.get("Mail", "dns")
# REDIS # # REDIS #
r_serv2 = redis.StrictRedis( r_serv_cache = redis.StrictRedis(
host=p.config.get("Redis_Cache", "host"), host=p.config.get("Redis_Cache", "host"),
port=p.config.getint("Redis_Cache", "port"), port=p.config.getint("Redis_Cache", "port"),
db=p.config.getint("Redis_Cache", "db"), db=p.config.getint("Redis_Cache", "db"),
@ -52,23 +66,53 @@ if __name__ == "__main__":
# FIXME For retro compatibility # FIXME For retro compatibility
channel = 'mails_categ' channel = 'mails_categ'
prec_item_id = None
message = p.get_from_set()
prec_filename = None
# Log as critical if there are more than that amount of valid emails # Log as critical if there are more than that amount of valid emails
is_critical = 10 is_critical = 10
max_execution_time = 60
email_regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}" email_regex = "[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
MX_values = None MX_values = None
while True: while True:
if message is not None: message = p.get_from_set()
filename, score = message.split()
if prec_filename is None or filename != prec_filename: if message is not None:
PST = Paste.Paste(filename) item_id, score = message.split()
MX_values = lib_refine.checking_MX_record(
r_serv2, PST.get_regex(email_regex), addr_dns) if prec_item_id is None or item_id != prec_item_id:
PST = Paste.Paste(item_id)
# max execution time on regex
signal.alarm(max_execution_time)
try:
l_mails = re.findall(email_regex, Item.get_item_content())
except TimeoutException:
p.incr_module_timeout_statistic() # add encoder type
err_mess = "Mail: processing timeout: {}".format(item_id)
print(err_mess)
publisher.info(err_mess)
continue
else:
signal.alarm(0)
l_mails = list(set(l_mails))
# max execution time on regex
signal.alarm(max_execution_time)
try:
# Transforming the set into a string
MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(l_mails).lower())
except TimeoutException:
p.incr_module_timeout_statistic() # add encoder type
err_mess = "Mail: processing timeout: {}".format(item_id)
print(err_mess)
publisher.info(err_mess)
continue
else:
signal.alarm(0)
MX_values = lib_refine.checking_MX_record(r_serv_cache, MXdomains, addr_dns)
if MX_values[0] >= 1: if MX_values[0] >= 1:
@ -82,9 +126,9 @@ if __name__ == "__main__":
if MX_values[0] > is_critical: if MX_values[0] > is_critical:
publisher.warning(to_print) publisher.warning(to_print)
#Send to duplicate #Send to duplicate
p.populate_set_out(filename, 'Duplicate') p.populate_set_out(item_id, 'Duplicate')
msg = 'infoleak:automatic-detection="mail";{}'.format(filename) msg = 'infoleak:automatic-detection="mail";{}'.format(item_id)
p.populate_set_out(msg, 'Tags') p.populate_set_out(msg, 'Tags')
#create country statistics #create country statistics
@ -108,11 +152,9 @@ if __name__ == "__main__":
print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date)) print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date))
p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats') p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats')
prec_filename = filename prec_item_id = item_id
else: else:
publisher.debug("Script Mails is Idling 10s") publisher.debug("Script Mails is Idling 10s")
print('Sleeping') print('Sleeping')
time.sleep(10) time.sleep(10)
message = p.get_from_set()

View File

@ -27,12 +27,11 @@ def is_luhn_valid(card_number):
return (sum(r[0::2]) + sum(sum(divmod(d*2, 10)) for d in r[1::2])) % 10 == 0 return (sum(r[0::2]) + sum(sum(divmod(d*2, 10)) for d in r[1::2])) % 10 == 0
def checking_MX_record(r_serv, adress_set, addr_dns): def checking_MX_record(r_serv, MXdomains, addr_dns):
"""Check if emails MX domains are responding. """Check if emails MX domains are responding.
:param r_serv: -- Redis connection database :param r_serv: -- Redis connection database
:param adress_set: -- (set) This is a set of emails adress :param adress_set: -- (set) This is a set of emails adress
:param adress_set: -- (str) This is a server dns address
:return: (int) Number of addresses with a responding and valid MX domains :return: (int) Number of addresses with a responding and valid MX domains
This function will split the email adress and try to resolve their domains This function will split the email adress and try to resolve their domains
@ -40,15 +39,10 @@ def checking_MX_record(r_serv, adress_set, addr_dns):
""" """
#remove duplicate
adress_set = list(set(adress_set))
score = 0 score = 0
num = len(adress_set)
WalidMX = set([]) WalidMX = set([])
validMX = {} validMX = {}
# Transforming the set into a string num = len(MXdomains)
MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower())
resolver = dns.resolver.Resolver() resolver = dns.resolver.Resolver()
resolver.nameservers = [addr_dns] resolver.nameservers = [addr_dns]
resolver.timeout = 5 resolver.timeout = 5