mirror of https://github.com/CIRCL/AIL-framework
186 lines
6.4 KiB
Python
Executable File
186 lines
6.4 KiB
Python
Executable File
#!/usr/bin/env python3
|
|
# -*-coding:UTF-8 -*
|
|
|
|
"""
|
|
The Mails Module
|
|
======================
|
|
|
|
This module is consuming the Redis-list created by the Categ module.
|
|
|
|
It applies mail regexes to item content and warns if the number of valid addresses is above a threshold.
|
|
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import sys
|
|
import datetime
|
|
|
|
import dns.resolver
|
|
import dns.exception
|
|
|
|
from pyfaup.faup import Faup
|
|
|
|
sys.path.append(os.environ['AIL_BIN'])
|
|
##################################
|
|
# Import Project packages #
|
|
##################################
|
|
from modules.abstract_module import AbstractModule
|
|
from lib.ConfigLoader import ConfigLoader
|
|
# from lib import Statistics
|
|
|
|
|
|
class Mail(AbstractModule):
    """
    Mail module for AIL framework

    Extracts e-mail addresses from an object's content with a regex, keeps
    the addresses whose domain answers an MX query (or is already cached as
    valid), and emits the ``infoleak:automatic-detection="mail"`` tag when
    more than ``mail_threshold`` such addresses are found.
    """

    def __init__(self, queue=True):
        """Load the configuration and set up the regex, cache and DNS settings.

        :param queue: forwarded unchanged to ``AbstractModule.__init__``
        """
        super().__init__(queue=queue)

        config_loader = ConfigLoader()
        self.r_cache = config_loader.get_redis_conn("Redis_Cache")

        # Address of the DNS server used for every MX lookup
        self.dns_server = config_loader.get_config_str('Mail', 'dns')

        self.faup = Faup()

        # Numbers of Mails needed to Tags
        self.mail_threshold = 10

        self.regex_timeout = 30
        self.email_regex = r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,6}"
        # The compiled object is discarded on purpose: the pattern is passed
        # around as a string, this call only fails fast on an invalid regex.
        re.compile(self.email_regex)

    def is_mxdomain_in_cache(self, mxdomain):
        """Return the result of the Redis ``EXISTS`` on the domain's cache key.

        :param mxdomain: (str) e-mail domain
        :return: truthy if ``mxdomain:<domain>`` is present in the cache
        """
        return self.r_cache.exists(f'mxdomain:{mxdomain}')

    def save_mxdomain_in_cache(self, mxdomain):
        """Remember a domain as having a valid MX record for one day.

        :param mxdomain: (str) e-mail domain
        """
        self.r_cache.setex(f'mxdomain:{mxdomain}', datetime.timedelta(days=1), 1)

    @staticmethod
    def _resolve_mx(resolver, mxdomain):
        """Run the MX lookup, preferring the non-deprecated resolver API.

        dnspython >= 2.0 deprecates ``query()``, which is documented as
        ``resolve(..., search=True)``; older releases only provide ``query()``.
        """
        if hasattr(resolver, 'resolve'):
            return resolver.resolve(mxdomain, rdtype=dns.rdatatype.MX, search=True)
        return resolver.query(mxdomain, rdtype=dns.rdatatype.MX)

    def _report_dns_issue(self, text):
        """Log a DNS lookup problem at debug level and echo it on stdout."""
        self.logger.debug(text)
        print(text)

    def check_mx_record(self, set_mxdomains):
        """Check if emails MX domains are responding.

        :param set_mxdomains: -- (iterable) e-mail domains to check
        :return: (list) domains that are in the cache or returned a non-empty
                 MX answer; lookup failures are reported and skipped
        """
        resolver = dns.resolver.Resolver()
        resolver.nameservers = [self.dns_server]
        # NOTE(review): lifetime (total budget per lookup) is smaller than
        # timeout (per-server wait), so lookups are presumably bounded by the
        # 2s lifetime -- confirm this is intended.
        resolver.timeout = 5.0
        resolver.lifetime = 2.0

        valid_mxdomain = []
        for mxdomain in set_mxdomains:

            # Already validated within the cache lifetime: skip the DNS query
            if self.is_mxdomain_in_cache(mxdomain):
                valid_mxdomain.append(mxdomain)
                continue

            # DNS resolution
            try:
                answers = self._resolve_mx(resolver, mxdomain)
                if answers:
                    self.save_mxdomain_in_cache(mxdomain)
                    valid_mxdomain.append(mxdomain)
            except dns.resolver.NoNameservers:
                self._report_dns_issue('NoNameserver, No non-broken nameservers are available to answer the query.')
            except dns.resolver.NoAnswer:
                self._report_dns_issue('NoAnswer, The response did not contain an answer to the question.')
            except dns.name.EmptyLabel:
                self._report_dns_issue('SyntaxError: EmptyLabel')
            except dns.resolver.NXDOMAIN:
                self._report_dns_issue('The query name does not exist.')
            except dns.name.LabelTooLong:
                self._report_dns_issue('The Label is too long')
            except dns.exception.Timeout:
                self._report_dns_issue('dns timeout')
            except Exception as e:
                # Best effort: one failing domain must not abort the others
                self.logger.debug(f'MX lookup error for {mxdomain}: {e}')
                print(e)
        return valid_mxdomain

    def extract(self, obj, content, tag):
        """Locate the e-mail addresses of ``content`` whose domain has a valid MX.

        :param obj: object being processed; only ``get_global_id()`` is used
        :param content: (str) text to search
        :param tag: tag name appended to every result as ``tag:<tag>``
        :return: (list) ``[start, end, value, 'tag:<tag>']`` rows, grouped by domain
        """
        extracted = []
        mxdomains = {}
        mails = self.regex_finditer(self.email_regex, obj.get_global_id(), content)
        for mail in mails:
            # Each match is expected to be a (start, end, value) triple
            _start, _end, value = mail
            mxdomain = value.rsplit('@', 1)[1].lower()
            mxdomains.setdefault(mxdomain, []).append(mail)
        for mx in self.check_mx_record(mxdomains.keys()):
            for row in mxdomains[mx]:
                extracted.append([row[0], row[1], row[2], f'tag:{tag}'])
        return extracted

    # # TODO: sanitize mails
    def compute(self, message):
        """Count the valid e-mail addresses of the current object and tag it.

        The object is obtained from ``self.get_obj()``; ``message`` itself is
        not read here. The ``infoleak:automatic-detection="mail"`` tag is sent
        to the ``Tags`` queue when the count exceeds ``mail_threshold``.

        :param message: queue message that triggered the call (unused)
        """
        item = self.get_obj()

        mails = self.regex_findall(self.email_regex, item.id, item.get_content())
        # domain (lower-cased) -> set of distinct addresses found for it
        mxdomains_email = {}
        for mail in mails:
            mxdomain = mail.rsplit('@', 1)[1].lower()
            mxdomains_email.setdefault(mxdomain, set()).add(mail)

        # # TODO: add MAIL trackers

        valid_mx = self.check_mx_record(mxdomains_email.keys())
        print(f'valid_mx: {valid_mx}')
        # Per-TLD counters; currently only consumed by the disabled
        # statistics export below.
        mx_tlds = {}
        num_valid_email = 0
        for domain_mx in valid_mx:
            nb_mails = len(mxdomains_email[domain_mx])
            num_valid_email += nb_mails

            # Create domain_mail stats
            # msg = f'mail;{nb_mails};{domain_mx};{item_date}'
            # self.add_message_to_queue(msg, 'ModuleStats')

            # Create country stats
            self.faup.decode(domain_mx)
            tld = self.faup.get()['tld']
            try:
                tld = tld.decode()
            except (AttributeError, UnicodeDecodeError):
                # Not bytes (already str / None) or undecodable: keep as is
                pass
            mx_tlds[tld] = mx_tlds.get(tld, 0) + nb_mails
        # for tld in mx_tlds:
        #     Statistics.add_module_tld_stats_by_date('mail', item_date, tld, mx_tlds[tld])

        msg = f'Checked {num_valid_email} e-mail(s);{self.obj.get_global_id()}'
        if num_valid_email > self.mail_threshold:
            print(f'{item.id} Checked {num_valid_email} e-mail(s)')
            self.logger.info(msg)
            # Tags
            tag = 'infoleak:automatic-detection="mail"'
            self.add_message_to_queue(message=tag, queue='Tags')
        elif num_valid_email > 0:
            self.logger.info(msg)
|
|
|
|
|
|
if __name__ == '__main__':
    # Standalone entry point: build the Mail module with its default
    # settings and hand control over to its run() method.
    module = Mail()
    module.run()
|