chg: [Mail] add country statistic

pull/250/head
Terrtia 2018-07-30 09:21:22 +02:00
parent 8beb63b6a8
commit 39f54d4420
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
4 changed files with 39 additions and 13 deletions

1
.gitignore vendored
View File

@ -31,6 +31,7 @@ var/www/submitted
# Local config # Local config
bin/packages/config.cfg bin/packages/config.cfg
bin/packages/config.cfg.backup
configs/keys configs/keys
# installed files # installed files

View File

@ -20,4 +20,14 @@ Redis and ARDB overview
- DB 0 - Lines duplicate - DB 0 - Lines duplicate
- DB 1 - Hashes - DB 1 - Hashes
To be updated ARDB overview
---------------------------
ARDB_DB
* DB 1 - Curve
* DB 2 - TermFreq
* DB 3 - Trending
* DB 4 - Sentiment
* DB 5 - TermCred
* DB 6 - Tags
* DB 7 - Metadata
* DB 8 - Statistics

View File

@ -14,6 +14,7 @@ It applies mail regexes on paste content and warns if above a threshold.
import redis import redis
import pprint import pprint
import time import time
import datetime
import dns.exception import dns.exception
from packages import Paste from packages import Paste
from packages import lib_refine from packages import lib_refine
@ -36,6 +37,12 @@ if __name__ == "__main__":
port=p.config.getint("Redis_Cache", "port"), port=p.config.getint("Redis_Cache", "port"),
db=p.config.getint("Redis_Cache", "db"), db=p.config.getint("Redis_Cache", "db"),
decode_responses=True) decode_responses=True)
# ARDB #
server_statistics = redis.StrictRedis(
host=p.config.get("ARDB_Statistics", "host"),
port=p.config.getint("ARDB_Statistics", "port"),
db=p.config.getint("ARDB_Statistics", "db"),
decode_responses=True)
# FUNCTIONS # # FUNCTIONS #
publisher.info("Suscribed to channel mails_categ") publisher.info("Suscribed to channel mails_categ")
@ -66,7 +73,6 @@ if __name__ == "__main__":
PST.save_attribute_redis(channel, (MX_values[0], PST.save_attribute_redis(channel, (MX_values[0],
list(MX_values[1]))) list(MX_values[1])))
pprint.pprint(MX_values)
to_print = 'Mails;{};{};{};Checked {} e-mail(s);{}'.\ to_print = 'Mails;{};{};{};Checked {} e-mail(s);{}'.\
format(PST.p_source, PST.p_date, PST.p_name, format(PST.p_source, PST.p_date, PST.p_name,
MX_values[0], PST.p_path) MX_values[0], PST.p_path)
@ -81,10 +87,14 @@ if __name__ == "__main__":
else: else:
publisher.info(to_print) publisher.info(to_print)
#Send to ModuleStats #Send to ModuleStats and create country statistics
date = datetime.datetime.now().strftime("%Y%m")
for mail in MX_values[1]: for mail in MX_values[1]:
print('mail;{};{};{}'.format(1, mail, PST.p_date)) print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date))
p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats') p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats')
country = mail.split('.')[-1]
server_statistics.hincrby('mail_by_country:'+date, country, MX_values[1][mail])
prec_filename = filename prec_filename = filename

View File

@ -38,6 +38,7 @@ def checking_MX_record(r_serv, adress_set, addr_dns):
score = 0 score = 0
num = len(adress_set) num = len(adress_set)
WalidMX = set([]) WalidMX = set([])
validMX = {}
# Transforming the set into a string # Transforming the set into a string
MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower()) MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower())
resolver = dns.resolver.Resolver() resolver = dns.resolver.Resolver()
@ -46,20 +47,23 @@ def checking_MX_record(r_serv, adress_set, addr_dns):
resolver.lifetime = 2 resolver.lifetime = 2
if MXdomains != []: if MXdomains != []:
for MXdomain in set(MXdomains): for MXdomain in MXdomains:
try: try:
MXdomain = MXdomain[1:]
# Domain is already cached in Redis. # Domain is already cached in Redis.
if r_serv.exists(MXdomain[1:]): if r_serv.exists(MXdomain):
score += 1 score += 1
WalidMX.add(MXdomain[1:]) WalidMX.add(MXdomain)
validMX[MXdomain] = validMX.get(MXdomain, 0) + 1
# Not already in Redis # Not already in Redis
else: else:
# If the domain has a valid MX record # If the domain has a valid MX record
if resolver.query(MXdomain[1:], rdtype=dns.rdatatype.MX): if resolver.query(MXdomain, rdtype=dns.rdatatype.MX):
# Cache the domain in Redis. # Cache the domain in Redis.
r_serv.setex(MXdomain[1:], 1, timedelta(days=1)) r_serv.setex(MXdomain, 1, timedelta(days=1))
score += 1 score += 1
WalidMX.add(MXdomain[1:]) WalidMX.add(MXdomain)
validMX[MXdomain] = validMX.get(MXdomain, 0) + 1
else: else:
pass pass
@ -86,13 +90,14 @@ def checking_MX_record(r_serv, adress_set, addr_dns):
except dns.resolver.Timeout: except dns.resolver.Timeout:
print('timeout') print('timeout')
r_serv.setex(MXdomain[1:], 1, timedelta(days=1)) r_serv.setex(MXdomain, 1, timedelta(days=1))
except Exception as e: except Exception as e:
print(e) print(e)
publisher.debug("emails before: {0} after: {1} (valid)".format(num, score)) publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
return (num, WalidMX) #return (num, WalidMX)
return (num, validMX)
def checking_A_record(r_serv, domains_set): def checking_A_record(r_serv, domains_set):