chg: [Mail] add country statistic

pull/250/head
Terrtia 2018-07-30 09:21:22 +02:00
parent 8beb63b6a8
commit 39f54d4420
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
4 changed files with 39 additions and 13 deletions

1
.gitignore vendored
View File

@ -31,6 +31,7 @@ var/www/submitted
# Local config
bin/packages/config.cfg
bin/packages/config.cfg.backup
configs/keys
# installed files

View File

@ -20,4 +20,14 @@ Redis and ARDB overview
- DB 0 - Lines duplicate
- DB 1 - Hashes
To be updated
ARDB overview
---------------------------
ARDB_DB
* DB 1 - Curve
* DB 2 - TermFreq
* DB 3 - Trending
* DB 4 - Sentiment
* DB 5 - TermCred
* DB 6 - Tags
* DB 7 - Metadata
* DB 8 - Statistics

View File

@ -14,6 +14,7 @@ It applies mail regexes on paste content and warns if above a threshold.
import redis
import pprint
import time
import datetime
import dns.exception
from packages import Paste
from packages import lib_refine
@ -36,6 +37,12 @@ if __name__ == "__main__":
port=p.config.getint("Redis_Cache", "port"),
db=p.config.getint("Redis_Cache", "db"),
decode_responses=True)
# ARDB #
server_statistics = redis.StrictRedis(
host=p.config.get("ARDB_Statistics", "host"),
port=p.config.getint("ARDB_Statistics", "port"),
db=p.config.getint("ARDB_Statistics", "db"),
decode_responses=True)
# FUNCTIONS #
publisher.info("Suscribed to channel mails_categ")
@ -66,7 +73,6 @@ if __name__ == "__main__":
PST.save_attribute_redis(channel, (MX_values[0],
list(MX_values[1])))
pprint.pprint(MX_values)
to_print = 'Mails;{};{};{};Checked {} e-mail(s);{}'.\
format(PST.p_source, PST.p_date, PST.p_name,
MX_values[0], PST.p_path)
@ -81,10 +87,14 @@ if __name__ == "__main__":
else:
publisher.info(to_print)
#Send to ModuleStats
#Send to ModuleStats and create country statistics
date = datetime.datetime.now().strftime("%Y%m")
for mail in MX_values[1]:
print('mail;{};{};{}'.format(1, mail, PST.p_date))
p.populate_set_out('mail;{};{};{}'.format(1, mail, PST.p_date), 'ModuleStats')
print('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date))
p.populate_set_out('mail;{};{};{}'.format(MX_values[1][mail], mail, PST.p_date), 'ModuleStats')
country = mail.split('.')[-1]
server_statistics.hincrby('mail_by_country:'+date, country, MX_values[1][mail])
prec_filename = filename

View File

@ -38,6 +38,7 @@ def checking_MX_record(r_serv, adress_set, addr_dns):
score = 0
num = len(adress_set)
WalidMX = set([])
validMX = {}
# Transforming the set into a string
MXdomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", str(adress_set).lower())
resolver = dns.resolver.Resolver()
@ -46,20 +47,23 @@ def checking_MX_record(r_serv, adress_set, addr_dns):
resolver.lifetime = 2
if MXdomains != []:
for MXdomain in set(MXdomains):
for MXdomain in MXdomains:
try:
MXdomain = MXdomain[1:]
# Domain is already present in Redis.
if r_serv.exists(MXdomain[1:]):
if r_serv.exists(MXdomain):
score += 1
WalidMX.add(MXdomain[1:])
WalidMX.add(MXdomain)
validMX[MXdomain] = validMX.get(MXdomain, 0) + 1
# Not already in Redis
else:
# If the domain has a valid MX record
if resolver.query(MXdomain[1:], rdtype=dns.rdatatype.MX):
if resolver.query(MXdomain, rdtype=dns.rdatatype.MX):
# Going to be added to Redis.
r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
r_serv.setex(MXdomain, 1, timedelta(days=1))
score += 1
WalidMX.add(MXdomain[1:])
WalidMX.add(MXdomain)
validMX[MXdomain] = validMX.get(MXdomain, 0) + 1
else:
pass
@ -86,13 +90,14 @@ def checking_MX_record(r_serv, adress_set, addr_dns):
except dns.resolver.Timeout:
print('timeout')
r_serv.setex(MXdomain[1:], 1, timedelta(days=1))
r_serv.setex(MXdomain, 1, timedelta(days=1))
except Exception as e:
print(e)
publisher.debug("emails before: {0} after: {1} (valid)".format(num, score))
return (num, WalidMX)
#return (num, WalidMX)
return (num, validMX)
def checking_A_record(r_serv, domains_set):