chg: [statistics] clean scripts

pull/250/head
Terrtia 2018-09-12 11:21:11 +02:00
parent ea3d2c1977
commit 204e996fc3
No known key found for this signature in database
GPG Key ID: 1E1B1F50D84613D0
6 changed files with 98 additions and 65 deletions

1
.gitignore vendored
View File

@ -40,3 +40,4 @@ doc/all_modules.txt
# auto generated # auto generated
doc/module-data-flow.png doc/module-data-flow.png
doc/data-flow.png doc/data-flow.png
doc/statistics

View File

@ -62,7 +62,7 @@ def check_all_iban(l_iban, paste, filename):
if is_valid_iban(iban): if is_valid_iban(iban):
print('------') print('------')
nb_valid_iban = nb_valid_iban + 1 nb_valid_iban = nb_valid_iban + 1
server_statistics.hincrby('iban_by_tld:'+date, iban[0:2], 1) server_statistics.hincrby('iban_by_country:'+date, iban[0:2], 1)
if(nb_valid_iban > 0): if(nb_valid_iban > 0):
to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name) to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name)

View File

@ -10,8 +10,6 @@ the out output of the Global module.
""" """
import time import time
import datetime
import redis
from packages import Paste from packages import Paste
from pubsublogger import publisher from pubsublogger import publisher
@ -28,13 +26,6 @@ def main():
p = Process(config_section) p = Process(config_section)
addr_dns = p.config.get("DomClassifier", "dns") addr_dns = p.config.get("DomClassifier", "dns")
# ARDB #
server_statistics = redis.StrictRedis(
host=p.config.get("ARDB_Statistics", "host"),
port=p.config.getint("ARDB_Statistics", "port"),
db=p.config.getint("ARDB_Statistics", "db"),
decode_responses=True)
publisher.info("""ZMQ DomainClassifier is Running""") publisher.info("""ZMQ DomainClassifier is Running""")
c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns]) c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
@ -55,31 +46,20 @@ def main():
paste = PST.get_p_content() paste = PST.get_p_content()
mimetype = PST._get_p_encoding() mimetype = PST._get_p_encoding()
nb_domain = 0
nb_tld_domain = 0
if mimetype == "text/plain": if mimetype == "text/plain":
c.text(rawtext=paste) c.text(rawtext=paste)
c.potentialdomain() c.potentialdomain()
valid = c.validdomain(rtype=['A'], extended=True) c.validdomain(rtype=['A'], extended=True)
nb_domain = len(set(valid)) localizeddomains = c.include(expression=cc_tld)
if nb_domain > 0: if localizeddomains:
localizeddomains = c.include(expression=cc_tld) print(localizeddomains)
if localizeddomains: publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
nb_tld_domain = len(set(localizeddomains)) PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format( localizeddomains = c.localizedomain(cc=cc)
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path)) if localizeddomains:
print(localizeddomains)
localizeddomains = c.localizedomain(cc=cc) publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
if localizeddomains: PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
nb_tld_domain = nb_tld_domain + len(set(localizeddomains))
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
date = datetime.datetime.now().strftime("%Y%m")
server_statistics.hincrby('domain_by_tld:'+date, 'ALL', nb_domain)
if nb_tld_domain > 0:
server_statistics.hincrby('domain_by_tld:'+date, cc, nb_tld_domain)
except IOError: except IOError:
print("CRC Checksum Failed on :", PST.p_path) print("CRC Checksum Failed on :", PST.p_path)
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format( publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(

70
bin/DomainSubject.py Executable file
View File

@ -0,0 +1,70 @@
#!/usr/bin/env python3
# -*-coding:UTF-8 -*
from packages import Paste
from Helper import Process
from pubsublogger import publisher
import time
import redis
import newspaper
from collections import defaultdict
from newspaper import fulltext
def rank_keywords(keyword_counts):
    """Return ``(keyword, count)`` pairs ordered by descending count.

    Keywords with the same count keep the iteration order of
    ``keyword_counts`` (the sort is stable).
    """
    return sorted(keyword_counts.items(), key=lambda pair: pair[1], reverse=True)


if __name__ == '__main__':
    # Script entry point: for each domain pulled from the module queue,
    # extract keywords from every crawled paste of that domain and print
    # them ranked by how many pastes/keyword hits they accumulated.

    # pubsublogger configuration
    publisher.port = 6380
    publisher.channel = "Script"

    publisher.info("Script DomainSubject started")

    config_section = 'DomainSubject'
    p = Process(config_section)

    # ARDB instance holding the crawled-onion data
    r_onion = redis.StrictRedis(
        host=p.config.get("ARDB_Onion", "host"),
        port=p.config.getint("ARDB_Onion", "port"),
        db=p.config.getint("ARDB_Onion", "db"),
        decode_responses=True)

    while True:
        # format: <domain>
        # The value read from the queue is used as-is; a previous hard-coded
        # debug domain overwrote it and made the idle branch unreachable.
        domain = p.get_from_set()

        if domain is not None:
            # retrieve all crawled pastes
            set_crawles_pastes = r_onion.smembers(
                'temp:crawled_domain_pastes:{}'.format(domain))

            if set_crawles_pastes:
                dict_keyword = defaultdict(int)

                for paste_path in set_crawles_pastes:
                    paste = Paste.Paste(paste_path)
                    content = paste.get_p_content()

                    # Feed the stored HTML to newspaper directly (no download),
                    # then run its NLP step to populate article.keywords.
                    article = newspaper.Article(url='')
                    article.set_html(content)
                    article.parse()
                    article.nlp()

                    for keyword in article.keywords:
                        dict_keyword[keyword] += 1

                if dict_keyword:
                    for item in rank_keywords(dict_keyword):
                        print(item)
                else:
                    print('no keywords found')

            # NOTE(review): one-minute pause after each processed domain is
            # kept from the original; confirm this throttle is still wanted.
            time.sleep(60)
        else:
            # queue is empty: wait before polling again
            time.sleep(5)

View File

@ -11,9 +11,7 @@ It apply phone number regexes on paste content and warn if above a threshold.
""" """
import datetime
import time import time
import redis
import re import re
import phonenumbers import phonenumbers
from packages import Paste from packages import Paste
@ -25,10 +23,8 @@ def search_phone(message):
paste = Paste.Paste(message) paste = Paste.Paste(message)
content = paste.get_p_content() content = paste.get_p_content()
# regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required) # regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
#reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})') reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
#reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})') reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
# use non capturing group
reg_phone = re.compile(r'(?:\+\d{1,4}(?:\(\d\))?\d?|0\d?)(?:\d{6,8}|(?:[-/\. ]{1}\(?\d{2,4}\)?){3,4})')
# list of the regex results in the Paste, may be null # list of the regex results in the Paste, may be null
results = reg_phone.findall(content) results = reg_phone.findall(content)
@ -49,23 +45,17 @@ def search_phone(message):
for phone_number in results: for phone_number in results:
try: try:
x = phonenumbers.parse(phone_number, None) x = phonenumbers.parse(phone_number, None)
print(x)
country_code = x.country_code country_code = x.country_code
if stats.get(country_code) is None: if stats.get(country_code) is None:
stats[country_code] = 1 stats[country_code] = 1
else: else:
stats[country_code] = stats[country_code] + 1 stats[country_code] = stats[country_code] + 1
except Exception as e: except:
#print(e)
pass pass
date = datetime.datetime.now().strftime("%Y%m")
for country_code in stats: for country_code in stats:
print(country_code)
if stats[country_code] > 4: if stats[country_code] > 4:
publisher.warning('{} contains Phone numbers with country code {}'.format(paste.p_name, country_code)) publisher.warning('{} contains Phone numbers with country code {}'.format(paste.p_name, country_code))
if __name__ == '__main__': if __name__ == '__main__':
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh) # If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
# Port of the redis instance used by pubsublogger # Port of the redis instance used by pubsublogger
@ -82,13 +72,6 @@ if __name__ == '__main__':
# Sent to the logging a description of the module # Sent to the logging a description of the module
publisher.info("Run Phone module") publisher.info("Run Phone module")
# ARDB #
server_statistics = redis.StrictRedis(
host=p.config.get("ARDB_Statistics", "host"),
port=p.config.getint("ARDB_Statistics", "port"),
db=p.config.getint("ARDB_Statistics", "db"),
decode_responses=True)
# Endless loop getting messages from the input queue # Endless loop getting messages from the input queue
while True: while True:
# Get one message from the input queue # Get one message from the input queue

View File

@ -2,7 +2,9 @@
# -*-coding:UTF-8 -* # -*-coding:UTF-8 -*
''' '''
lu Create statistics pie charts by tld
Default tld: lu
''' '''
import os import os
@ -64,7 +66,7 @@ def create_pie_chart(country ,db_key, date, pie_title, path, save_name):
ax1.set_title(pie_title) ax1.set_title(pie_title)
#plt.show() #plt.show()
plt.savefig(os.path.join(path, save_name)) plt.savefig(os.path.join(path,save_name))
plt.close(fig1) plt.close(fig1)
def create_donut_chart(db_key, date, pie_title, path, save_name): def create_donut_chart(db_key, date, pie_title, path, save_name):
@ -126,7 +128,7 @@ if __name__ == '__main__':
parser = argparse.ArgumentParser( parser = argparse.ArgumentParser(
description='''This script is a part of the Analysis Information Leak description='''This script is a part of the Analysis Information Leak
framework. It create pie charts on a country statistics".''', framework. Create statistics pie charts".''',
epilog='Example: ./create_lu_graph.py 0 lu now, create_lu_graph.py 0 lu 201807') epilog='Example: ./create_lu_graph.py 0 lu now, create_lu_graph.py 0 lu 201807')
parser.add_argument('type', type=int, default=0, parser.add_argument('type', type=int, default=0,
@ -135,12 +137,11 @@ if __name__ == '__main__':
1: credential_pie, 1: credential_pie,
2: mail_pie 2: mail_pie
3: sqlinjection_pie, 3: sqlinjection_pie,
4: domain_pie, 4: iban_pie,''',
5: iban_pie,''', choices=[0, 1, 2, 3, 4], action='store')
choices=[0, 1, 2, 3, 4, 5], action='store')
parser.add_argument('country', type=str, default="de", parser.add_argument('country', type=str, default="lu",
help='''The country code, de:default''', help='''The country code, lu:default''',
action='store') action='store')
parser.add_argument('date', type=str, default="now", parser.add_argument('date', type=str, default="now",
@ -148,7 +149,7 @@ if __name__ == '__main__':
args = parser.parse_args() args = parser.parse_args()
path = os.path.join(os.environ['AIL_HOME'], 'doc') # path to module config file path = os.path.join(os.environ['AIL_HOME'], 'doc', 'statistics') # save path
config_section = 'ARDB_Statistics' config_section = 'ARDB_Statistics'
@ -171,7 +172,7 @@ if __name__ == '__main__':
create_pie_chart(args.country, 'mail_by_tld:', date, "AIL: mail leak by tld", path, 'AIL_mail_by_tld.png') create_pie_chart(args.country, 'mail_by_tld:', date, "AIL: mail leak by tld", path, 'AIL_mail_by_tld.png')
create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: SQLInjection by tld", path, 'AIL_SQLInjection_by_tld.png') create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: SQLInjection by tld", path, 'AIL_SQLInjection_by_tld.png')
create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png') create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png')
create_pie_chart(args.country.upper(), 'iban_by_tld:', date, "AIL: Iban by tld", path, 'AIL_iban_by_tld.png') create_pie_chart(args.country.upper(), 'iban_by_country:', date, "AIL: Iban by country", path, 'AIL_iban_by_country.png')
elif args.type == 1: elif args.type == 1:
create_pie_chart(args.country, 'credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png') create_pie_chart(args.country, 'credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png')
elif args.type == 2: elif args.type == 2:
@ -179,6 +180,4 @@ if __name__ == '__main__':
elif args.type == 3: elif args.type == 3:
create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: sqlInjection by tld", path, 'AIL_sqlInjectionl_by_tld.png') create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: sqlInjection by tld", path, 'AIL_sqlInjectionl_by_tld.png')
elif args.type == 4: elif args.type == 4:
create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png') create_pie_chart(args.country.upper(), 'iban_by_country:', date, "AIL: Iban by country", path, 'AIL_iban_by_country.png')
elif args.type == 5:
create_pie_chart(args.country.upper(), 'iban_by_tld:', date, "AIL: Iban by tld", path, 'AIL_iban_by_tld.png')