mirror of https://github.com/CIRCL/AIL-framework
chg: [statistics] clean scripts
parent
ea3d2c1977
commit
204e996fc3
|
@ -40,3 +40,4 @@ doc/all_modules.txt
|
||||||
# auto generated
|
# auto generated
|
||||||
doc/module-data-flow.png
|
doc/module-data-flow.png
|
||||||
doc/data-flow.png
|
doc/data-flow.png
|
||||||
|
doc/statistics
|
||||||
|
|
|
@ -62,7 +62,7 @@ def check_all_iban(l_iban, paste, filename):
|
||||||
if is_valid_iban(iban):
|
if is_valid_iban(iban):
|
||||||
print('------')
|
print('------')
|
||||||
nb_valid_iban = nb_valid_iban + 1
|
nb_valid_iban = nb_valid_iban + 1
|
||||||
server_statistics.hincrby('iban_by_tld:'+date, iban[0:2], 1)
|
server_statistics.hincrby('iban_by_country:'+date, iban[0:2], 1)
|
||||||
|
|
||||||
if(nb_valid_iban > 0):
|
if(nb_valid_iban > 0):
|
||||||
to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name)
|
to_print = 'Iban;{};{};{};'.format(paste.p_source, paste.p_date, paste.p_name)
|
||||||
|
|
|
@ -10,8 +10,6 @@ the out output of the Global module.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
import time
|
import time
|
||||||
import datetime
|
|
||||||
import redis
|
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
from pubsublogger import publisher
|
from pubsublogger import publisher
|
||||||
|
|
||||||
|
@ -28,13 +26,6 @@ def main():
|
||||||
p = Process(config_section)
|
p = Process(config_section)
|
||||||
addr_dns = p.config.get("DomClassifier", "dns")
|
addr_dns = p.config.get("DomClassifier", "dns")
|
||||||
|
|
||||||
# ARDB #
|
|
||||||
server_statistics = redis.StrictRedis(
|
|
||||||
host=p.config.get("ARDB_Statistics", "host"),
|
|
||||||
port=p.config.getint("ARDB_Statistics", "port"),
|
|
||||||
db=p.config.getint("ARDB_Statistics", "db"),
|
|
||||||
decode_responses=True)
|
|
||||||
|
|
||||||
publisher.info("""ZMQ DomainClassifier is Running""")
|
publisher.info("""ZMQ DomainClassifier is Running""")
|
||||||
|
|
||||||
c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
|
c = DomainClassifier.domainclassifier.Extract(rawtext="", nameservers=[addr_dns])
|
||||||
|
@ -55,31 +46,20 @@ def main():
|
||||||
paste = PST.get_p_content()
|
paste = PST.get_p_content()
|
||||||
mimetype = PST._get_p_encoding()
|
mimetype = PST._get_p_encoding()
|
||||||
|
|
||||||
nb_domain = 0
|
|
||||||
nb_tld_domain = 0
|
|
||||||
|
|
||||||
if mimetype == "text/plain":
|
if mimetype == "text/plain":
|
||||||
c.text(rawtext=paste)
|
c.text(rawtext=paste)
|
||||||
c.potentialdomain()
|
c.potentialdomain()
|
||||||
valid = c.validdomain(rtype=['A'], extended=True)
|
c.validdomain(rtype=['A'], extended=True)
|
||||||
nb_domain = len(set(valid))
|
localizeddomains = c.include(expression=cc_tld)
|
||||||
if nb_domain > 0:
|
if localizeddomains:
|
||||||
localizeddomains = c.include(expression=cc_tld)
|
print(localizeddomains)
|
||||||
if localizeddomains:
|
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
|
||||||
nb_tld_domain = len(set(localizeddomains))
|
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
|
||||||
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
|
localizeddomains = c.localizedomain(cc=cc)
|
||||||
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc_tld, PST.p_path))
|
if localizeddomains:
|
||||||
|
print(localizeddomains)
|
||||||
localizeddomains = c.localizedomain(cc=cc)
|
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
|
||||||
if localizeddomains:
|
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
|
||||||
nb_tld_domain = nb_tld_domain + len(set(localizeddomains))
|
|
||||||
publisher.warning('DomainC;{};{};{};Checked {} located in {};{}'.format(
|
|
||||||
PST.p_source, PST.p_date, PST.p_name, localizeddomains, cc, PST.p_path))
|
|
||||||
|
|
||||||
date = datetime.datetime.now().strftime("%Y%m")
|
|
||||||
server_statistics.hincrby('domain_by_tld:'+date, 'ALL', nb_domain)
|
|
||||||
if nb_tld_domain > 0:
|
|
||||||
server_statistics.hincrby('domain_by_tld:'+date, cc, nb_tld_domain)
|
|
||||||
except IOError:
|
except IOError:
|
||||||
print("CRC Checksum Failed on :", PST.p_path)
|
print("CRC Checksum Failed on :", PST.p_path)
|
||||||
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
|
publisher.error('Duplicate;{};{};{};CRC Checksum Failed'.format(
|
||||||
|
|
|
@ -0,0 +1,70 @@
|
||||||
|
#!/usr/bin/env python3
|
||||||
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
|
|
||||||
|
from packages import Paste
|
||||||
|
from Helper import Process
|
||||||
|
from pubsublogger import publisher
|
||||||
|
|
||||||
|
import time
|
||||||
|
import redis
|
||||||
|
import newspaper
|
||||||
|
|
||||||
|
from collections import defaultdict
|
||||||
|
|
||||||
|
from newspaper import fulltext
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
|
||||||
|
publisher.port = 6380
|
||||||
|
publisher.channel = "Script"
|
||||||
|
|
||||||
|
publisher.info("Script DomainSubject started")
|
||||||
|
|
||||||
|
config_section = 'DomainSubject'
|
||||||
|
p = Process(config_section)
|
||||||
|
|
||||||
|
r_onion = redis.StrictRedis(
|
||||||
|
host=p.config.get("ARDB_Onion", "host"),
|
||||||
|
port=p.config.getint("ARDB_Onion", "port"),
|
||||||
|
db=p.config.getint("ARDB_Onion", "db"),
|
||||||
|
decode_responses=True)
|
||||||
|
|
||||||
|
|
||||||
|
while True:
|
||||||
|
|
||||||
|
# format: <domain>
|
||||||
|
domain = p.get_from_set()
|
||||||
|
domain = 'easycoinsayj7p5l.onion'
|
||||||
|
|
||||||
|
if domain is not None:
|
||||||
|
|
||||||
|
#retrieve all crawled pastes
|
||||||
|
set_crawles_pastes = r_onion.smembers('temp:crawled_domain_pastes:{}'.format(domain))
|
||||||
|
if set_crawles_pastes:
|
||||||
|
dict_keyword = defaultdict(int)
|
||||||
|
|
||||||
|
for paste_path in set_crawles_pastes:
|
||||||
|
|
||||||
|
paste = Paste.Paste(paste_path)
|
||||||
|
content = paste.get_p_content()
|
||||||
|
|
||||||
|
article = newspaper.Article(url='')
|
||||||
|
article.set_html(content)
|
||||||
|
article.parse()
|
||||||
|
article.nlp()
|
||||||
|
|
||||||
|
for keyword in article.keywords:
|
||||||
|
dict_keyword[keyword] += 1
|
||||||
|
|
||||||
|
|
||||||
|
if dict_keyword:
|
||||||
|
res = [(k, dict_keyword[k]) for k in sorted(dict_keyword, key=dict_keyword.get, reverse=True)]
|
||||||
|
for item in res:
|
||||||
|
print(item)
|
||||||
|
else:
|
||||||
|
print('no keywords found')
|
||||||
|
time.sleep(60)
|
||||||
|
|
||||||
|
else:
|
||||||
|
time.sleep(5)
|
23
bin/Phone.py
23
bin/Phone.py
|
@ -11,9 +11,7 @@ It apply phone number regexes on paste content and warn if above a threshold.
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import datetime
|
|
||||||
import time
|
import time
|
||||||
import redis
|
|
||||||
import re
|
import re
|
||||||
import phonenumbers
|
import phonenumbers
|
||||||
from packages import Paste
|
from packages import Paste
|
||||||
|
@ -25,10 +23,8 @@ def search_phone(message):
|
||||||
paste = Paste.Paste(message)
|
paste = Paste.Paste(message)
|
||||||
content = paste.get_p_content()
|
content = paste.get_p_content()
|
||||||
# regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
|
# regex to find phone numbers, may raise many false positives (shalt thou seek optimization, upgrading is required)
|
||||||
#reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
|
reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\d{2,3}){3,4})')
|
||||||
#reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
|
reg_phone = re.compile(r'(\+\d{1,4}(\(\d\))?\d?|0\d?)(\d{6,8}|([-/\. ]{1}\(?\d{2,4}\)?){3,4})')
|
||||||
# use non capturing group
|
|
||||||
reg_phone = re.compile(r'(?:\+\d{1,4}(?:\(\d\))?\d?|0\d?)(?:\d{6,8}|(?:[-/\. ]{1}\(?\d{2,4}\)?){3,4})')
|
|
||||||
# list of the regex results in the Paste, may be null
|
# list of the regex results in the Paste, may be null
|
||||||
results = reg_phone.findall(content)
|
results = reg_phone.findall(content)
|
||||||
|
|
||||||
|
@ -49,23 +45,17 @@ def search_phone(message):
|
||||||
for phone_number in results:
|
for phone_number in results:
|
||||||
try:
|
try:
|
||||||
x = phonenumbers.parse(phone_number, None)
|
x = phonenumbers.parse(phone_number, None)
|
||||||
print(x)
|
|
||||||
country_code = x.country_code
|
country_code = x.country_code
|
||||||
if stats.get(country_code) is None:
|
if stats.get(country_code) is None:
|
||||||
stats[country_code] = 1
|
stats[country_code] = 1
|
||||||
else:
|
else:
|
||||||
stats[country_code] = stats[country_code] + 1
|
stats[country_code] = stats[country_code] + 1
|
||||||
except Exception as e:
|
except:
|
||||||
#print(e)
|
|
||||||
pass
|
pass
|
||||||
|
|
||||||
date = datetime.datetime.now().strftime("%Y%m")
|
|
||||||
for country_code in stats:
|
for country_code in stats:
|
||||||
print(country_code)
|
|
||||||
if stats[country_code] > 4:
|
if stats[country_code] > 4:
|
||||||
publisher.warning('{} contains Phone numbers with country code {}'.format(paste.p_name, country_code))
|
publisher.warning('{} contains Phone numbers with country code {}'.format(paste.p_name, country_code))
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
# If you wish to use an other port of channel, do not forget to run a subscriber accordingly (see launch_logs.sh)
|
||||||
# Port of the redis instance used by pubsublogger
|
# Port of the redis instance used by pubsublogger
|
||||||
|
@ -82,13 +72,6 @@ if __name__ == '__main__':
|
||||||
# Sent to the logging a description of the module
|
# Sent to the logging a description of the module
|
||||||
publisher.info("Run Phone module")
|
publisher.info("Run Phone module")
|
||||||
|
|
||||||
# ARDB #
|
|
||||||
server_statistics = redis.StrictRedis(
|
|
||||||
host=p.config.get("ARDB_Statistics", "host"),
|
|
||||||
port=p.config.getint("ARDB_Statistics", "port"),
|
|
||||||
db=p.config.getint("ARDB_Statistics", "db"),
|
|
||||||
decode_responses=True)
|
|
||||||
|
|
||||||
# Endless loop getting messages from the input queue
|
# Endless loop getting messages from the input queue
|
||||||
while True:
|
while True:
|
||||||
# Get one message from the input queue
|
# Get one message from the input queue
|
||||||
|
|
|
@ -2,7 +2,9 @@
|
||||||
# -*-coding:UTF-8 -*
|
# -*-coding:UTF-8 -*
|
||||||
|
|
||||||
'''
|
'''
|
||||||
lu
|
Create statistics pie charts by tld
|
||||||
|
|
||||||
|
Default tld: lu
|
||||||
'''
|
'''
|
||||||
|
|
||||||
import os
|
import os
|
||||||
|
@ -64,7 +66,7 @@ def create_pie_chart(country ,db_key, date, pie_title, path, save_name):
|
||||||
|
|
||||||
ax1.set_title(pie_title)
|
ax1.set_title(pie_title)
|
||||||
#plt.show()
|
#plt.show()
|
||||||
plt.savefig(os.path.join(path, save_name))
|
plt.savefig(os.path.join(path,save_name))
|
||||||
plt.close(fig1)
|
plt.close(fig1)
|
||||||
|
|
||||||
def create_donut_chart(db_key, date, pie_title, path, save_name):
|
def create_donut_chart(db_key, date, pie_title, path, save_name):
|
||||||
|
@ -126,7 +128,7 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
parser = argparse.ArgumentParser(
|
parser = argparse.ArgumentParser(
|
||||||
description='''This script is a part of the Analysis Information Leak
|
description='''This script is a part of the Analysis Information Leak
|
||||||
framework. It create pie charts on a country statistics".''',
|
framework. Create statistics pie charts".''',
|
||||||
epilog='Example: ./create_lu_graph.py 0 lu now, create_lu_graph.py 0 lu 201807')
|
epilog='Example: ./create_lu_graph.py 0 lu now, create_lu_graph.py 0 lu 201807')
|
||||||
|
|
||||||
parser.add_argument('type', type=int, default=0,
|
parser.add_argument('type', type=int, default=0,
|
||||||
|
@ -135,12 +137,11 @@ if __name__ == '__main__':
|
||||||
1: credential_pie,
|
1: credential_pie,
|
||||||
2: mail_pie
|
2: mail_pie
|
||||||
3: sqlinjection_pie,
|
3: sqlinjection_pie,
|
||||||
4: domain_pie,
|
4: iban_pie,''',
|
||||||
5: iban_pie,''',
|
choices=[0, 1, 2, 3, 4], action='store')
|
||||||
choices=[0, 1, 2, 3, 4, 5], action='store')
|
|
||||||
|
|
||||||
parser.add_argument('country', type=str, default="de",
|
parser.add_argument('country', type=str, default="lu",
|
||||||
help='''The country code, de:default''',
|
help='''The country code, lu:default''',
|
||||||
action='store')
|
action='store')
|
||||||
|
|
||||||
parser.add_argument('date', type=str, default="now",
|
parser.add_argument('date', type=str, default="now",
|
||||||
|
@ -148,7 +149,7 @@ if __name__ == '__main__':
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|
||||||
path = os.path.join(os.environ['AIL_HOME'], 'doc') # path to module config file
|
path = os.path.join(os.environ['AIL_HOME'], 'doc', 'statistics') # save path
|
||||||
|
|
||||||
config_section = 'ARDB_Statistics'
|
config_section = 'ARDB_Statistics'
|
||||||
|
|
||||||
|
@ -171,7 +172,7 @@ if __name__ == '__main__':
|
||||||
create_pie_chart(args.country, 'mail_by_tld:', date, "AIL: mail leak by tld", path, 'AIL_mail_by_tld.png')
|
create_pie_chart(args.country, 'mail_by_tld:', date, "AIL: mail leak by tld", path, 'AIL_mail_by_tld.png')
|
||||||
create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: SQLInjection by tld", path, 'AIL_SQLInjection_by_tld.png')
|
create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: SQLInjection by tld", path, 'AIL_SQLInjection_by_tld.png')
|
||||||
create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png')
|
create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png')
|
||||||
create_pie_chart(args.country.upper(), 'iban_by_tld:', date, "AIL: Iban by tld", path, 'AIL_iban_by_tld.png')
|
create_pie_chart(args.country.upper(), 'iban_by_country:', date, "AIL: Iban by country", path, 'AIL_iban_by_country.png')
|
||||||
elif args.type == 1:
|
elif args.type == 1:
|
||||||
create_pie_chart(args.country, 'credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png')
|
create_pie_chart(args.country, 'credential_by_tld:', date, "AIL: Credential leak by tld", path, 'AIL_credential_by_tld.png')
|
||||||
elif args.type == 2:
|
elif args.type == 2:
|
||||||
|
@ -179,6 +180,4 @@ if __name__ == '__main__':
|
||||||
elif args.type == 3:
|
elif args.type == 3:
|
||||||
create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: sqlInjection by tld", path, 'AIL_sqlInjectionl_by_tld.png')
|
create_pie_chart(args.country, 'SQLInjection_by_tld:', date, "AIL: sqlInjection by tld", path, 'AIL_sqlInjectionl_by_tld.png')
|
||||||
elif args.type == 4:
|
elif args.type == 4:
|
||||||
create_pie_chart(args.country.upper(), 'domain_by_tld:', date, "AIL: Domain by tld", path, 'AIL_domain_by_tld.png')
|
create_pie_chart(args.country.upper(), 'iban_by_country:', date, "AIL: Iban by country", path, 'AIL_iban_by_country.png')
|
||||||
elif args.type == 5:
|
|
||||||
create_pie_chart(args.country.upper(), 'iban_by_tld:', date, "AIL: Iban by tld", path, 'AIL_iban_by_tld.png')
|
|
Loading…
Reference in New Issue