diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh
index 02c4a38b..a08f3b79 100755
--- a/bin/LAUNCH.sh
+++ b/bin/LAUNCH.sh
@@ -78,7 +78,7 @@ function helptext {
     [-k | --killAll]            Kill DB + Scripts
     [-ks | --killscript]        Scripts
     [-u | --update]             Update AIL
-    [-ut | --thirdpartyUpdate]  Update Web
+    [-ut | --thirdpartyUpdate]  Update UI/Frontend
     [-t | --test]               Launch Tests
     [-rp | --resetPassword]     Reset Password
     [-f | --launchFeeder]       LAUNCH Pystemon feeder
@@ -180,7 +180,7 @@ function launching_scripts {
     sleep 0.1
     screen -S "Script_AIL" -X screen -t "ApiKey" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./ApiKey.py; read x"
     sleep 0.1
-    screen -S "Script_AIL" -X screen -t "Web" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Web.py; read x"
+    screen -S "Script_AIL" -X screen -t "Urls" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Urls.py; read x"
     sleep 0.1
     screen -S "Script_AIL" -X screen -t "Credential" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Credential.py; read x"
     sleep 0.1
@@ -210,8 +210,6 @@ function launching_scripts {
     #sleep 0.1
     screen -S "Script_AIL" -X screen -t "Cve" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./Cve.py; read x"
     sleep 0.1
-    screen -S "Script_AIL" -X screen -t "WebStats" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./WebStats.py; read x"
-    sleep 0.1
    screen -S "Script_AIL" -X screen -t "ModuleStats" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./ModuleStats.py; read x"
     sleep 0.1
     screen -S "Script_AIL" -X screen -t "SQLInjectionDetection" bash -c "cd ${AIL_BIN}; ${ENV_PY} ./SQLInjectionDetection.py; read x"
diff --git a/bin/Urls.py b/bin/Urls.py
new file mode 100755
index 00000000..60816089
--- /dev/null
+++ b/bin/Urls.py
@@ -0,0 +1,89 @@
+#!/usr/bin/env python3
+# -*-coding:UTF-8 -*
+
+"""
+The Urls Module
+============================
+
+This module extracts URLs from an item and sends them to other modules.
+
+"""
+
+##################################
+# Import External packages
+##################################
+import redis
+import pprint
+import time
+import os
+from pyfaup.faup import Faup
+import re
+
+##################################
+# Import Project packages
+##################################
+from modules.abstract_module import AbstractModule
+from packages.Item import Item
+from packages import lib_refine
+from lib import regex_helper
+from Helper import Process
+
+
+class Urls(AbstractModule):
+    """
+    Urls module for AIL framework
+    """
+
+    def __init__(self):
+        """
+        Init Urls
+        """
+        super(Urls, self).__init__()
+
+        # FUNCTIONS #
+        self.faup = Faup()
+        self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
+
+        # Protocol file path
+        protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
+                                          self.process.config.get("Directories", "protocolsfile"))
+        # Get all uri from protocolsfile (Used for Curve)
+        uri_scheme = ""
+        with open(protocolsfile_path, 'r') as scheme_file:
+            for scheme in scheme_file:
+                uri_scheme += scheme[:-1]+"|"
+        uri_scheme = uri_scheme[:-1]
+
+        self.url_regex = "((?i:"+uri_scheme + \
+            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
+
+        # Send module state to logs
+        self.redis_logger.info(f"Module {self.module_name} initialized")
+
+
+    def compute(self, message):
+        """
+        Search for Web links from given message
+        """
+        # Extract item
+        id, score = message.split()
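+        # queue message format: "<item_id> <score>"; the score is not used by this module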
+
+        item = Item(id)
+
+        l_urls = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.url_regex, item.get_id(), item.get_content())
+        if len(l_urls) > 0:
+            to_print = f'Urls;{item.get_source()};{item.get_date()};{item.get_basename()};'
+            self.redis_logger.info(f'{to_print}Detected {len(l_urls)} URL;{item.get_id()}')
+
+        for url in l_urls:
+            # # TODO: FIXME handle .foundation .dev onion? i2p?
+
+            to_send = f"{url} {item.get_id()}"
+            self.send_message_to_queue(to_send, 'Url')
+            self.redis_logger.debug(f"url_parsed: {to_send}")
+
+if __name__ == '__main__':
+
+    module = Urls()
+    module.run()
diff --git a/bin/Web.py b/bin/Web.py
deleted file mode 100755
index cbc07f2c..00000000
--- a/bin/Web.py
+++ /dev/null
@@ -1,184 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-"""
-The Web Module
-============================
-
-This module tries to parse URLs and warns if some defined contry code are present.
-
-"""
-
-##################################
-# Import External packages
-##################################
-import redis
-import pprint
-import time
-import os
-import dns.exception
-from pyfaup.faup import Faup
-import re
-# Country and ASN lookup
-from cymru.ip2asn.dns import DNSClient as ip2asn
-import socket
-import pycountry
-import ipaddress
-
-##################################
-# Import Project packages
-##################################
-from module.abstract_module import AbstractModule
-from packages import Paste
-from packages import lib_refine
-from Helper import Process
-
-
-class Web(AbstractModule):
-    """
-    Web module for AIL framework
-    """
-
-    # Used to prevent concat with empty fields due to url parsing
-    def avoidNone(self, a_string):
-        if a_string is None:
-            return ""
-        else:
-            return a_string
-
-    def __init__(self):
-        """
-        Init Web
-        """
-        super(Web, self).__init__(logger_channel='script:web')
-
-        # REDIS Cache
-        self.r_serv2 = redis.StrictRedis(
-            host=self.process.config.get("Redis_Cache", "host"),
-            port=self.process.config.getint("Redis_Cache", "port"),
-            db=self.process.config.getint("Redis_Cache", "db"),
-            decode_responses=True)
-
-        # Country to log as critical
-        self.cc_critical = self.process.config.get("Url", "cc_critical")
-
-        # FUNCTIONS #
-
-        self.faup = Faup()
-
-        # Protocol file path
-        protocolsfile_path = os.path.join(os.environ['AIL_HOME'],
-                                          self.process.config.get("Directories", "protocolsfile"))
-        # Get all uri from protocolsfile (Used for Curve)
-        uri_scheme = ""
-        with open(protocolsfile_path, 'r') as scheme_file:
-            for scheme in scheme_file:
-                uri_scheme += scheme[:-1]+"|"
-        uri_scheme = uri_scheme[:-1]
-
-        self.url_regex = "((?i:"+uri_scheme + \
-            ")\://(?:[a-zA-Z0-9\.\-]+(?:\:[a-zA-Z0-9\.&%\$\-]+)*@)*(?:(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9])\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[1-9]|0)\.(?:25[0-5]|2[0-4][0-9]|[0-1]{1}[0-9]{2}|[1-9]{1}[0-9]{1}|[0-9])|localhost|(?:[a-zA-Z0-9\-]+\.)*[a-zA-Z0-9\-]+\.(?:com|edu|gov|int|mil|net|org|biz|arpa|info|name|pro|aero|coop|museum|[a-zA-Z]{2}))(?:\:[0-9]+)*(?:/(?:$|[a-zA-Z0-9\.\,\?\'\\\+&%\$#\=~_\-]+))*)"
-
-        self.prec_filename = None
-
-        # Send module state to logs
-        self.redis_logger.info(f"Module {self.module_name} initialized")
-
-
-    def compute(self, message):
-        """
-        Search for Web links from given message
-        """
-        # Extract item
-        filename, score = message.split()
-
-        domains_list = set()
-        hosts_list = set()
-
-        if self.prec_filename is None or filename != self.prec_filename:
-            domains_list.clear()
-            hosts_list.clear()
-
-            PST = Paste.Paste(filename)
-            client = ip2asn()
-
-            detected_urls = PST.get_regex(self.url_regex)
-            if len(detected_urls) > 0:
-                to_print = f'Web;{PST.p_source};{PST.p_date};{PST.p_name};'
-                self.redis_logger.info(f'{to_print}Detected {len(detected_urls)} URL;{PST.p_rel_path}')
-
-            for url in detected_urls:
-
-                if url.endswith(".on"):
-                    # URL is an onion link skip
-                    # TODO send to TOR crawler ?
-                    # self.redis_logger.debug("Skip onion link")
-                    continue
-
-                self.redis_logger.debug(f"match regex: {url}")
-
-                to_send = f"{url} {PST._get_p_date()} {filename}"
-                self.process.populate_set_out(to_send, 'Url')
-                self.redis_logger.debug(f"url_parsed: {to_send}")
-
-                self.faup.decode(url)
-                domain = self.faup.get_domain()
-                subdomain = self.faup.get_subdomain()
-
-                self.redis_logger.debug(f'{url} Published')
-
-                domains_list.add(domain)
-
-                hostl = f'{subdomain}.{domain}' if subdomain else domain
-
-                if hostl not in hosts_list:
-                    # test host only once a host in a paste
-                    hosts_list.add(hostl)
-
-                    try:
-                        socket.setdefaulttimeout(1)
-                        ip = socket.gethostbyname(hostl)
-                        # If the resolver is not giving any IPv4 address,
-                        # ASN/CC lookup is skip.
-                        l = client.lookup(ip, qType='IP')
-                    except ipaddress.AddressValueError:
-                        self.redis_logger.debug(
-                            f'ASN/CC lookup failed for IP {ip}')
-                        continue
-                    except:
-                        self.redis_logger.debug(
-                            f'Resolver IPv4 address failed for host {hostl}')
-                        continue
-
-                    cc = getattr(l, 'cc')
-                    asn = ''
-                    if getattr(l, 'asn') is not None:
-                        asn = getattr(l, 'asn')[2:]  # remobe b'
-
-                    # EU is not an official ISO 3166 code (but used by RIPE
-                    # IP allocation)
-                    if cc is not None and cc != "EU":
-                        countryname = pycountry.countries.get(alpha_2=cc).name
-                        self.redis_logger.debug(f'{hostl};{asn};{cc};{countryname}')
-                        if cc == self.cc_critical:
-                            to_print = f'Url;{PST.p_source};{PST.p_date};{PST.p_name};Detected {hostl} {cc}'
-                            self.redis_logger.info(to_print)
-                    else:
-                        self.redis_logger.debug(f'{hostl};{asn};{cc}')
-
-            A_values = lib_refine.checking_A_record(self.r_serv2,
-                                                    domains_list)
-
-            if A_values[0] >= 1:
-
-                pprint.pprint(A_values)
-                # self.redis_logger.info('Url;{};{};{};Checked {} URL;{}'.format(
-                #     PST.p_source, PST.p_date, PST.p_name, A_values[0], PST.p_rel_path))
-
-        self.prec_filename = filename
-
-
-if __name__ == '__main__':
-
-    module = Web()
-    module.run()
diff --git a/bin/WebStats.py b/bin/WebStats.py
deleted file mode 100755
index bec23aa1..00000000
--- a/bin/WebStats.py
+++ /dev/null
@@ -1,207 +0,0 @@
-#!/usr/bin/env python3
-# -*-coding:UTF-8 -*
-
-"""
-The WebStats Module
-======================
-
-This module makes stats on URL recolted from the web module.
-It consider the TLD, Domain and protocol.
- -""" - -################################## -# Import External packages -################################## -import time -import datetime -import redis -import os -from pubsublogger import publisher -from pyfaup.faup import Faup - - -################################## -# Import Project packages -################################## -from module.abstract_module import AbstractModule -from packages import lib_words -from packages.Date import Date -from Helper import Process - - -class WebStats(AbstractModule): - """ - WebStats module for AIL framework - """ - - # Config Var - THRESHOLD_TOTAL_SUM = 200 # Above this value, a keyword is eligible for a progression - THRESHOLD_INCREASE = 1.0 # The percentage representing the keyword occurence since num_day_to_look - MAX_SET_CARDINALITY = 10 # The cardinality of the progression set - NUM_DAY_TO_LOOK = 5 # the detection of the progression start num_day_to_look in the past - - - def __init__(self): - super(WebStats, self).__init__() - - # Send module state to logs - self.redis_logger.info("Module %s initialized"%(self.module_name)) - # Sent to the logging a description of the module - self.redis_logger.info("Makes statistics about valid URL") - - self.pending_seconds = 5*60 - - # REDIS # - self.r_serv_trend = redis.StrictRedis( - host=self.process.config.get("ARDB_Trending", "host"), - port=self.process.config.get("ARDB_Trending", "port"), - db=self.process.config.get("ARDB_Trending", "db"), - decode_responses=True) - - # FILE CURVE SECTION # - self.csv_path_proto = os.path.join(os.environ['AIL_HOME'], - self.process.config.get("Directories", "protocolstrending_csv")) - self.protocolsfile_path = os.path.join(os.environ['AIL_HOME'], - self.process.config.get("Directories", "protocolsfile")) - - self.csv_path_tld = os.path.join(os.environ['AIL_HOME'], - self.process.config.get("Directories", "tldstrending_csv")) - self.tldsfile_path = os.path.join(os.environ['AIL_HOME'], - self.process.config.get("Directories", "tldsfile")) - - self.csv_path_domain = os.path.join(os.environ['AIL_HOME'], - self.process.config.get("Directories", "domainstrending_csv")) - - self.faup = Faup() - self.generate_new_graph = False - - - def computeNone(self): - if self.generate_new_graph: - self.generate_new_graph = False - - today = datetime.date.today() - year = today.year - month = today.month - - self.redis_logger.debug('Building protocol graph') - lib_words.create_curve_with_word_file(self.r_serv_trend, self.csv_path_proto, - self.protocolsfile_path, year, - month) - - self.redis_logger.debug('Building tld graph') - lib_words.create_curve_with_word_file(self.r_serv_trend, self.csv_path_tld, - self.tldsfile_path, year, - month) - - self.redis_logger.debug('Building domain graph') - lib_words.create_curve_from_redis_set(self.r_serv_trend, self.csv_path_domain, - "domain", year, - month) - self.redis_logger.debug('end building') - - - def compute(self, message): - self.generate_new_graph = True - - # Do something with the message from the queue - url, date, path = message.split() - self.faup.decode(url) - url_parsed = self.faup.get() - - # Scheme analysis - self.analyse('scheme', date, url_parsed) - # Tld analysis - self.analyse('tld', date, url_parsed) - # Domain analysis - self.analyse('domain', date, url_parsed) - - self.compute_progression('scheme', self.NUM_DAY_TO_LOOK, url_parsed) - self.compute_progression('tld', self.NUM_DAY_TO_LOOK, url_parsed) - self.compute_progression('domain', self.NUM_DAY_TO_LOOK, url_parsed) - - - def analyse(self, field_name, date, 
-        field = url_parsed[field_name]
-
-        if field is not None:
-            try: # faup version
-                field = field.decode()
-            except:
-                pass
-
-            self.r_serv_trend.hincrby(field, date, 1)
-
-            if field_name == "domain": #save domain in a set for the monthly plot
-                domain_set_name = "domain_set_" + date[0:6]
-                self.r_serv_trend.sadd(domain_set_name, field)
-                self.redis_logger.debug("added in " + domain_set_name +": "+ field)
-
-
-    def get_date_range(self, num_day):
-        curr_date = datetime.date.today()
-        date = Date(str(curr_date.year)+str(curr_date.month).zfill(2)+str(curr_date.day).zfill(2))
-        date_list = []
-
-        for i in range(0, num_day+1):
-            date_list.append(date.substract_day(i))
-        return date_list
-
-
-    def compute_progression_word(self, num_day, keyword):
-        """
-        Compute the progression for one keyword
-        """
-        date_range = self.get_date_range(num_day)
-        # check if this keyword is eligible for progression
-        keyword_total_sum = 0
-        value_list = []
-        for date in date_range:  # get value up to date_range
-            curr_value = self.r_serv_trend.hget(keyword, date)
-            value_list.append(int(curr_value if curr_value is not None else 0))
-            keyword_total_sum += int(curr_value) if curr_value is not None else 0
-        oldest_value = value_list[-1] if value_list[-1] != 0 else 1  #Avoid zero division
-
-        # The progression is based on the ratio: value[i] / value[i-1]
-        keyword_increase = 0
-        value_list_reversed = value_list[:]
-        value_list_reversed.reverse()
-        for i in range(1, len(value_list_reversed)):
-            divisor = value_list_reversed[i-1] if value_list_reversed[i-1] != 0 else 1
-            keyword_increase += value_list_reversed[i] / divisor
-
-        return (keyword_increase, keyword_total_sum)
-
-
-    def compute_progression(self, field_name, num_day, url_parsed):
-        """
-        recompute the set top_progression zset
-            - Compute the current field progression
-            - re-compute the current progression for each first 2*self.MAX_SET_CARDINALITY fields in the top_progression_zset
-        """
-        redis_progression_name_set = "z_top_progression_"+field_name
-
-        keyword = url_parsed[field_name]
-        if keyword is not None:
-
-            #compute the progression of the current word
-            keyword_increase, keyword_total_sum = self.compute_progression_word(num_day, keyword)
-
-            #re-compute the progression of 2*self.MAX_SET_CARDINALITY
-            current_top = self.r_serv_trend.zrevrangebyscore(redis_progression_name_set, '+inf', '-inf', withscores=True, start=0, num=2*self.MAX_SET_CARDINALITY)
-            for word, value in current_top:
-                word_inc, word_tot_sum = self.compute_progression_word(num_day, word)
-                self.r_serv_trend.zrem(redis_progression_name_set, word)
-                if (word_tot_sum > self.THRESHOLD_TOTAL_SUM) and (word_inc > self.THRESHOLD_INCREASE):
-                    self.r_serv_trend.zadd(redis_progression_name_set, float(word_inc), word)
-
-            # filter before adding
-            if (keyword_total_sum > self.THRESHOLD_TOTAL_SUM) and (keyword_increase > self.THRESHOLD_INCREASE):
-                self.r_serv_trend.zadd(redis_progression_name_set, float(keyword_increase), keyword)
-
-
-if __name__ == '__main__':
-
-    module = WebStats()
-    module.run()
diff --git a/bin/ApiKey.py b/bin/modules/ApiKey.py
similarity index 96%
rename from bin/ApiKey.py
rename to bin/modules/ApiKey.py
index cf846d3b..42ea0c7a 100755
--- a/bin/ApiKey.py
+++ b/bin/modules/ApiKey.py
@@ -12,9 +12,13 @@ Search for API keys on an item content.
""" import re +import os +import sys + +sys.path.append(os.path.join(os.environ['AIL_BIN'])) # project packages -from module.abstract_module import AbstractModule +from modules.abstract_module import AbstractModule from packages.Item import Item from lib import regex_helper diff --git a/bin/Categ.py b/bin/modules/Categ.py similarity index 97% rename from bin/Categ.py rename to bin/modules/Categ.py index 818d335c..4e8b6205 100755 --- a/bin/Categ.py +++ b/bin/modules/Categ.py @@ -33,14 +33,16 @@ Requirements ################################## # Import External packages ################################## -import os import argparse +import os import re +import sys +sys.path.append(os.environ['AIL_BIN']) ################################## # Import Project packages ################################## -from module.abstract_module import AbstractModule +from modules.abstract_module import AbstractModule from packages.Item import Item diff --git a/bin/Credential.py b/bin/modules/Credential.py similarity index 75% rename from bin/Credential.py rename to bin/modules/Credential.py index 68fd24aa..2b7a37aa 100755 --- a/bin/Credential.py +++ b/bin/modules/Credential.py @@ -26,27 +26,22 @@ Redis organization: ################################## # Import External packages ################################## -import time import os import sys -import datetime +import time import re import redis +from datetime import datetime from pyfaup.faup import Faup -from pubsublogger import publisher -import lib.regex_helper as regex_helper -import signal - +sys.path.append(os.environ['AIL_BIN']) ################################## # Import Project packages ################################## -from module.abstract_module import AbstractModule -from Helper import Process -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'packages')) -import Item -sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib/')) -import ConfigLoader +from modules.abstract_module import AbstractModule +from packages.Item import Item +from lib import ConfigLoader +from lib import regex_helper class Credential(AbstractModule): @@ -54,8 +49,6 @@ class Credential(AbstractModule): Credential module for AIL framework """ - max_execution_time = 30 - # Split username with spec. 
     REGEX_CRED = "[a-z]+|[A-Z]{3,}|[A-Z]{1,2}[a-z]+|[0-9]+"
 
     REDIS_KEY_NUM_USERNAME = 'uniqNumForUsername'
@@ -79,12 +72,15 @@ class Credential(AbstractModule):
         self.redis_cache_key = regex_helper.generate_redis_cache_key(self.module_name)
 
         # Database
-        self.server_cred = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_TermCred")
-        self.server_statistics = ConfigLoader.ConfigLoader().get_redis_conn("ARDB_Statistics")
+        config_loader = ConfigLoader.ConfigLoader()
+        self.server_cred = config_loader.get_redis_conn("ARDB_TermCred")
+        self.server_statistics = config_loader.get_redis_conn("ARDB_Statistics")
 
         # Config values
-        self.minimumLengthThreshold = ConfigLoader.ConfigLoader().get_config_int("Credential", "minimumLengthThreshold")
-        self.criticalNumberToAlert = ConfigLoader.ConfigLoader().get_config_int("Credential", "criticalNumberToAlert")
+        self.minimumLengthThreshold = config_loader.get_config_int("Credential", "minimumLengthThreshold")
+        self.criticalNumberToAlert = config_loader.get_config_int("Credential", "criticalNumberToAlert")
+
+        self.max_execution_time = 30
 
         # Waiting time in secondes between to message proccessed
         self.pending_seconds = 10
@@ -95,38 +91,39 @@ class Credential(AbstractModule):
 
     def compute(self, message):
-        item_id, count = message.split()
+        id, count = message.split()
+        item = Item(id)
 
-        item_content = Item.get_item_content(item_id)
+        item_content = item.get_content()
 
         # Extract all credentials
-        all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred, item_id, item_content, max_time=Credential.max_execution_time)
+        all_credentials = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_cred, item.get_id(), item_content, max_time=self.max_execution_time)
 
-        if all_credentials:
+        if all_credentials:
             nb_cred = len(all_credentials)
             message = f'Checked {nb_cred} credentials found.'
-
-            all_sites = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_web, item_id, item_content, max_time=Credential.max_execution_time)
+
+            all_sites = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_web, item.get_id(), item_content, max_time=self.max_execution_time)
 
             if all_sites:
                 discovered_sites = ', '.join(all_sites)
                 message += f' Related websites: {discovered_sites}'
-
-            self.redis_logger.debug(message)
-            to_print = f'Credential;{Item.get_source(item_id)};{Item.get_item_date(item_id)};{Item.get_item_basename(item_id)};{message};{item_id}'
+            print(message)
+
+            to_print = f'Credential;{item.get_source()};{item.get_date()};{item.get_basename()};{message};{item.get_id()}'
 
             #num of creds above tresh, publish an alert
             if nb_cred > self.criticalNumberToAlert:
-                self.redis_logger.debug(f"========> Found more than 10 credentials in this file : {item_id}")
+                print(f"========> Found more than 10 credentials in this file : {item.get_id()}")
                 self.redis_logger.warning(to_print)
-
+
                 # Send to duplicate
-                self.process.populate_set_out(item_id, 'Duplicate')
+                self.send_message_to_queue(item.get_id(), 'Duplicate')
 
-                msg = f'infoleak:automatic-detection="credential";{item_id}'
-                self.process.populate_set_out(msg, 'Tags')
+                msg = f'infoleak:automatic-detection="credential";{item.get_id()}'
+                self.send_message_to_queue(msg, 'Tags')
 
-                site_occurence = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_site_for_stats, item_id, item_content, max_time=Credential.max_execution_time, r_set=False)
+                site_occurence = regex_helper.regex_findall(self.module_name, self.redis_cache_key, self.regex_site_for_stats, item.get_id(), item_content, max_time=self.max_execution_time, r_set=False)
 
                 creds_sites = {}
@@ -140,7 +137,7 @@ class Credential(AbstractModule):
                 for url in all_sites:
                     self.faup.decode(url)
                     domain = self.faup.get()['domain']
-                    ## TODO: # FIXME: remove me
+                    ## TODO: # FIXME: remove me, check faup version
                     try:
                         domain = domain.decode()
                     except:
@@ -153,14 +150,14 @@ class Credential(AbstractModule):
                 for site, num in creds_sites.items():
                     # Send for each different site to moduleStats
-                    mssg = f'credential;{num};{site};{Item.get_item_date(item_id)}'
-                    self.redis_logger.debug(mssg)
-                    self.process.populate_set_out(mssg, 'ModuleStats')
+                    mssg = f'credential;{num};{site};{item.get_date()}'
+                    print(mssg)
+                    self.send_message_to_queue(mssg, 'ModuleStats')
 
                 if all_sites:
                     discovered_sites = ', '.join(all_sites)
-                    self.redis_logger.debug(f"=======> Probably on : {discovered_sites}")
+                    print(f"=======> Probably on : {discovered_sites}")
 
-                date = datetime.datetime.now().strftime("%Y%m")
+                date = datetime.now().strftime("%Y%m")
                 for cred in all_credentials:
                     maildomains = re.findall("@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,20}", cred.lower())[0]
                     self.faup.decode(maildomains)
@@ -173,7 +170,7 @@
                     self.server_statistics.hincrby('credential_by_tld:'+date, tld, 1)
             else:
                 self.redis_logger.info(to_print)
-                self.redis_logger.debug(f'found {nb_cred} credentials')
+                print(f'found {nb_cred} credentials')
 
         # For searching credential in termFreq
         for cred in all_credentials:
@@ -181,8 +178,9 @@
             # unique number attached to unique path
             uniq_num_path = self.server_cred.incr(Credential.REDIS_KEY_NUM_PATH)
-            self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item_id: uniq_num_path})
-            self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item_id})
+            self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET, {item.get_id(): uniq_num_path})
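+            # also store the reverse mapping (uniq_num_path -> item id) for lookups by number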
+            self.server_cred.hmset(Credential.REDIS_KEY_ALL_PATH_SET_REV, {uniq_num_path: item.get_id()})
 
             # unique number attached to unique username
             uniq_num_cred = self.server_cred.hget(Credential.REDIS_KEY_ALL_CRED_SET, cred)
@@ -204,6 +201,6 @@
 
 if __name__ == '__main__':
-
+
     module = Credential()
     module.run()
diff --git a/bin/CreditCards.py b/bin/modules/CreditCards.py
similarity index 96%
rename from bin/CreditCards.py
rename to bin/modules/CreditCards.py
index 9ea3cf33..8aa1415e 100755
--- a/bin/CreditCards.py
+++ b/bin/modules/CreditCards.py
@@ -14,14 +14,16 @@ It apply credit card regexes on item content and warn if a valid card number is
 
 ##################################
 # Import External packages
 ##################################
+import os
 import re
 import sys
 import time
 
+sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
 ##################################
-from module.abstract_module import AbstractModule
+from modules.abstract_module import AbstractModule
 from packages.Item import Item
 from packages import lib_refine
diff --git a/bin/Decoder.py b/bin/modules/Decoder.py
similarity index 93%
rename from bin/Decoder.py
rename to bin/modules/Decoder.py
index 6844afd3..b5bf3f36 100755
--- a/bin/Decoder.py
+++ b/bin/modules/Decoder.py
@@ -17,21 +17,21 @@ from hashlib import sha1
 import magic
 import json
 import datetime
-from pubsublogger import publisher
 import re
 import signal
-from lib import Decoded
-
+import sys
 
+sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
 ##################################
-from module.abstract_module import AbstractModule
+from modules.abstract_module import AbstractModule
 from Helper import Process
 from packages import Item
-import ConfigLoader
-
+from lib import ConfigLoader
+from lib import Decoded
 
+# # TODO: use regex_helper
 class TimeoutException(Exception):
     pass
@@ -138,6 +138,8 @@ if not mimetype:
             if not mimetype:
                 self.redis_logger.debug(item_id)
                 self.redis_logger.debug(sha1_string)
+                print(item_id)
+                print(sha1_string)
                 raise Exception('Invalid mimetype')
             Decoded.save_decoded_file_content(sha1_string, decoded_file, item_date, mimetype=mimetype)
             Decoded.save_item_relationship(sha1_string, item_id)
@@ -147,6 +149,7 @@
                 content = content.replace(encoded, '', 1)
 
         self.redis_logger.debug(f'{item_id} : {decoder_name} - {mimetype}')
+        print(f'{item_id} : {decoder_name} - {mimetype}')
         if(find):
             self.set_out_item(decoder_name, item_id)
@@ -156,15 +159,15 @@
 
     def set_out_item(self, decoder_name, item_id):
         self.redis_logger.warning(f'{decoder_name} decoded')
-
+
         # Send to duplicate
-        self.process.populate_set_out(item_id, 'Duplicate')
+        self.send_message_to_queue(item_id, 'Duplicate')
 
         # Send to Tags
         msg = f'infoleak:automatic-detection="{decoder_name}";{item_id}'
-        self.process.populate_set_out(msg, 'Tags')
+        self.send_message_to_queue(msg, 'Tags')
 
 
 if __name__ == '__main__':
-
+
     module = Decoder()
     module.run()
diff --git a/bin/DomClassifier.py b/bin/modules/DomClassifier.py
similarity index 97%
rename from bin/DomClassifier.py
rename to bin/modules/DomClassifier.py
index c0ab43be..240916eb 100755
--- a/bin/DomClassifier.py
+++ b/bin/modules/DomClassifier.py
@@ -18,10 +18,11 @@ import sys
 import time
 
 import DomainClassifier.domainclassifier
 
+sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
 ##################################
-from module.abstract_module import AbstractModule
+from modules.abstract_module import AbstractModule
 from packages.Item import Item
 
 sys.path.append(os.path.join(os.environ['AIL_BIN'], 'lib'))
diff --git a/bin/Global.py b/bin/modules/Global.py
similarity index 98%
rename from bin/Global.py
rename to bin/modules/Global.py
index 49ccb88c..c203d6c8 100755
--- a/bin/Global.py
+++ b/bin/modules/Global.py
@@ -36,10 +36,11 @@ import redis
 from hashlib import md5
 from uuid import uuid4
 
+sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
 ##################################
-from module.abstract_module import AbstractModule
+from modules.abstract_module import AbstractModule
 from lib.ConfigLoader import ConfigLoader
 
diff --git a/bin/Indexer.py b/bin/modules/Indexer.py
similarity index 97%
rename from bin/Indexer.py
rename to bin/modules/Indexer.py
index 07f06a71..f7218ae1 100755
--- a/bin/Indexer.py
+++ b/bin/modules/Indexer.py
@@ -15,17 +15,18 @@ and index each file with a full-text indexer (Whoosh until now).
 import time
 import shutil
 import os
+import sys
 from os.path import join, getsize
 
 from whoosh.index import create_in, exists_in, open_dir
 from whoosh.fields import Schema, TEXT, ID
 
+sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
 ##################################
-from module.abstract_module import AbstractModule
+from modules.abstract_module import AbstractModule
 from packages import Paste
-from Helper import Process
 
 
 class Indexer(AbstractModule):
@@ -102,6 +103,7 @@ class Indexer(AbstractModule):
             docpath = message.split(" ", -1)[-1]
             paste = PST.get_p_content()
             self.redis_logger.debug(f"Indexing - {self.indexname}: {docpath}")
+            print(f"Indexing - {self.indexname}: {docpath}")
 
             # Avoid calculating the index's size at each message
             if(time.time() - self.last_refresh > self.TIME_WAIT):
diff --git a/bin/Keys.py b/bin/modules/Keys.py
similarity index 98%
rename from bin/Keys.py
rename to bin/modules/Keys.py
index 8d42e6cd..02cc3b12 100755
--- a/bin/Keys.py
+++ b/bin/modules/Keys.py
@@ -15,13 +15,16 @@ RSA private key, certificate messages
 
 ##################################
 # Import External packages
 ##################################
+import os
+import sys
 import time
 from enum import Enum
 
+sys.path.append(os.environ['AIL_BIN'])
 ##################################
 # Import Project packages
 ##################################
-from module.abstract_module import AbstractModule
+from modules.abstract_module import AbstractModule
 from packages.Item import Item
diff --git a/bin/Onion.py b/bin/modules/Onion.py
similarity index 97%
rename from bin/Onion.py
rename to bin/modules/Onion.py
index 84af1000..afd417cc 100755
--- a/bin/Onion.py
+++ b/bin/modules/Onion.py
@@ -19,8 +19,11 @@ import os
 import sys
 import re
 
-# project packages
-from module.abstract_module import AbstractModule
+sys.path.append(os.environ['AIL_BIN'])
+##################################
+# Import Project packages
+##################################
+from modules.abstract_module import AbstractModule
 from lib.ConfigLoader import ConfigLoader
 from lib import crawlers
 from lib import regex_helper
diff --git a/bin/Phone.py b/bin/modules/Phone.py
similarity index 100%
rename from bin/Phone.py
rename to bin/modules/Phone.py
diff --git a/bin/SentimentAnalysis.py b/bin/modules/SentimentAnalysis.py
similarity index 100%
rename from bin/SentimentAnalysis.py
rename to bin/modules/SentimentAnalysis.py
diff --git a/bin/Tags.py b/bin/modules/Tags.py
similarity index 100%
rename from bin/Tags.py
rename to bin/modules/Tags.py
diff --git a/bin/module/__init__.py b/bin/modules/__init__.py
similarity index 100%
rename from bin/module/__init__.py
rename to bin/modules/__init__.py
diff --git a/bin/module/abstract_module.py b/bin/modules/abstract_module.py
similarity index 100%
rename from bin/module/abstract_module.py
rename to bin/modules/abstract_module.py
diff --git a/bin/submit_paste.py b/bin/modules/submit_paste.py
similarity index 100%
rename from bin/submit_paste.py
rename to bin/modules/submit_paste.py
diff --git a/bin/TermTrackerMod.py b/bin/trackers/Tracker_Term.py
similarity index 100%
rename from bin/TermTrackerMod.py
rename to bin/trackers/Tracker_Term.py
diff --git a/tests/test_modules.py b/tests/test_modules.py
index 8ae47792..ce685b9c 100644
--- a/tests/test_modules.py
+++ b/tests/test_modules.py
@@ -11,13 +11,13 @@ from base64 import b64encode
 sys.path.append(os.environ['AIL_BIN'])
 
 # Modules Classes
-from ApiKey import ApiKey
-from Categ import Categ
-from CreditCards import CreditCards
-from DomClassifier import DomClassifier
-from Global import Global
-from Keys import Keys
-from Onion import Onion
+from modules.ApiKey import ApiKey
+from modules.Categ import Categ
+from modules.CreditCards import CreditCards
+from modules.DomClassifier import DomClassifier
+from modules.Global import Global
+from modules.Keys import Keys
+from modules.Onion import Onion
 
 # project packages
 import lib.crawlers as crawlers
diff --git a/bin/import_dir.py b/tools/import_dir.py
similarity index 100%
rename from bin/import_dir.py
rename to tools/import_dir.py