diff --git a/bin/Attribute.py b/bin/Attributes.py
similarity index 100%
rename from bin/Attribute.py
rename to bin/Attributes.py
diff --git a/bin/CreditCard.py b/bin/CreditCards.py
similarity index 97%
rename from bin/CreditCard.py
rename to bin/CreditCards.py
index 430c6d7a..04ce9c62 100755
--- a/bin/CreditCard.py
+++ b/bin/CreditCards.py
@@ -66,7 +66,7 @@ if __name__ == "__main__":
                 publisher.warning('{}Checked {} valid number(s)'.format(
                     to_print, len(creditcard_set)))
                 #Send to duplicate
-                p.populate_set_out(filepath, 'Duplicate')
+                p.populate_set_out(filename, 'Duplicate')
                 #send to Browse_warning_paste
                 p.populate_set_out('creditcard;{}'.format(filename), 'BrowseWarningPaste')
             else:
diff --git a/bin/Curve_manage_top_sets.py b/bin/CurveManageTopSets.py
similarity index 84%
rename from bin/Curve_manage_top_sets.py
rename to bin/CurveManageTopSets.py
index 34c1c238..8f316333 100755
--- a/bin/Curve_manage_top_sets.py
+++ b/bin/CurveManageTopSets.py
@@ -22,8 +22,8 @@ from pubsublogger import publisher
 from packages import lib_words
 import datetime
 import calendar
-
-from Helper import Process
+import os
+import ConfigParser
 
 # Config Variables
 Refresh_rate = 60*5 #sec
@@ -96,13 +96,19 @@ if __name__ == '__main__':
     # Script is the default channel used for the modules.
     publisher.channel = 'Script'
 
-    config_section = 'CurveManageTopSets'
-    p = Process(config_section)
+    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
+    if not os.path.exists(configfile):
+        raise Exception('Unable to find the configuration file. \
+                        Did you set environment variables? \
+                        Or activate the virtualenv.')
+
+    cfg = ConfigParser.ConfigParser()
+    cfg.read(configfile)
 
     server_term = redis.StrictRedis(
-        host=p.config.get("Redis_Level_DB_TermFreq", "host"),
-        port=p.config.get("Redis_Level_DB_TermFreq", "port"),
-        db=p.config.get("Redis_Level_DB_TermFreq", "db"))
+        host=cfg.get("Redis_Level_DB_TermFreq", "host"),
+        port=cfg.getint("Redis_Level_DB_TermFreq", "port"),
+        db=cfg.getint("Redis_Level_DB_TermFreq", "db"))
 
     publisher.info("Script Curve_manage_top_set started")
 
@@ -113,11 +119,6 @@ if __name__ == '__main__':
     while True:
         # Get one message from the input queue (module only work if linked with a queue)
-        message = p.get_from_set()
-        if message is None:
-            publisher.debug("{} queue is empty, waiting".format(config_section))
-            print 'sleeping'
-            time.sleep(Refresh_rate) # sleep a long time then manage the set
-            manage_top_set()
-            continue
+        time.sleep(Refresh_rate) # sleep a long time then manage the set
+        manage_top_set()
diff --git a/bin/Duplicate_ssdeep.py b/bin/Duplicate_ssdeep.py
deleted file mode 100755
index 1b173eca..00000000
--- a/bin/Duplicate_ssdeep.py
+++ /dev/null
@@ -1,182 +0,0 @@
-#!/usr/bin/env python2
-# -*-coding:UTF-8 -*
-
-"""
-The Duplicate module
-====================
-
-This huge module is, in short term, checking duplicates.
-
-Requirements:
--------------
-
-
-"""
-import redis
-import os
-import time
-import datetime
-import json
-import ssdeep
-from packages import Paste
-from pubsublogger import publisher
-
-from Helper import Process
-
-if __name__ == "__main__":
-    publisher.port = 6380
-    publisher.channel = "Script"
-
-    config_section = 'Duplicates'
-    save_dico_and_reload = 1 #min
-    time_1 = time.time()
-    flag_reload_from_disk = True
-    flag_write_to_disk = False
-
-    p = Process(config_section)
-
-    # REDIS #
-    # DB OBJECT & HASHS ( DISK )
-    # FIXME increase flexibility
-    dico_redis = {}
-    for year in xrange(2013, datetime.date.today().year+1):
-        for month in xrange(0, 16):
-            dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
-                host=p.config.get("Redis_Level_DB", "host"), port=year,
-                db=month)
-            #print("dup: "+str(year)+str(month).zfill(2)+"\n")
-
-    # FUNCTIONS #
-    publisher.info("Script duplicate started")
-
-    dicopath = os.path.join(os.environ['AIL_HOME'],
-                            p.config.get("Directories", "dicofilters"))
-
-    dico_path_set = set()
-    while True:
-        try:
-            hash_dico = {}
-            dupl = []
-
-            x = time.time()
-
-            message = p.get_from_set()
-            if message is not None:
-                path = message
-                PST = Paste.Paste(path)
-            else:
-                publisher.debug("Script Attribute is idling 10s")
-                time.sleep(10)
-                continue
-
-            PST._set_p_hash_kind("ssdeep")
-
-            # Assignate the correct redis connexion
-            r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
-
-            # Creating the dicor name: dicoyyyymm
-            filedicopath = os.path.join(dicopath, 'dico' + PST.p_date.year +
-                                        PST.p_date.month)
-            filedicopath_today = filedicopath
-
-            # Save I/O
-            if time.time() - time_1 > save_dico_and_reload*60:
-                flag_write_to_disk = True
-
-            if os.path.exists(filedicopath):
-                if flag_reload_from_disk == True:
-                    flag_reload_from_disk = False
-                    print 'Reloading'
-                    with open(filedicopath, 'r') as fp:
-                        today_dico = json.load(fp)
-            else:
-                today_dico = {}
-                with open(filedicopath, 'w') as fp:
-                    json.dump(today_dico, fp)
-
-            # For now, just use monthly dico
-            dico_path_set.add(filedicopath)
-
-            # UNIQUE INDEX HASHS TABLE
-            yearly_index = str(datetime.date.today().year)+'00'
-            r_serv0 = dico_redis[yearly_index]
-            r_serv0.incr("current_index")
-            index = r_serv0.get("current_index")+str(PST.p_date)
-
-            # For each dico
-            opened_dico = []
-            for dico in dico_path_set:
-                # Opening dico
-                if dico == filedicopath_today:
-                    opened_dico.append([dico, today_dico])
-                else:
-                    with open(dico, 'r') as fp:
-                        opened_dico.append([dico, json.load(fp)])
-
-
-            #retrieve hash from paste
-            paste_hash = PST._get_p_hash()
-
-            # Go throught the Database of the dico (of the month)
-            threshold_dup = 99
-            for dico_name, dico in opened_dico:
-                for dico_key, dico_hash in dico.items():
-                    percent = ssdeep.compare(dico_hash, paste_hash)
-                    if percent > threshold_dup:
-                        db = dico_name[-6:]
-                        # Go throught the Database of the dico filter (month)
-                        r_serv_dico = dico_redis[db]
-
-                        # index of paste
-                        index_current = r_serv_dico.get(dico_hash)
-                        paste_path = r_serv_dico.get(index_current)
-                        if paste_path != None:
-                            hash_dico[dico_hash] = (paste_path, percent)
-
-                        #print 'comparing: ' + str(dico_hash[:20]) + ' and ' + str(paste_hash[:20]) + ' percentage: ' + str(percent)
-                        print ' '+ PST.p_path[44:] +', '+ paste_path[44:] + ', ' + str(percent)
-
-            # Add paste in DB to prevent its analyse twice
-            # HASHTABLES PER MONTH (because of r_serv1 changing db)
-            r_serv1.set(index, PST.p_path)
-            r_serv1.sadd("INDEX", index)
-            # Adding the hash in Redis
-            r_serv1.set(paste_hash, index)
-            r_serv1.sadd("HASHS", paste_hash)
-            ##################### Similarity found #######################
-
-            # if there is data in this dictionnary
-            if len(hash_dico) != 0:
-                for dico_hash, paste_tuple in hash_dico.items():
-                    paste_path, percent = paste_tuple
-                    dupl.append((paste_path, percent))
-
-            # Creating the object attribute and save it.
-            to_print = 'Duplicate;{};{};{};'.format(
-                PST.p_source, PST.p_date, PST.p_name)
-            if dupl != []:
-                PST.__setattr__("p_duplicate", dupl)
-                PST.save_attribute_redis("p_duplicate", dupl)
-                publisher.info('{}Detected {}'.format(to_print, len(dupl)))
-                print '{}Detected {}'.format(to_print, len(dupl))
-
-            y = time.time()
-
-            publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
-
-
-            # Adding the hash in the dico of the month
-            today_dico[index] = paste_hash
-
-            if flag_write_to_disk:
-                time_1 = time.time()
-                flag_write_to_disk = False
-                flag_reload_from_disk = True
-                print 'writing'
-                with open(filedicopath, 'w') as fp:
-                    json.dump(today_dico, fp)
-        except IOError:
-            to_print = 'Duplicate;{};{};{};'.format(
-                PST.p_source, PST.p_date, PST.p_name)
-            print "CRC Checksum Failed on :", PST.p_path
-            publisher.error('{}CRC Checksum Failed'.format(to_print))
diff --git a/bin/Duplicate_ssdeep_v2.py b/bin/Duplicates.py
similarity index 100%
rename from bin/Duplicate_ssdeep_v2.py
rename to bin/Duplicates.py
diff --git a/bin/Duplicate.py b/bin/Duplicates_old.py
similarity index 100%
rename from bin/Duplicate.py
rename to bin/Duplicates_old.py
diff --git a/bin/Helper.py b/bin/Helper.py
index 78a1c94f..66d7766a 100755
--- a/bin/Helper.py
+++ b/bin/Helper.py
@@ -16,6 +16,7 @@ import ConfigParser
 import os
 import zmq
 import time
+import datetime
 import json
 
 
@@ -132,7 +133,25 @@ class Process(object):
         in_set = self.subscriber_name + 'in'
         self.r_temp.hset('queues', self.subscriber_name, int(self.r_temp.scard(in_set)))
-        return self.r_temp.spop(in_set)
+        message = self.r_temp.spop(in_set)
+        timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
+        dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes')
+
+        if message is None:
+            return None
+
+        else:
+            try:
+                path = message.split(".")[-2].split("/")[-1]
+                value = str(timestamp) + ", " + path
+                self.r_temp.set("MODULE_"+self.subscriber_name, value)
+                return message
+
+            except:
+                path = "?"
+                value = str(timestamp) + ", " + path
+                self.r_temp.set("MODULE_"+self.subscriber_name, value)
+                return message
 
     def populate_set_out(self, msg, channel=None):
         # multiproc
diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh
index fef9c3ce..024b22e4 100755
--- a/bin/LAUNCH.sh
+++ b/bin/LAUNCH.sh
@@ -114,31 +114,31 @@ function launching_scripts {
     screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
     sleep 0.1
-    screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x'
+    screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x'
     sleep 0.1
-    screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
+    screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x'
     sleep 0.1
-    screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x'
+    screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x'
     sleep 0.1
-    screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x'
+    screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
     sleep 0.1
     screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
     sleep 0.1
     screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
     sleep 0.1
-    screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x'
+    screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x'
     sleep 0.1
     screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
     sleep 0.1
     screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
     sleep 0.1
-    screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x'
+    screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x'
     sleep 0.1
     screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
     sleep 0.1
     screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
     sleep 0.1
-    screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x'
+    screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x'
     sleep 0.1
     screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
     sleep 0.1
@@ -158,7 +158,9 @@ function launching_scripts {
     sleep 0.1
     screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
     sleep 0.1
-    screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x'
+    screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py -k 0; read x'
 }
 
diff --git a/bin/Line.py b/bin/Lines.py
similarity index 100%
rename from bin/Line.py
rename to bin/Lines.py
diff --git a/bin/ModuleInformation.py b/bin/ModuleInformation.py
new file mode 100755
index 00000000..8fcd8c78
--- /dev/null
+++ b/bin/ModuleInformation.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python2
+# -*-coding:UTF-8 -*
+
+import time
+import datetime
+import redis
+import os
+import signal
+import argparse
+from subprocess import PIPE, Popen
+import ConfigParser
+import json
+from terminaltables import AsciiTable
+import textwrap
+
+# CONFIG VARIABLES
+threshold_stucked_module = 60*60*1 #1 hour
+log_filename = "../logs/moduleInfo.log"
+command_search_pid = "ps a -o pid,cmd | grep {}"
+command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\""
+
+
+def getPid(module):
+    p = Popen([command_search_pid.format(module+".py")],
+              stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
+    for line in p.stdout:
+        splittedLine = line.split()
+        if 'python2' in splittedLine:
+            return int(splittedLine[0])
+    else:
+        return None
+
+
+def kill_module(module):
+    print ''
+    print '-> trying to kill module:', module
+
+    pid = getPid(module)
+    if pid is not None:
+        os.kill(pid, signal.SIGUSR1)
+        time.sleep(1)
+        if getPid(module) is None:
+            print module, 'has been killed'
+            print 'restarting', module, '...'
+            p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
+
+        else:
+            print 'killing failed, retrying...'
+            time.sleep(3)
+            os.kill(pid, signal.SIGUSR1)
+            time.sleep(1)
+            if getPid(module) is None:
+                print module, 'has been killed'
+                print 'restarting', module, '...'
+                p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
+            else:
+                print 'killing failed!'
+                time.sleep(7)
+
+
+if __name__ == "__main__":
+
+    parser = argparse.ArgumentParser(description='Show info about running modules and log suspected stuck modules. May be used to automatically kill and restart stuck ones.')
+    parser.add_argument('-r', '--refresh', type=int, required=False, default=1, help='Refresh rate')
+    parser.add_argument('-k', '--autokill', type=int, required=True, default=1, help='Enable auto kill option (1 for TRUE, anything else for FALSE)')
+
+    args = parser.parse_args()
+
+    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
+    if not os.path.exists(configfile):
+        raise Exception('Unable to find the configuration file. \
+                        Did you set environment variables? \
+                        Or activate the virtualenv.')
+
+    cfg = ConfigParser.ConfigParser()
+    cfg.read(configfile)
+
+    # REDIS #
+    server = redis.StrictRedis(
+        host=cfg.get("Redis_Queues", "host"),
+        port=cfg.getint("Redis_Queues", "port"),
+        db=cfg.getint("Redis_Queues", "db"))
+
+    while True:
+
+        num = 0
+        printarray1 = []
+        printarray2 = []
+        for queue, card in server.hgetall("queues").iteritems():
+            key = "MODULE_" + queue
+            value = server.get(key)
+            if value is not None:
+                timestamp, path = value.split(", ")
+                if timestamp is not None and path is not None:
+                    num += 1
+                    startTime_readable = datetime.datetime.fromtimestamp(int(timestamp))
+                    processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0]
+
+                    if int(card) > 0:
+                        if int((datetime.datetime.now() - startTime_readable).total_seconds()) > threshold_stucked_module:
+                            log = open(log_filename, 'a')
+                            log.write(json.dumps([queue, card, str(startTime_readable), str(processed_time_readable), path]) + "\n")
+                            if args.autokill == 1:
+                                kill_module(queue)
+
+                        printarray1.append([str(num), str(queue), str(card), str(startTime_readable), str(processed_time_readable), str(path)])
+
+                    else:
+                        printarray2.append([str(num), str(queue), str(card), str(startTime_readable), str(processed_time_readable), str(path)])
+
+        printarray1.sort(lambda x,y: cmp(x[4], y[4]), reverse=True)
+        printarray2.sort(lambda x,y: cmp(x[4], y[4]), reverse=True)
+        printarray1.insert(0,["#", "Queue", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"])
+        printarray2.insert(0,["#", "Queue", "Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"])
+
+        os.system('clear')
+        t1 = AsciiTable(printarray1, title="Working queues")
+        t1.column_max_width(1)
+        if not t1.ok:
+            longest_col = t1.column_widths.index(max(t1.column_widths))
+            max_length_col = t1.column_max_width(longest_col)
+            if max_length_col > 0:
+                for i, content in enumerate(t1.table_data):
+                    if len(content[longest_col]) > max_length_col:
+                        temp = ''
+                        for l in content[longest_col].splitlines():
+                            if len(l) > max_length_col:
+                                temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n'
+                            else:
+                                temp += l + '\n'
+                        content[longest_col] = temp.strip()
+                        t1.table_data[i] = content
+
+        t2 = AsciiTable(printarray2, title="Idling queues")
+        t2.column_max_width(1)
+        if not t2.ok:
+            longest_col = t2.column_widths.index(max(t2.column_widths))
+            max_length_col = t2.column_max_width(longest_col)
+            if max_length_col > 0:
+                for i, content in enumerate(t2.table_data):
+                    if len(content[longest_col]) > max_length_col:
+                        temp = ''
+                        for l in content[longest_col].splitlines():
+                            if len(l) > max_length_col:
+                                temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n'
+                            else:
+                                temp += l + '\n'
+                        content[longest_col] = temp.strip()
+                        t2.table_data[i] = content
+
+
+        print t1.table
+        print '\n'
+        print t2.table
+
+        time.sleep(args.refresh)
diff --git a/bin/SentimentAnalyser.py b/bin/SentimentAnalysis.py
similarity index 100%
rename from bin/SentimentAnalyser.py
rename to bin/SentimentAnalysis.py
diff --git a/bin/Url.py b/bin/Web.py
similarity index 100%
rename from bin/Url.py
rename to bin/Web.py
diff --git a/bin/WebStats.py b/bin/WebStats.py
index 6fdd9ee3..20c1f489 100755
--- a/bin/WebStats.py
+++ b/bin/WebStats.py
@@ -77,12 +77,14 @@ def compute_progression(server, field_name, num_day, url_parsed):
         member_set = []
         for keyw in server.smembers(redis_progression_name_set):
             member_set.append((keyw, int(server.hget(redis_progression_name, keyw))))
-        print member_set
         member_set.sort(key=lambda tup: tup[1])
         if member_set[0][1] < keyword_increase:
+            print 'removing', member_set[0][0] + '('+str(member_set[0][1])+')', 'and adding', keyword, str(keyword_increase)
             #remove min from set and add the new one
-            server.srem(redis_progression_name_set, member_set[0])
+            server.srem(redis_progression_name_set, member_set[0][0])
             server.sadd(redis_progression_name_set, keyword)
+            server.hdel(redis_progression_name, member_set[0][0])
+            server.hset(redis_progression_name, keyword, keyword_increase)
 
 
 if __name__ == '__main__':
diff --git a/bin/launch_scripts.sh b/bin/launch_scripts.sh
index ad55244a..1cdde370 100755
--- a/bin/launch_scripts.sh
+++ b/bin/launch_scripts.sh
@@ -8,50 +8,52 @@
 sleep 0.1
 
 echo -e $GREEN"\t* Launching ZMQ scripts"$DEFAULT
 
-screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
-sleep 0.1
"Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x' + screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py; read x' diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index 7434def8..0243038a 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -34,7 +34,7 @@ subscribe = Redis_Global publish = 
 publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve
 
 [CreditCards]
-subscribe = Redis_CreditCard
+subscribe = Redis_CreditCards
 publish = Redis_Duplicate,Redis_ModuleStats,Redis_BrowseWarningPaste
 
 [Mail]
diff --git a/doc/generate_graph_data.py b/doc/generate_graph_data.py
new file mode 100755
index 00000000..cff1f538
--- /dev/null
+++ b/doc/generate_graph_data.py
@@ -0,0 +1,62 @@
+#!/usr/bin/env python2
+# -*-coding:UTF-8 -*
+
+content = ""
+modules = {}
+all_modules = []
+curr_module = ""
+streamingPub = {}
+streamingSub = {}
+
+with open('../bin/packages/modules.cfg', 'r') as f:
+    for line in f:
+        if line[0] != '#':
+            if line[0] == '[':
+                curr_name = line.replace('[','').replace(']','').replace('\n', '').replace(' ', '')
+                all_modules.append(curr_name)
+                modules[curr_name] = {'sub': [], 'pub': []}
+                curr_module = curr_name
+            elif curr_module != "": # searching for sub or pub
+                if line.startswith("subscribe"):
+                    curr_subscribers = [w for w in line.replace('\n', '').replace(' ', '').split('=')[1].split(',')]
+                    modules[curr_module]['sub'] = curr_subscribers
+                    for sub in curr_subscribers:
+                        streamingSub[sub] = curr_module
+
+                elif line.startswith("publish"):
+                    curr_publishers = [w for w in line.replace('\n', '').replace(' ', '').split('=')[1].split(',')]
+                    modules[curr_module]['pub'] = curr_publishers
+                    for pub in curr_publishers:
+                        streamingPub[pub] = curr_module
+            else:
+                continue
+
+    output_set_graph = set()
+
+    for module in modules.keys():
+        for stream_in in modules[module]['sub']:
+            if stream_in not in streamingPub.keys():
+                output_set_graph.add("\"" + stream_in + "\" [color=darkorange1] ;\n")
+                output_set_graph.add("\"" + stream_in + "\"" + "->" + module + ";\n")
+            else:
+                output_set_graph.add("\"" + streamingPub[stream_in] + "\"" + "->" + module + ";\n")
+
+        for stream_out in modules[module]['pub']:
+            if stream_out not in streamingSub.keys():
+                output_set_graph.add("\"" + stream_out + "\" [color=darkorange1] ;\n")
+                output_set_graph.add("\"" + stream_out + "\"" + "->" + module + ";\n")
+            else:
+                output_set_graph.add("\"" + module + "\"" + "->" + streamingSub[stream_out] + ";\n")
+
+
+    output_text_graph = ""
+    output_text_graph += "digraph unix {\n"\
+                         "graph [pad=\"0.5\"];\n"\
+                         "size=\"25,25\";\n"\
+                         "node [color=lightblue2, style=filled];\n"
+
+    for elem in output_set_graph:
+        output_text_graph += elem
+
+    output_text_graph += "}"
+    print output_text_graph
diff --git a/doc/generate_modules_data_flow_graph.sh b/doc/generate_modules_data_flow_graph.sh
new file mode 100755
index 00000000..90a578e0
--- /dev/null
+++ b/doc/generate_modules_data_flow_graph.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+
+python generate_graph_data.py | dot -T png -o module-data-flow.png
diff --git a/doc/module-data-flow.png b/doc/module-data-flow.png
new file mode 100644
index 00000000..73ed3b1d
Binary files /dev/null and b/doc/module-data-flow.png differ
diff --git a/doc/screenshots/sentiment.png b/doc/screenshots/sentiment.png
deleted file mode 100644
index c4edee90..00000000
Binary files a/doc/screenshots/sentiment.png and /dev/null differ
diff --git a/installing_deps.sh b/installing_deps.sh
index 6af62795..8b790f1f 100755
--- a/installing_deps.sh
+++ b/installing_deps.sh
@@ -17,6 +17,9 @@ sudo apt-get install libadns1 libadns1-dev
 #Needed for redis-lvlDB
 sudo apt-get install libev-dev libgmp-dev
 
+#Needed for the generate-data-flow graph
+sudo apt-get install graphviz
+
 #needed for matplotlib
 test ! -L /usr/include/ft2build.h && sudo ln -s freetype2/ft2build.h /usr/include/
 
 sudo easy_install -U distribute
@@ -69,6 +72,7 @@ echo export AIL_LEVELDB=$(pwd)/redis-leveldb/ >> ./AILENV/bin/activate
 
 mkdir -p $AIL_HOME/{PASTES,Blooms,dumps}
 mkdir -p $AIL_HOME/LEVEL_DB_DATA/2016
+mkdir -p $AIL_HOME/LEVEL_DB_DATA/3016
 
 pip install -U pip
 pip install -r pip_packages_requirement.txt
diff --git a/pip_packages_requirement.txt b/pip_packages_requirement.txt
index bd734175..b80d14bd 100644
--- a/pip_packages_requirement.txt
+++ b/pip_packages_requirement.txt
@@ -10,6 +10,7 @@ textblob
 numpy
 matplotlib
 networkx
+terminaltables
 
 #Tokeniser
 nltk
diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py
index 45efd8f1..8b6e05e3 100755
--- a/var/www/Flask_server.py
+++ b/var/www/Flask_server.py
@@ -81,8 +81,22 @@ def event_stream():
 
 def get_queues(r):
     # We may want to put the llen in a pipeline to do only one query.
-    return [(queue, int(card)) for queue, card in
-            r.hgetall("queues").iteritems()]
+    data = [(queue, int(card)) for queue, card in r.hgetall("queues").iteritems()]
+    newData = []
+    for queue, card in data:
+        key = "MODULE_" + queue
+        value = r.get(key)
+        if value is not None:
+            timestamp, path = value.split(", ")
+            if timestamp is not None:
+                startTime_readable = datetime.datetime.fromtimestamp(int(timestamp))
+                processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0]
+                seconds = int((datetime.datetime.now() - startTime_readable).total_seconds())
+                newData.append( (queue, card, seconds) )
+            else:
+                newData.append( (queue, card, 0) )
+
+    return newData
 
 
 def list_len(s):
diff --git a/var/www/static/js/indexjavascript.js b/var/www/static/js/indexjavascript.js
index f0cd101f..359a1e56 100644
--- a/var/www/static/js/indexjavascript.js
+++ b/var/www/static/js/indexjavascript.js
@@ -221,11 +221,17 @@ function create_queue_table() {
     for(i = 0; i < (glob_tabvar.row1).length;i++){
         var tr = document.createElement('TR')
-        for(j = 0; j < (glob_tabvar.row1[i]).length; j++){
+        for(j = 0; j < 2; j++){
             var td = document.createElement('TD')
             td.appendChild(document.createTextNode(glob_tabvar.row1[i][j]));
             tr.appendChild(td)
         }
+        if (parseInt(glob_tabvar.row1[i][2]) > 60*2 && parseInt(glob_tabvar.row1[i][1]) > 2)
+            tr.className += " danger";
+        else if (parseInt(glob_tabvar.row1[i][2]) > 60*1)
+            tr.className += " warning";
+        else
+            tr.className += " success";
         tableBody.appendChild(tr);
     }
     Tablediv.appendChild(table);
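
Note on the module heartbeat this patch introduces: Process.get_from_set() in bin/Helper.py now writes a "MODULE_<module name>" key into the Redis_Queues database, holding "<unix timestamp>, <paste name>" for the message the module just popped. ModuleInformation.py and var/www/Flask_server.py both parse that value to compute how long a module has been busy on its current paste. A minimal standalone reader of this convention could look like the sketch below (Python 2, matching the codebase; the connection settings and the 120-second threshold are illustrative assumptions, not values from the patch):

import time
import redis

# Assumed Redis_Queues settings -- the real values live in bin/packages/config.cfg.
r = redis.StrictRedis(host='localhost', port=6381, db=1)

# The "queues" hash maps each module name to its current input-queue length.
for queue, card in r.hgetall("queues").iteritems():
    value = r.get("MODULE_" + queue)
    if value is None:
        continue  # this module has not popped a message yet
    # Value format written by Process.get_from_set(): "<timestamp>, <paste name>"
    timestamp, path = value.split(", ")
    busy_seconds = int(time.time()) - int(timestamp)
    if int(card) > 0 and busy_seconds > 120:  # illustrative stuck threshold
        print queue, 'has been processing', path, 'for', busy_seconds, 'seconds'

ModuleInformation.py applies the same logic with a one-hour threshold (threshold_stucked_module) and can optionally kill and restart the offending screen window, while the web dashboard uses the elapsed seconds to color queue rows (success/warning/danger).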