diff --git a/README.md b/README.md index 0dd7a325..95398f3b 100644 --- a/README.md +++ b/README.md @@ -9,8 +9,35 @@ AIL framework - Framework for Analysis of Information Leaks AIL is a modular framework to analyse potential information leaks from unstructured data sources like pastes from Pastebin or similar services. AIL framework is flexible and can be extended to support other functionalities to mine sensitive information. -![Dashboard](./doc/screenshots/DashboardAIL.png?raw=true "AIL framework dashboard") -![Trending](./doc/screenshots/WordtrendingAIL.png?raw=true "AIL framework wordtrending") +![Dashboard](./doc/screenshots/dashboard.png?raw=true "AIL framework dashboard") + +Trending charts +--------------- + +![Trending-Web](./doc/screenshots/trending-web.png?raw=true "AIL framework webtrending") +![Trending-Modules](./doc/screenshots/trending-module.png?raw=true "AIL framework modulestrending") + +Browsing +-------- + +![Browse-Pastes](./doc/screenshots/browse-important.png?raw=true "AIL framework browseImportantPastes") + +Sentiment analysis +------------------ + +![Sentiment](./doc/screenshots/sentiment.png?raw=true "AIL framework sentimentanalysis") + +Terms manager and occurrence +---------------------------- + +![Term-Manager](./doc/screenshots/terms-manager.png?raw=true "AIL framework termManager") + +Top terms +--------- + +![Term-Top](./doc/screenshots/terms-top.png?raw=true "AIL framework termTop") +![Term-Plot](./doc/screenshots/terms-plot.png?raw=true "AIL framework termPlot") + AIL framework screencast: https://www.youtube.com/watch?v=9idfHCIMzBY @@ -26,6 +52,9 @@ Features * Module for extracting Tor .onion addresses (to be further processed for analysis) * Extracting and validating potential hostnames (e.g. to feed Passive DNS systems) * A full-text indexer module to index unstructured information +* Modules and web statistics +* Global sentiment analysis for each provider, based on the nltk vader module +* Terms tracking and occurrence * Many more modules for extracting phone numbers, credentials and others Installation ------------ @@ -48,6 +77,7 @@ linux based distributions, you can replace it with [installing_deps_archlinux.sh There is also a [Travis file](.travis.yml) used for automating the installation that can be used to build and install AIL on other systems.
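The sentiment features added above build on nltk's VADER analyzer (the vader_lexicon download is added to installing_deps.sh further down in this patch). A minimal sketch of the underlying scoring call; the per-provider averaging is an assumption for illustration, not code from the SentimentAnalysis module:

```python
# Minimal sketch of VADER polarity scoring, assuming the lexicon
# installed by: python -m nltk.downloader vader_lexicon
from nltk.sentiment.vader import SentimentIntensityAnalyzer

analyzer = SentimentIntensityAnalyzer()
scores = analyzer.polarity_scores("leaked database dump, change your passwords now")
# scores is a dict: {'neg': ..., 'neu': ..., 'pos': ..., 'compound': ...};
# 'compound' is normalised to [-1, 1] and can be averaged per paste provider.
print(scores['compound'])
```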
+ Starting AIL web interface -------------------------- @@ -94,6 +124,7 @@ Redis and LevelDB overview * DB 0 - Cache hostname/dns * Redis on TCP port 6380 - Redis Pub-Sub only * Redis on TCP port 6381 - DB 0 - Queue and Paste content LRU cache +* Redis on TCP port 6382 - DB 1-4 - Trending, terms and sentiments * LevelDB on TCP port - Lines duplicate LICENSE diff --git a/bin/Attribute.py b/bin/Attributes.py similarity index 100% rename from bin/Attribute.py rename to bin/Attributes.py diff --git a/bin/CreditCard.py b/bin/CreditCards.py similarity index 97% rename from bin/CreditCard.py rename to bin/CreditCards.py index 430c6d7a..04ce9c62 100755 --- a/bin/CreditCard.py +++ b/bin/CreditCards.py @@ -66,7 +66,7 @@ if __name__ == "__main__": publisher.warning('{}Checked {} valid number(s)'.format( to_print, len(creditcard_set))) #Send to duplicate - p.populate_set_out(filepath, 'Duplicate') + p.populate_set_out(filename, 'Duplicate') #send to Browse_warning_paste p.populate_set_out('creditcard;{}'.format(filename), 'BrowseWarningPaste') else: diff --git a/bin/Curve_manage_top_sets.py b/bin/CurveManageTopSets.py similarity index 84% rename from bin/Curve_manage_top_sets.py rename to bin/CurveManageTopSets.py index 34c1c238..8f316333 100755 --- a/bin/Curve_manage_top_sets.py +++ b/bin/CurveManageTopSets.py @@ -22,8 +22,8 @@ from pubsublogger import publisher from packages import lib_words import datetime import calendar - -from Helper import Process +import os +import ConfigParser # Config Variables Refresh_rate = 60*5 #sec @@ -96,13 +96,19 @@ if __name__ == '__main__': # Script is the default channel used for the modules. publisher.channel = 'Script' - config_section = 'CurveManageTopSets' - p = Process(config_section) + configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') + if not os.path.exists(configfile): + raise Exception('Unable to find the configuration file. \ + Did you set environment variables? \ + Or activate the virtualenv.') + + cfg = ConfigParser.ConfigParser() + cfg.read(configfile) server_term = redis.StrictRedis( - host=p.config.get("Redis_Level_DB_TermFreq", "host"), - port=p.config.get("Redis_Level_DB_TermFreq", "port"), - db=p.config.get("Redis_Level_DB_TermFreq", "db")) + host=cfg.get("Redis_Level_DB_TermFreq", "host"), + port=cfg.getint("Redis_Level_DB_TermFreq", "port"), + db=cfg.getint("Redis_Level_DB_TermFreq", "db")) publisher.info("Script Curve_manage_top_set started") @@ -113,11 +119,6 @@ if __name__ == '__main__': while True: # Get one message from the input queue (module only work if linked with a queue) - message = p.get_from_set() - if message is None: - publisher.debug("{} queue is empty, waiting".format(config_section)) - print 'sleeping' - time.sleep(Refresh_rate) # sleep a long time then manage the set - manage_top_set() - continue + time.sleep(Refresh_rate) # sleep a long time then manage the set + manage_top_set() diff --git a/bin/Duplicate_ssdeep.py b/bin/Duplicate_ssdeep.py deleted file mode 100755 index 1b173eca..00000000 --- a/bin/Duplicate_ssdeep.py +++ /dev/null @@ -1,182 +0,0 @@ -#!/usr/bin/env python2 -# -*-coding:UTF-8 -* - -""" -The Duplicate module -==================== - -This huge module is, in short term, checking duplicates. 
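Both this removed module and its ssdeep-based replacement (Duplicate_ssdeep_v2.py, renamed to Duplicates.py below) revolve around fuzzy-hash comparison. A minimal sketch, assuming the python ssdeep binding imported here and reusing the 99% threshold that appears further down in this file:

```python
# Minimal sketch of ssdeep near-duplicate detection as used by this module.
import ssdeep

hash_a = ssdeep.hash("lorem ipsum dolor sit amet " * 40)
hash_b = ssdeep.hash("lorem ipsum dolor sit amet " * 40 + "one extra line")
percent = ssdeep.compare(hash_a, hash_b)  # similarity score, 0..100
if percent > 99:  # threshold_dup used below
    print("pastes considered duplicates (%d%%)" % percent)
```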
- -Requirements: -------------- - - -""" -import redis -import os -import time -import datetime -import json -import ssdeep -from packages import Paste -from pubsublogger import publisher - -from Helper import Process - -if __name__ == "__main__": - publisher.port = 6380 - publisher.channel = "Script" - - config_section = 'Duplicates' - save_dico_and_reload = 1 #min - time_1 = time.time() - flag_reload_from_disk = True - flag_write_to_disk = False - - p = Process(config_section) - - # REDIS # - # DB OBJECT & HASHS ( DISK ) - # FIXME increase flexibility - dico_redis = {} - for year in xrange(2013, datetime.date.today().year+1): - for month in xrange(0, 16): - dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis( - host=p.config.get("Redis_Level_DB", "host"), port=year, - db=month) - #print("dup: "+str(year)+str(month).zfill(2)+"\n") - - # FUNCTIONS # - publisher.info("Script duplicate started") - - dicopath = os.path.join(os.environ['AIL_HOME'], - p.config.get("Directories", "dicofilters")) - - dico_path_set = set() - while True: - try: - hash_dico = {} - dupl = [] - - x = time.time() - - message = p.get_from_set() - if message is not None: - path = message - PST = Paste.Paste(path) - else: - publisher.debug("Script Attribute is idling 10s") - time.sleep(10) - continue - - PST._set_p_hash_kind("ssdeep") - - # Assignate the correct redis connexion - r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month] - - # Creating the dicor name: dicoyyyymm - filedicopath = os.path.join(dicopath, 'dico' + PST.p_date.year + - PST.p_date.month) - filedicopath_today = filedicopath - - # Save I/O - if time.time() - time_1 > save_dico_and_reload*60: - flag_write_to_disk = True - - if os.path.exists(filedicopath): - if flag_reload_from_disk == True: - flag_reload_from_disk = False - print 'Reloading' - with open(filedicopath, 'r') as fp: - today_dico = json.load(fp) - else: - today_dico = {} - with open(filedicopath, 'w') as fp: - json.dump(today_dico, fp) - - # For now, just use monthly dico - dico_path_set.add(filedicopath) - - # UNIQUE INDEX HASHS TABLE - yearly_index = str(datetime.date.today().year)+'00' - r_serv0 = dico_redis[yearly_index] - r_serv0.incr("current_index") - index = r_serv0.get("current_index")+str(PST.p_date) - - # For each dico - opened_dico = [] - for dico in dico_path_set: - # Opening dico - if dico == filedicopath_today: - opened_dico.append([dico, today_dico]) - else: - with open(dico, 'r') as fp: - opened_dico.append([dico, json.load(fp)]) - - - #retrieve hash from paste - paste_hash = PST._get_p_hash() - - # Go throught the Database of the dico (of the month) - threshold_dup = 99 - for dico_name, dico in opened_dico: - for dico_key, dico_hash in dico.items(): - percent = ssdeep.compare(dico_hash, paste_hash) - if percent > threshold_dup: - db = dico_name[-6:] - # Go throught the Database of the dico filter (month) - r_serv_dico = dico_redis[db] - - # index of paste - index_current = r_serv_dico.get(dico_hash) - paste_path = r_serv_dico.get(index_current) - if paste_path != None: - hash_dico[dico_hash] = (paste_path, percent) - - #print 'comparing: ' + str(dico_hash[:20]) + ' and ' + str(paste_hash[:20]) + ' percentage: ' + str(percent) - print ' '+ PST.p_path[44:] +', '+ paste_path[44:] + ', ' + str(percent) - - # Add paste in DB to prevent its analyse twice - # HASHTABLES PER MONTH (because of r_serv1 changing db) - r_serv1.set(index, PST.p_path) - r_serv1.sadd("INDEX", index) - # Adding the hash in Redis - r_serv1.set(paste_hash, index) - r_serv1.sadd("HASHS", 
paste_hash) - ##################### Similarity found ####################### - - # if there is data in this dictionnary - if len(hash_dico) != 0: - for dico_hash, paste_tuple in hash_dico.items(): - paste_path, percent = paste_tuple - dupl.append((paste_path, percent)) - - # Creating the object attribute and save it. - to_print = 'Duplicate;{};{};{};'.format( - PST.p_source, PST.p_date, PST.p_name) - if dupl != []: - PST.__setattr__("p_duplicate", dupl) - PST.save_attribute_redis("p_duplicate", dupl) - publisher.info('{}Detected {}'.format(to_print, len(dupl))) - print '{}Detected {}'.format(to_print, len(dupl)) - - y = time.time() - - publisher.debug('{}Processed in {} sec'.format(to_print, y-x)) - - - # Adding the hash in the dico of the month - today_dico[index] = paste_hash - - if flag_write_to_disk: - time_1 = time.time() - flag_write_to_disk = False - flag_reload_from_disk = True - print 'writing' - with open(filedicopath, 'w') as fp: - json.dump(today_dico, fp) - except IOError: - to_print = 'Duplicate;{};{};{};'.format( - PST.p_source, PST.p_date, PST.p_name) - print "CRC Checksum Failed on :", PST.p_path - publisher.error('{}CRC Checksum Failed'.format(to_print)) diff --git a/bin/Duplicate_ssdeep_v2.py b/bin/Duplicates.py similarity index 100% rename from bin/Duplicate_ssdeep_v2.py rename to bin/Duplicates.py diff --git a/bin/Duplicate.py b/bin/Duplicates_old.py similarity index 100% rename from bin/Duplicate.py rename to bin/Duplicates_old.py diff --git a/bin/Helper.py b/bin/Helper.py index 78a1c94f..66d7766a 100755 --- a/bin/Helper.py +++ b/bin/Helper.py @@ -16,6 +16,7 @@ import ConfigParser import os import zmq import time +import datetime import json @@ -132,7 +133,25 @@ class Process(object): in_set = self.subscriber_name + 'in' self.r_temp.hset('queues', self.subscriber_name, int(self.r_temp.scard(in_set))) - return self.r_temp.spop(in_set) + message = self.r_temp.spop(in_set) + timestamp = int(time.mktime(datetime.datetime.now().timetuple())) + dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes') + + if message is None: + return None + + else: + try: + path = message.split(".")[-2].split("/")[-1] + value = str(timestamp) + ", " + path + self.r_temp.set("MODULE_"+self.subscriber_name, value) + return message + + except: + path = "?" 
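+                # The "MODULE_<subscriber name>" key set below is a heartbeat of the
+                # form "<unix timestamp>, <paste name>"; ModuleInformation.py and the
+                # Flask dashboard split it on ", " to measure how long the module has
+                # been busy with its current paste.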
+ value = str(timestamp) + ", " + path + self.r_temp.set("MODULE_"+self.subscriber_name, value) + return message def populate_set_out(self, msg, channel=None): # multiproc diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index fef9c3ce..024b22e4 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -114,31 +114,31 @@ function launching_scripts { screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x' + screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x' + screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x' + screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x' + screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x' + screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x' + screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x' + screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x' sleep 0.1 screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' sleep 0.1 @@ -158,7 +158,9 @@ function launching_scripts { sleep 0.1 screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x' + screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py -k 0; read x' } diff --git a/bin/Line.py b/bin/Lines.py similarity index 100% rename from bin/Line.py rename to bin/Lines.py diff --git a/bin/ModuleInformation.py b/bin/ModuleInformation.py new file mode 100755 index 00000000..8fcd8c78 --- /dev/null +++ b/bin/ModuleInformation.py @@ -0,0 +1,155 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +import time +import datetime +import redis +import os +import signal +import argparse +from subprocess import PIPE, Popen +import ConfigParser +import json +from terminaltables import AsciiTable +import textwrap + +# CONFIG VARIABLES +threshold_stucked_module = 60*60*1 #1 hour +log_filename = "../logs/moduleInfo.log" +command_search_pid = "ps a -o pid,cmd | grep {}" +command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\"" + + +def getPid(module): + p = Popen([command_search_pid.format(module+".py")], 
stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) + for line in p.stdout: + splittedLine = line.split() + if 'python2' in splittedLine: + return int(splittedLine[0]) + else: + return None + + +def kill_module(module): + print '' + print '-> trying to kill module:', module + + pid = getPid(module) + if pid is not None: + os.kill(pid, signal.SIGUSR1) + time.sleep(1) + if getPid(module) is None: + print module, 'has been killed' + print 'restarting', module, '...' + p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) + + else: + print 'killing failed, retrying...' + time.sleep(3) + os.kill(pid, signal.SIGUSR1) + time.sleep(1) + if getPid(module) is None: + print module, 'has been killed' + print 'restarting', module, '...' + p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) + else: + print 'killing failed!' + time.sleep(7) + + +if __name__ == "__main__": + + parser = argparse.ArgumentParser(description='Show info concerning running modules and log suspected stuck modules. May be used to automatically kill and restart stuck ones.') + parser.add_argument('-r', '--refresh', type=int, required=False, default=1, help='Refresh rate') + parser.add_argument('-k', '--autokill', type=int, required=True, default=1, help='Enable auto kill option (1 for TRUE, anything else for FALSE)') + + args = parser.parse_args() + + configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') + if not os.path.exists(configfile): + raise Exception('Unable to find the configuration file. \ + Did you set environment variables? \ + Or activate the virtualenv.') + + cfg = ConfigParser.ConfigParser() + cfg.read(configfile) + + # REDIS # + server = redis.StrictRedis( + host=cfg.get("Redis_Queues", "host"), + port=cfg.getint("Redis_Queues", "port"), + db=cfg.getint("Redis_Queues", "db")) + + while True: + + num = 0 + printarray1 = [] + printarray2 = [] + for queue, card in server.hgetall("queues").iteritems(): + key = "MODULE_" + queue + value = server.get(key) + if value is not None: + timestamp, path = value.split(", ") + if timestamp is not None and path is not None: + num += 1 + startTime_readable = datetime.datetime.fromtimestamp(int(timestamp)) + processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0] + + if int(card) > 0: + if int((datetime.datetime.now() - startTime_readable).total_seconds()) > threshold_stucked_module: + log = open(log_filename, 'a') + log.write(json.dumps([queue, card, str(startTime_readable), str(processed_time_readable), path]) + "\n") + if args.autokill == 1: + kill_module(queue) + + printarray1.append([str(num), str(queue), str(card), str(startTime_readable), str(processed_time_readable), str(path)]) + + else: + printarray2.append([str(num), str(queue), str(card), str(startTime_readable), str(processed_time_readable), str(path)]) + + printarray1.sort(lambda x,y: cmp(x[4], y[4]), reverse=True) + printarray2.sort(lambda x,y: cmp(x[4], y[4]), reverse=True) + printarray1.insert(0,["#", "Queue", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"]) + printarray2.insert(0,["#", "Queue", "Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"]) + + os.system('clear') + t1 = AsciiTable(printarray1, title="Working queues") + t1.column_max_width(1) + if not t1.ok: + longest_col = t1.column_widths.index(max(t1.column_widths)) + max_length_col = t1.column_max_width(longest_col) + if
max_length_col > 0: + for i, content in enumerate(t1.table_data): + if len(content[longest_col]) > max_length_col: + temp = '' + for l in content[longest_col].splitlines(): + if len(l) > max_length_col: + temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n' + else: + temp += l + '\n' + content[longest_col] = temp.strip() + t1.table_data[i] = content + + t2 = AsciiTable(printarray2, title="Idling queues") + t2.column_max_width(1) + if not t2.ok: + longest_col = t2.column_widths.index(max(t2.column_widths)) + max_length_col = t2.column_max_width(longest_col) + if max_length_col > 0: + for i, content in enumerate(t2.table_data): + if len(content[longest_col]) > max_length_col: + temp = '' + for l in content[longest_col].splitlines(): + if len(l) > max_length_col: + temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n' + else: + temp += l + '\n' + content[longest_col] = temp.strip() + t2.table_data[i] = content + + + print t1.table + print '\n' + print t2.table + + time.sleep(args.refresh) diff --git a/bin/SentimentAnalyser.py b/bin/SentimentAnalysis.py similarity index 100% rename from bin/SentimentAnalyser.py rename to bin/SentimentAnalysis.py diff --git a/bin/Url.py b/bin/Web.py similarity index 100% rename from bin/Url.py rename to bin/Web.py diff --git a/bin/WebStats.py b/bin/WebStats.py index 6fdd9ee3..20c1f489 100755 --- a/bin/WebStats.py +++ b/bin/WebStats.py @@ -77,12 +77,14 @@ def compute_progression(server, field_name, num_day, url_parsed): member_set = [] for keyw in server.smembers(redis_progression_name_set): member_set.append((keyw, int(server.hget(redis_progression_name, keyw)))) - print member_set member_set.sort(key=lambda tup: tup[1]) if member_set[0][1] < keyword_increase: + print 'removing', member_set[0][0] + '('+str(member_set[0][1])+')', 'and adding', keyword, str(keyword_increase) #remove min from set and add the new one - server.srem(redis_progression_name_set, member_set[0]) + server.srem(redis_progression_name_set, member_set[0][0]) server.sadd(redis_progression_name_set, keyword) + server.hdel(redis_progression_name, member_set[0][0]) + server.hset(redis_progression_name, keyword, keyword_increase) if __name__ == '__main__': diff --git a/bin/launch_scripts.sh b/bin/launch_scripts.sh index ad55244a..1cdde370 100755 --- a/bin/launch_scripts.sh +++ b/bin/launch_scripts.sh @@ -8,50 +8,52 @@ sleep 0.1 echo -e $GREEN"\t* Launching ZMQ scripts"$DEFAULT -screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' -sleep 0.1 -screen -S 
"Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x' -sleep 0.1 -screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x' + screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x' + sleep 0.1 + screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py; read x' diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index bf6fa695..53fab4ea 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -34,7 +34,7 @@ subscribe = Redis_Global publish = 
Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve [CreditCards] -subscribe = Redis_CreditCard +subscribe = Redis_CreditCards publish = Redis_Duplicate,Redis_ModuleStats,Redis_BrowseWarningPaste [Mail] diff --git a/doc/generate_graph_data.py b/doc/generate_graph_data.py new file mode 100755 index 00000000..cff1f538 --- /dev/null +++ b/doc/generate_graph_data.py @@ -0,0 +1,62 @@ +#!/usr/bin/env python2 +# -*-coding:UTF-8 -* + +content = "" +modules = {} +all_modules = [] +curr_module = "" +streamingPub = {} +streamingSub = {} + +with open('../bin/packages/modules.cfg', 'r') as f: + for line in f: + if line[0] != '#': + if line[0] == '[': + curr_name = line.replace('[','').replace(']','').replace('\n', '').replace(' ', '') + all_modules.append(curr_name) + modules[curr_name] = {'sub': [], 'pub': []} + curr_module = curr_name + elif curr_module != "": # searching for sub or pub + if line.startswith("subscribe"): + curr_subscribers = [w for w in line.replace('\n', '').replace(' ', '').split('=')[1].split(',')] + modules[curr_module]['sub'] = curr_subscribers + for sub in curr_subscribers: + streamingSub[sub] = curr_module + + elif line.startswith("publish"): + curr_publishers = [w for w in line.replace('\n', '').replace(' ', '').split('=')[1].split(',')] + modules[curr_module]['pub'] = curr_publishers + for pub in curr_publishers: + streamingPub[pub] = curr_module + else: + continue + + output_set_graph = set() + + for module in modules.keys(): + for stream_in in modules[module]['sub']: + if stream_in not in streamingPub.keys(): + output_set_graph.add("\"" + stream_in + "\" [color=darkorange1] ;\n") + output_set_graph.add("\"" + stream_in + "\"" + "->" + module + ";\n") + else: + output_set_graph.add("\"" + streamingPub[stream_in] + "\"" + "->" + module + ";\n") + + for stream_out in modules[module]['pub']: + if stream_out not in streamingSub.keys(): + output_set_graph.add("\"" + stream_out + "\" [color=darkorange1] ;\n") + output_set_graph.add("\"" + stream_out + "\"" + "->" + module + ";\n") + else: + output_set_graph.add("\"" + module + "\"" + "->" + streamingSub[stream_out] + ";\n") + + + output_text_graph = "" + output_text_graph += "digraph unix {\n"\ + "graph [pad=\"0.5\"];\n"\ + "size=\"25,25\";\n"\ + "node [color=lightblue2, style=filled];\n" + + for elem in output_set_graph: + output_text_graph += elem + + output_text_graph += "}" + print output_text_graph diff --git a/doc/generate_modules_data_flow_graph.sh b/doc/generate_modules_data_flow_graph.sh new file mode 100755 index 00000000..90a578e0 --- /dev/null +++ b/doc/generate_modules_data_flow_graph.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +python generate_graph_data.py | dot -T png -o module-data-flow.png diff --git a/doc/module-data-flow.png b/doc/module-data-flow.png new file mode 100644 index 00000000..73ed3b1d Binary files /dev/null and b/doc/module-data-flow.png differ diff --git a/doc/screenshots/browse-important.png b/doc/screenshots/browse-important.png new file mode 100644 index 00000000..0490c03e Binary files /dev/null and b/doc/screenshots/browse-important.png differ diff --git a/doc/screenshots/dashboard.png b/doc/screenshots/dashboard.png new file mode 100644 index 00000000..cd633473 Binary files /dev/null and b/doc/screenshots/dashboard.png differ diff --git a/doc/screenshots/terms-manager.png b/doc/screenshots/terms-manager.png new file mode 100644 index 00000000..15d76804 Binary files /dev/null and b/doc/screenshots/terms-manager.png differ diff --git 
a/doc/screenshots/terms-plot.png b/doc/screenshots/terms-plot.png new file mode 100644 index 00000000..32647906 Binary files /dev/null and b/doc/screenshots/terms-plot.png differ diff --git a/doc/screenshots/terms-top.png b/doc/screenshots/terms-top.png new file mode 100644 index 00000000..e833ac35 Binary files /dev/null and b/doc/screenshots/terms-top.png differ diff --git a/doc/screenshots/trending-module.png b/doc/screenshots/trending-module.png new file mode 100644 index 00000000..1330ead8 Binary files /dev/null and b/doc/screenshots/trending-module.png differ diff --git a/doc/screenshots/trending-web.png b/doc/screenshots/trending-web.png new file mode 100644 index 00000000..cd7e510b Binary files /dev/null and b/doc/screenshots/trending-web.png differ diff --git a/installing_deps.sh b/installing_deps.sh index 805818e1..8b790f1f 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -17,6 +17,9 @@ sudo apt-get install libadns1 libadns1-dev #Needed for redis-lvlDB sudo apt-get install libev-dev libgmp-dev +#Needed for generating the module data-flow graph +sudo apt-get install graphviz + #needed for mathplotlib test ! -L /usr/include/ft2build.h && sudo ln -s freetype2/ft2build.h /usr/include/ sudo easy_install -U distribute @@ -69,6 +72,7 @@ echo export AIL_LEVELDB=$(pwd)/redis-leveldb/ >> ./AILENV/bin/activate mkdir -p $AIL_HOME/{PASTES,Blooms,dumps} mkdir -p $AIL_HOME/LEVEL_DB_DATA/2016 +mkdir -p $AIL_HOME/LEVEL_DB_DATA/3016 pip install -U pip pip install -r pip_packages_requirement.txt @@ -83,5 +87,6 @@ pushd tlsh/py_ext python setup.py build python setup.py install -# Download the necessary NLTK corpora +# Download the necessary NLTK corpora and the vader sentiment lexicon HOME=$(pwd) python -m textblob.download_corpora +python -m nltk.downloader vader_lexicon diff --git a/pip_packages_requirement.txt b/pip_packages_requirement.txt index bd734175..b80d14bd 100644 --- a/pip_packages_requirement.txt +++ b/pip_packages_requirement.txt @@ -10,6 +10,7 @@ textblob numpy matplotlib networkx +terminaltables #Tokeniser nltk diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 45efd8f1..8b6e05e3 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -81,8 +81,22 @@ def event_stream(): def get_queues(r): # We may want to put the llen in a pipeline to do only one query.
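The comment above suggests batching these per-queue lookups; a sketch of what that could look like with a redis-py pipeline. This helper is an illustration only, not part of the patch:

```python
# Hypothetical batched variant of the lookups below (illustration only):
# one round-trip fetches every MODULE_<queue> heartbeat at once.
def get_queues_pipelined(r):
    queues = r.hgetall("queues")     # queue name -> backlog size
    pipe = r.pipeline()
    for queue in queues:
        pipe.get("MODULE_" + queue)  # heartbeat written by Helper.py
    heartbeats = pipe.execute()      # aligned with the loop order above
    return zip(queues.items(), heartbeats)
```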
- return [(queue, int(card)) for queue, card in - r.hgetall("queues").iteritems()] + data = [(queue, int(card)) for queue, card in r.hgetall("queues").iteritems()] + newData = [] + for queue, card in data: + key = "MODULE_" + queue + value = r.get(key) + if value is not None: + timestamp, path = value.split(", ") + if timestamp is not None: + startTime_readable = datetime.datetime.fromtimestamp(int(timestamp)) + processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0] + seconds = int((datetime.datetime.now() - startTime_readable).total_seconds()) + newData.append( (queue, card, seconds) ) + else: + newData.append( (queue, card, 0) ) + + return newData def list_len(s): diff --git a/var/www/static/js/indexjavascript.js b/var/www/static/js/indexjavascript.js index f0cd101f..359a1e56 100644 --- a/var/www/static/js/indexjavascript.js +++ b/var/www/static/js/indexjavascript.js @@ -221,11 +221,17 @@ function create_queue_table() { for(i = 0; i < (glob_tabvar.row1).length;i++){ var tr = document.createElement('TR') - for(j = 0; j < (glob_tabvar.row1[i]).length; j++){ + for(j = 0; j < 2; j++){ var td = document.createElement('TD') td.appendChild(document.createTextNode(glob_tabvar.row1[i][j])); tr.appendChild(td) } + if (parseInt(glob_tabvar.row1[i][2]) > 60*2 && parseInt(glob_tabvar.row1[i][1]) > 2) + tr.className += " danger"; + else if (parseInt(glob_tabvar.row1[i][2]) > 60*1) + tr.className += " warning"; + else + tr.className += " success"; tableBody.appendChild(tr); } Tablediv.appendChild(table);
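Taken together, the pieces above define a small heartbeat protocol: Helper.py writes a MODULE_<name> key per module, and both ModuleInformation.py and get_queues() parse it. A minimal round-trip sketch of that convention; the queue name, paste name, and Redis connection settings here are made-up demo values:

```python
# Demo of the MODULE_<name> heartbeat convention introduced in this patch.
import time
import datetime
import redis

r = redis.StrictRedis(host="localhost", port=6381, db=0)  # assumed settings

# Writer side (Helper.Process.get_from_set): "<unix timestamp>, <paste name>"
r.set("MODULE_Lines", "%d, 2016-05-01-demo.gz" % int(time.time()))

# Reader side (ModuleInformation.py / Flask get_queues): derive busy time
timestamp, path = r.get("MODULE_Lines").split(", ")
start = datetime.datetime.fromtimestamp(int(timestamp))
seconds = int((datetime.datetime.now() - start).total_seconds())
print("Lines has been busy on %s for %d seconds" % (path, seconds))
```

The dashboard feeds that seconds field to create_queue_table(), which colours a queue row green, orange after one minute on the same paste, and red once a queue has been stuck for more than two minutes with more than two items waiting (per the indexjavascript.js hunk above).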