Merge branch 'master' of github.com:CIRCL/AIL-framework
							
								
								
									
										35
									
								
								README.md
								
								
								
								
							
							
						
						|  | @ -9,8 +9,34 @@ AIL framework - Framework for Analysis of Information Leaks | |||
| 
 | ||||
| AIL is a modular framework to analyse potential information leaks from unstructured data sources like pastes from Pastebin or similar services. AIL framework is flexible and can be extended to support other functionalities to mine sensitive information. | ||||
| 
 | ||||
|  | ||||
|  | ||||
|  | ||||
| 
 | ||||
| Trending charts | ||||
| --------------- | ||||
| 
 | ||||
|  | ||||
|  | ||||
| 
 | ||||
| Browsing | ||||
| -------- | ||||
| 
 | ||||
|  | ||||
| 
 | ||||
| Sentiment analysis | ||||
| ------------------ | ||||
| 
 | ||||
|  | ||||
| 
 | ||||
| Terms manager and occurrence | ||||
| ---------------------------- | ||||
| 
 | ||||
|  | ||||
| 
 | ||||
| ## Top terms | ||||
| 
 | ||||
|  | ||||
|  | ||||
| 
 | ||||
| 
 | ||||
| AIL framework screencast: https://www.youtube.com/watch?v=9idfHCIMzBY | ||||
| 
 | ||||
|  | @ -26,6 +52,9 @@ Features | |||
| * Module for extracting Tor .onion addresses (to be further processed for analysis) | ||||
| * Extracting and validating potential hostnames (e.g. to feed Passive DNS systems) | ||||
| * A full-text indexer module to index unstructured information | ||||
| * Modules and web statistics  | ||||
| * Global sentiment analysis for each provider based on the nltk vader module | ||||
| * Terms tracking and occurrence | ||||
| * Many more modules for extracting phone numbers, credentials and others | ||||
| 
 | ||||
| Installation | ||||
|  | @ -48,6 +77,7 @@ linux based distributions, you can replace it with [installing_deps_archlinux.sh | |||
| 
 | ||||
| There is also a [Travis file](.travis.yml) used for automating the installation that can be used to build and install AIL on other systems. | ||||
| 
 | ||||
| 
 | ||||
| Starting AIL web interface | ||||
| -------------------------- | ||||
| 
 | ||||
|  | @ -94,6 +124,7 @@ Redis and LevelDB overview | |||
| *                          DB 0 - Cache hostname/dns | ||||
| * Redis on TCP port 6380 - Redis Pub-Sub only | ||||
| * Redis on TCP port 6381 - DB 0 - Queue and Paste content LRU cache | ||||
| * Redis on TCP port 6382 - DB 1-4 - Trending, terms and sentiments | ||||
| * LevelDB on TCP port <year> - Lines duplicate | ||||
| 
 | ||||
| LICENSE | ||||
|  |  | |||
|  | @ -66,7 +66,7 @@ if __name__ == "__main__": | |||
|                     publisher.warning('{}Checked {} valid number(s)'.format( | ||||
|                         to_print, len(creditcard_set))) | ||||
|                     #Send to duplicate | ||||
|                     p.populate_set_out(filepath, 'Duplicate') | ||||
|                     p.populate_set_out(filename, 'Duplicate') | ||||
|                     #send to Browse_warning_paste | ||||
|                     p.populate_set_out('creditcard;{}'.format(filename), 'BrowseWarningPaste') | ||||
|                 else: | ||||
|  | @ -22,8 +22,8 @@ from pubsublogger import publisher | |||
| from packages import lib_words | ||||
| import datetime | ||||
| import calendar | ||||
| 
 | ||||
| from Helper import Process | ||||
| import os | ||||
| import ConfigParser | ||||
| 
 | ||||
| # Config Variables | ||||
| Refresh_rate = 60*5 #sec | ||||
|  | @ -96,13 +96,19 @@ if __name__ == '__main__': | |||
|     # Script is the default channel used for the modules. | ||||
|     publisher.channel = 'Script' | ||||
| 
 | ||||
|     config_section = 'CurveManageTopSets' | ||||
|     p = Process(config_section) | ||||
|     configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') | ||||
|     if not os.path.exists(configfile): | ||||
|         raise Exception('Unable to find the configuration file. \ | ||||
|                         Did you set environment variables? \ | ||||
|                         Or activate the virtualenv.') | ||||
|      | ||||
|     cfg = ConfigParser.ConfigParser() | ||||
|     cfg.read(configfile) | ||||
| 
 | ||||
|     server_term = redis.StrictRedis( | ||||
|         host=p.config.get("Redis_Level_DB_TermFreq", "host"), | ||||
|         port=p.config.get("Redis_Level_DB_TermFreq", "port"), | ||||
|         db=p.config.get("Redis_Level_DB_TermFreq", "db")) | ||||
|         host=cfg.get("Redis_Level_DB_TermFreq", "host"), | ||||
|         port=cfg.getint("Redis_Level_DB_TermFreq", "port"), | ||||
|         db=cfg.getint("Redis_Level_DB_TermFreq", "db")) | ||||
| 
 | ||||
|     publisher.info("Script Curve_manage_top_set started") | ||||
| 
 | ||||
|  | @ -113,11 +119,6 @@ if __name__ == '__main__': | |||
| 
 | ||||
|     while True: | ||||
|         # Get one message from the input queue (module only work if linked with a queue) | ||||
|         message = p.get_from_set() | ||||
|         if message is None: | ||||
|             publisher.debug("{} queue is empty, waiting".format(config_section)) | ||||
|             print 'sleeping' | ||||
|             time.sleep(Refresh_rate) # sleep a long time then manage the set | ||||
|             manage_top_set() | ||||
|             continue | ||||
|         time.sleep(Refresh_rate) # sleep a long time then manage the set | ||||
|         manage_top_set() | ||||
| 
 | ||||
|  | @ -1,182 +0,0 @@ | |||
| #!/usr/bin/env python2 | ||||
| # -*-coding:UTF-8 -* | ||||
| 
 | ||||
| """ | ||||
| The Duplicate module | ||||
| ==================== | ||||
| 
 | ||||
| This huge module is, in short term, checking duplicates. | ||||
| 
 | ||||
| Requirements: | ||||
| ------------- | ||||
| 
 | ||||
| 
 | ||||
| """ | ||||
| import redis | ||||
| import os | ||||
| import time | ||||
| import datetime | ||||
| import json | ||||
| import ssdeep | ||||
| from packages import Paste | ||||
| from pubsublogger import publisher | ||||
| 
 | ||||
| from Helper import Process | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
|     publisher.port = 6380 | ||||
|     publisher.channel = "Script" | ||||
| 
 | ||||
|     config_section = 'Duplicates' | ||||
|     save_dico_and_reload = 1 #min | ||||
|     time_1 = time.time() | ||||
|     flag_reload_from_disk = True | ||||
|     flag_write_to_disk = False | ||||
| 
 | ||||
|     p = Process(config_section) | ||||
| 
 | ||||
|     # REDIS # | ||||
|     # DB OBJECT & HASHS ( DISK ) | ||||
|     # FIXME increase flexibility | ||||
|     dico_redis = {} | ||||
|     for year in xrange(2013, datetime.date.today().year+1): | ||||
|         for month in xrange(0, 16): | ||||
|             dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis( | ||||
|                 host=p.config.get("Redis_Level_DB", "host"), port=year, | ||||
|                 db=month) | ||||
| 	    #print("dup: "+str(year)+str(month).zfill(2)+"\n") | ||||
| 
 | ||||
|     # FUNCTIONS # | ||||
|     publisher.info("Script duplicate started") | ||||
| 
 | ||||
|     dicopath = os.path.join(os.environ['AIL_HOME'], | ||||
|                              p.config.get("Directories", "dicofilters")) | ||||
| 
 | ||||
|     dico_path_set = set() | ||||
|     while True: | ||||
|         try: | ||||
|             hash_dico = {} | ||||
|             dupl = [] | ||||
| 
 | ||||
|             x = time.time() | ||||
| 
 | ||||
|             message = p.get_from_set() | ||||
|             if message is not None: | ||||
|                 path = message | ||||
|                 PST = Paste.Paste(path) | ||||
|             else: | ||||
|                 publisher.debug("Script Attribute is idling 10s") | ||||
|                 time.sleep(10) | ||||
|                 continue | ||||
| 
 | ||||
|             PST._set_p_hash_kind("ssdeep") | ||||
| 
 | ||||
|             # Assignate the correct redis connexion | ||||
|             r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month] | ||||
| 
 | ||||
|             # Creating the dicor name: dicoyyyymm | ||||
|             filedicopath = os.path.join(dicopath, 'dico' + PST.p_date.year + | ||||
|                                          PST.p_date.month) | ||||
|             filedicopath_today = filedicopath | ||||
| 
 | ||||
|             # Save I/O | ||||
|             if time.time() - time_1 > save_dico_and_reload*60: | ||||
|                 flag_write_to_disk = True | ||||
| 
 | ||||
|             if os.path.exists(filedicopath): | ||||
|                 if flag_reload_from_disk == True: | ||||
|                     flag_reload_from_disk = False | ||||
|                     print 'Reloading' | ||||
|                     with open(filedicopath, 'r') as fp: | ||||
|                         today_dico = json.load(fp) | ||||
|             else: | ||||
|                 today_dico = {} | ||||
|                 with open(filedicopath, 'w') as fp: | ||||
|                     json.dump(today_dico, fp) | ||||
| 
 | ||||
|             # For now, just use monthly dico | ||||
|             dico_path_set.add(filedicopath) | ||||
| 
 | ||||
|             # UNIQUE INDEX HASHS TABLE | ||||
|             yearly_index = str(datetime.date.today().year)+'00' | ||||
|             r_serv0 = dico_redis[yearly_index] | ||||
|             r_serv0.incr("current_index") | ||||
|             index = r_serv0.get("current_index")+str(PST.p_date) | ||||
|              | ||||
|             # For each dico | ||||
|             opened_dico = [] | ||||
|             for dico in dico_path_set: | ||||
|                 # Opening dico | ||||
|                 if dico == filedicopath_today: | ||||
|                     opened_dico.append([dico, today_dico]) | ||||
|                 else: | ||||
|                     with open(dico, 'r') as fp: | ||||
|                         opened_dico.append([dico, json.load(fp)]) | ||||
| 
 | ||||
|                | ||||
|             #retrieve hash from paste | ||||
|             paste_hash = PST._get_p_hash() | ||||
|              | ||||
|             # Go throught the Database of the dico (of the month) | ||||
|             threshold_dup = 99  | ||||
|             for dico_name, dico in opened_dico: | ||||
|                 for dico_key, dico_hash in dico.items(): | ||||
|                     percent = ssdeep.compare(dico_hash, paste_hash) | ||||
|                     if percent > threshold_dup: | ||||
|                         db = dico_name[-6:] | ||||
|                         # Go throught the Database of the dico filter (month) | ||||
|                         r_serv_dico = dico_redis[db] | ||||
|                          | ||||
|                         # index of paste | ||||
|                         index_current = r_serv_dico.get(dico_hash) | ||||
|                         paste_path = r_serv_dico.get(index_current) | ||||
|                         if paste_path != None: | ||||
|                             hash_dico[dico_hash] = (paste_path, percent) | ||||
| 
 | ||||
|                         #print 'comparing: ' + str(dico_hash[:20]) + '  and  ' + str(paste_hash[:20]) + ' percentage: ' + str(percent) | ||||
|                         print '   '+ PST.p_path[44:]  +', '+ paste_path[44:] + ', ' + str(percent) | ||||
| 
 | ||||
|             # Add paste in DB to prevent its analyse twice | ||||
|             # HASHTABLES PER MONTH (because of r_serv1 changing db) | ||||
|             r_serv1.set(index, PST.p_path) | ||||
|             r_serv1.sadd("INDEX", index) | ||||
|             # Adding the hash in Redis | ||||
|             r_serv1.set(paste_hash, index) | ||||
|             r_serv1.sadd("HASHS", paste_hash) | ||||
|     ##################### Similarity found  ####################### | ||||
| 
 | ||||
|             # if there is data in this dictionnary | ||||
|             if len(hash_dico) != 0: | ||||
|                 for dico_hash, paste_tuple in hash_dico.items(): | ||||
|                     paste_path, percent = paste_tuple | ||||
|                     dupl.append((paste_path, percent)) | ||||
| 
 | ||||
|                 # Creating the object attribute and save it. | ||||
|                 to_print = 'Duplicate;{};{};{};'.format( | ||||
|                     PST.p_source, PST.p_date, PST.p_name) | ||||
|                 if dupl != []: | ||||
|                     PST.__setattr__("p_duplicate", dupl) | ||||
|                     PST.save_attribute_redis("p_duplicate", dupl) | ||||
|                     publisher.info('{}Detected {}'.format(to_print, len(dupl))) | ||||
|                     print '{}Detected {}'.format(to_print, len(dupl)) | ||||
| 
 | ||||
|                 y = time.time() | ||||
| 
 | ||||
|                 publisher.debug('{}Processed in {} sec'.format(to_print, y-x)) | ||||
|             | ||||
| 
 | ||||
|             # Adding the hash in the dico of the month | ||||
|             today_dico[index] = paste_hash | ||||
| 
 | ||||
|             if flag_write_to_disk: | ||||
|                 time_1 = time.time() | ||||
|                 flag_write_to_disk = False | ||||
|                 flag_reload_from_disk = True | ||||
|                 print 'writing' | ||||
|                 with open(filedicopath, 'w') as fp: | ||||
|                     json.dump(today_dico, fp) | ||||
|         except IOError: | ||||
|             to_print = 'Duplicate;{};{};{};'.format( | ||||
|                 PST.p_source, PST.p_date, PST.p_name) | ||||
|             print "CRC Checksum Failed on :", PST.p_path | ||||
|             publisher.error('{}CRC Checksum Failed'.format(to_print)) | ||||
|  | @ -16,6 +16,7 @@ import ConfigParser | |||
| import os | ||||
| import zmq | ||||
| import time | ||||
| import datetime | ||||
| import json | ||||
| 
 | ||||
| 
 | ||||
|  | @ -132,7 +133,25 @@ class Process(object): | |||
|         in_set = self.subscriber_name + 'in' | ||||
|         self.r_temp.hset('queues', self.subscriber_name, | ||||
|                          int(self.r_temp.scard(in_set))) | ||||
|         return self.r_temp.spop(in_set) | ||||
|         message = self.r_temp.spop(in_set) | ||||
|         timestamp = int(time.mktime(datetime.datetime.now().timetuple())) | ||||
|         dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes') | ||||
| 
 | ||||
|         if message is None: | ||||
|             return None | ||||
| 
 | ||||
|         else: | ||||
|             try: | ||||
|                 path = message.split(".")[-2].split("/")[-1] | ||||
|                 value = str(timestamp) + ", " + path | ||||
|                 self.r_temp.set("MODULE_"+self.subscriber_name, value) | ||||
|                 return message | ||||
| 
 | ||||
|             except: | ||||
|                 path = "?" | ||||
|                 value = str(timestamp) + ", " + path | ||||
|                 self.r_temp.set("MODULE_"+self.subscriber_name, value) | ||||
|                 return message | ||||
| 
 | ||||
|     def populate_set_out(self, msg, channel=None): | ||||
|         # multiproc | ||||
|  |  | |||
|  | @ -114,31 +114,31 @@ function launching_scripts { | |||
| 
 | ||||
|     screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x' | ||||
|     screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x' | ||||
|     screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x' | ||||
|     screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x' | ||||
|     screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x' | ||||
|     screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x' | ||||
|     screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x' | ||||
|     screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' | ||||
|     sleep 0.1 | ||||
|  | @ -158,7 +158,9 @@ function launching_scripts { | |||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x' | ||||
|     screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py -k 0; read x' | ||||
| 
 | ||||
| } | ||||
| 
 | ||||
|  |  | |||
|  | @ -0,0 +1,155 @@ | |||
| #!/usr/bin/env python2 | ||||
| # -*-coding:UTF-8 -* | ||||
| 
 | ||||
| import time | ||||
| import datetime | ||||
| import redis | ||||
| import os | ||||
| import signal | ||||
| import argparse | ||||
| from subprocess import PIPE, Popen | ||||
| import ConfigParser | ||||
| import json | ||||
| from terminaltables import AsciiTable | ||||
| import textwrap | ||||
| 
 | ||||
| # CONFIG VARIABLES | ||||
| threshold_stucked_module = 60*60*1 #1 hour | ||||
| log_filename = "../logs/moduleInfo.log" | ||||
| command_search_pid = "ps a -o pid,cmd | grep {}" | ||||
| command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\"" | ||||
| 
 | ||||
| 
 | ||||
| def getPid(module): | ||||
|     p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) | ||||
|     for line in p.stdout: | ||||
|         splittedLine = line.split() | ||||
|         if 'python2' in splittedLine: | ||||
|             return int(splittedLine[0]) | ||||
|         else: | ||||
|             return None | ||||
| 
 | ||||
| 
 | ||||
| def kill_module(module): | ||||
|     print '' | ||||
|     print '-> trying to kill module:', module | ||||
| 
 | ||||
|     pid = getPid(module) | ||||
|     if pid is not None: | ||||
|         os.kill(pid, signal.SIGUSR1) | ||||
|         time.sleep(1) | ||||
|         if getPid(module) is None: | ||||
|             print module, 'has been killed' | ||||
|             print 'restarting', module, '...' | ||||
|             p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) | ||||
| 
 | ||||
|         else: | ||||
|             print 'killing failed, retrying...' | ||||
|             time.sleep(3) | ||||
|             os.kill(pid, signal.SIGUSR1) | ||||
|             time.sleep(1) | ||||
|             if getPid(module) is None: | ||||
|                 print module, 'has been killed' | ||||
|                 print 'restarting', module, '...' | ||||
|                 p2 = Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) | ||||
|             else: | ||||
|                 print 'killing failed!' | ||||
|     time.sleep(7) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == "__main__": | ||||
| 
 | ||||
|     parser = argparse.ArgumentParser(description='Show info concerning running modules and log suspected stucked modules. May be use to automatically kill and restart stucked one.') | ||||
|     parser.add_argument('-r', '--refresh', type=int, required=False, default=1, help='Refresh rate') | ||||
|     parser.add_argument('-k', '--autokill', type=int, required=True, default=1, help='Enable auto kill option (1 for TRUE, anything else for FALSE)') | ||||
| 
 | ||||
|     args = parser.parse_args() | ||||
| 
 | ||||
|     configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg') | ||||
|     if not os.path.exists(configfile): | ||||
|         raise Exception('Unable to find the configuration file. \ | ||||
|                         Did you set environment variables? \ | ||||
|                         Or activate the virtualenv.') | ||||
| 
 | ||||
|     cfg = ConfigParser.ConfigParser() | ||||
|     cfg.read(configfile) | ||||
| 
 | ||||
|     # REDIS # | ||||
|     server = redis.StrictRedis( | ||||
|         host=cfg.get("Redis_Queues", "host"), | ||||
|         port=cfg.getint("Redis_Queues", "port"), | ||||
|         db=cfg.getint("Redis_Queues", "db")) | ||||
| 
 | ||||
|     while True: | ||||
| 
 | ||||
|         num = 0 | ||||
|         printarray1 = [] | ||||
|         printarray2 = [] | ||||
|         for queue, card in server.hgetall("queues").iteritems(): | ||||
|             key = "MODULE_" + queue | ||||
|             value = server.get(key) | ||||
|             if value is not None: | ||||
|                 timestamp, path = value.split(", ") | ||||
|                 if timestamp is not None and path is not None: | ||||
|                     num += 1 | ||||
|                     startTime_readable = datetime.datetime.fromtimestamp(int(timestamp)) | ||||
|                     processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0] | ||||
| 
 | ||||
|                     if int(card) > 0: | ||||
|                         if int((datetime.datetime.now() - startTime_readable).total_seconds()) > threshold_stucked_module: | ||||
|                             log = open(log_filename, 'a') | ||||
|                             log.write(json.dumps([queue, card, str(startTime_readable), str(processed_time_readable), path]) + "\n") | ||||
|                             if args.autokill == 1: | ||||
|                                 kill_module(queue) | ||||
| 
 | ||||
|                         printarray1.append([str(num), str(queue), str(card), str(startTime_readable), str(processed_time_readable), str(path)]) | ||||
| 
 | ||||
|                     else: | ||||
|                         printarray2.append([str(num), str(queue), str(card), str(startTime_readable), str(processed_time_readable), str(path)]) | ||||
| 
 | ||||
|         printarray1.sort(lambda x,y: cmp(x[4], y[4]), reverse=True) | ||||
|         printarray2.sort(lambda x,y: cmp(x[4], y[4]), reverse=True) | ||||
|         printarray1.insert(0,["#", "Queue", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"]) | ||||
|         printarray2.insert(0,["#", "Queue", "Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"]) | ||||
| 
 | ||||
|         os.system('clear') | ||||
|         t1 = AsciiTable(printarray1, title="Working queues") | ||||
|         t1.column_max_width(1) | ||||
|         if not t1.ok: | ||||
|                 longest_col = t1.column_widths.index(max(t1.column_widths)) | ||||
|                 max_length_col = t1.column_max_width(longest_col) | ||||
|                 if max_length_col > 0: | ||||
|                     for i, content in enumerate(t1.table_data): | ||||
|                         if len(content[longest_col]) > max_length_col: | ||||
|                             temp = '' | ||||
|                             for l in content[longest_col].splitlines(): | ||||
|                                 if len(l) > max_length_col: | ||||
|                                     temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n' | ||||
|                                 else: | ||||
|                                     temp += l + '\n' | ||||
|                                 content[longest_col] = temp.strip() | ||||
|                         t1.table_data[i] = content | ||||
| 
 | ||||
|         t2 = AsciiTable(printarray2, title="Idling queues") | ||||
|         t2.column_max_width(1) | ||||
|         if not t2.ok: | ||||
|                 longest_col = t2.column_widths.index(max(t2.column_widths)) | ||||
|                 max_length_col = t2.column_max_width(longest_col) | ||||
|                 if max_length_col > 0: | ||||
|                     for i, content in enumerate(t2.table_data): | ||||
|                         if len(content[longest_col]) > max_length_col: | ||||
|                             temp = '' | ||||
|                             for l in content[longest_col].splitlines(): | ||||
|                                 if len(l) > max_length_col: | ||||
|                                     temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n' | ||||
|                                 else: | ||||
|                                     temp += l + '\n' | ||||
|                                 content[longest_col] = temp.strip() | ||||
|                         t2.table_data[i] = content | ||||
| 
 | ||||
| 
 | ||||
|         print t1.table | ||||
|         print '\n' | ||||
|         print t2.table | ||||
| 
 | ||||
|         time.sleep(args.refresh) | ||||
|  | @ -77,12 +77,14 @@ def compute_progression(server, field_name, num_day, url_parsed): | |||
|                 member_set = [] | ||||
|                 for keyw in server.smembers(redis_progression_name_set): | ||||
|                     member_set.append((keyw, int(server.hget(redis_progression_name, keyw)))) | ||||
|                 print member_set | ||||
|                 member_set.sort(key=lambda tup: tup[1]) | ||||
|                 if member_set[0][1] < keyword_increase: | ||||
|                     print 'removing', member_set[0][0] + '('+str(member_set[0][1])+')', 'and adding', keyword, str(keyword_increase) | ||||
|                     #remove min from set and add the new one | ||||
|                     server.srem(redis_progression_name_set, member_set[0]) | ||||
|                     server.srem(redis_progression_name_set, member_set[0][0]) | ||||
|                     server.sadd(redis_progression_name_set, keyword) | ||||
|                     server.hdel(redis_progression_name, member_set[0][0]) | ||||
|                     server.hset(redis_progression_name, keyword, keyword_increase) | ||||
| 
 | ||||
| 
 | ||||
| if __name__ == '__main__': | ||||
|  |  | |||
|  | @ -8,50 +8,52 @@ sleep 0.1 | |||
| 
 | ||||
| echo -e $GREEN"\t* Launching ZMQ scripts"$DEFAULT | ||||
| 
 | ||||
| screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x' | ||||
| sleep 0.1 | ||||
| screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x' | ||||
|     screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x' | ||||
|     sleep 0.1 | ||||
|     screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py; read x' | ||||
|  |  | |||
|  | @ -34,7 +34,7 @@ subscribe = Redis_Global | |||
| publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve | ||||
| 
 | ||||
| [CreditCards] | ||||
| subscribe = Redis_CreditCard | ||||
| subscribe = Redis_CreditCards | ||||
| publish = Redis_Duplicate,Redis_ModuleStats,Redis_BrowseWarningPaste | ||||
| 
 | ||||
| [Mail] | ||||
|  |  | |||
|  | @ -0,0 +1,62 @@ | |||
#!/usr/bin/env python2
# -*-coding:UTF-8 -*
"""Print a Graphviz (DOT) description of the module data flow.

The script reads ``../bin/packages/modules.cfg`` (relative to the current
working directory), collects the ``subscribe`` and ``publish`` streams of
every ``[Module]`` section and writes a ``digraph`` to standard output.
Modules are linked through the streams they share; a stream that has no
module on its other end is drawn as its own node in ``darkorange1``.
"""

content = ""
modules = {}        # module name -> {'sub': [streams], 'pub': [streams]}
all_modules = []    # module names in the order they appear in the file
curr_module = ""    # section currently being parsed
streamingPub = {}   # stream name -> module publishing on it
streamingSub = {}   # stream name -> module subscribed to it


def _parse_streams(line):
    """Return the comma-separated stream names found after '=' on a line."""
    return line.replace('\n', '').replace(' ', '').split('=')[1].split(',')


def _edge(source, target):
    """Return the DOT statement for a directed edge source -> target."""
    return "\"" + source + "\"" + "->" + "\"" + target + "\";\n"


def _orphan_node(stream):
    """Return the DOT statement highlighting a stream with a missing end."""
    return "\"" + stream + "\" [color=darkorange1] ;\n"


# The file only needs to stay open while it is being parsed.
with open('../bin/packages/modules.cfg', 'r') as f:
    for line in f:
        if line[0] == '#':
            continue

        if line[0] == '[':
            curr_module = line.replace('[', '').replace(']', '').replace('\n', '').replace(' ', '')
            all_modules.append(curr_module)
            modules[curr_module] = {'sub': [], 'pub': []}

        elif curr_module != "":  # searching for sub or pub
            if line.startswith("subscribe"):
                curr_subscribers = _parse_streams(line)
                modules[curr_module]['sub'] = curr_subscribers
                for sub in curr_subscribers:
                    streamingSub[sub] = curr_module

            elif line.startswith("publish"):
                curr_publishers = _parse_streams(line)
                modules[curr_module]['pub'] = curr_publishers
                for pub in curr_publishers:
                    streamingPub[pub] = curr_module

# A set is used so that an edge found from both of its ends is emitted once.
output_set_graph = set()

for module in modules:
    for stream_in in modules[module]['sub']:
        if stream_in not in streamingPub:
            # Nobody publishes on this stream: show the stream feeding the module.
            output_set_graph.add(_orphan_node(stream_in))
            output_set_graph.add(_edge(stream_in, module))
        else:
            output_set_graph.add(_edge(streamingPub[stream_in], module))

    for stream_out in modules[module]['pub']:
        if stream_out not in streamingSub:
            # Nobody subscribes to this stream: the data flows from the
            # module into the stream, so the edge starts at the module.
            output_set_graph.add(_orphan_node(stream_out))
            output_set_graph.add(_edge(module, stream_out))
        else:
            output_set_graph.add(_edge(module, streamingSub[stream_out]))

output_text_graph = "digraph unix {\n"\
                    "graph [pad=\"0.5\"];\n"\
                    "size=\"25,25\";\n"\
                    "node [color=lightblue2, style=filled];\n"

# Sorted so that two runs over the same configuration print the same text.
output_text_graph += "".join(sorted(output_set_graph))

output_text_graph += "}"
# Parenthesised single-argument form prints the same under Python 2 and 3.
print(output_text_graph)
|  | @ -0,0 +1,3 @@ | |||
#!/bin/bash
# Render the module data-flow graph into module-data-flow.png.
# Needs Graphviz (`dot`) on the PATH.

# Stop on the first error; pipefail makes a failure of the generator on the
# left-hand side of the pipe fail the script instead of being hidden by `dot`.
set -e -o pipefail

# generate_graph_data.py is looked up relative to this script, and it opens
# its configuration through a relative path too, so always run from here.
cd "$(dirname "$0")"

python generate_graph_data.py | dot -T png -o module-data-flow.png
| After Width: | Height: | Size: 152 KiB | 
| After Width: | Height: | Size: 126 KiB | 
| After Width: | Height: | Size: 190 KiB | 
| After Width: | Height: | Size: 63 KiB | 
| After Width: | Height: | Size: 31 KiB | 
| After Width: | Height: | Size: 86 KiB | 
| After Width: | Height: | Size: 54 KiB | 
| After Width: | Height: | Size: 57 KiB | 
|  | @ -17,6 +17,9 @@ sudo apt-get install libadns1 libadns1-dev | |||
| #Needed for redis-lvlDB | ||||
| sudo apt-get install libev-dev libgmp-dev | ||||
| 
 | ||||
| #Needed to generate the data-flow graph | ||||
| sudo apt-get install graphviz | ||||
| 
 | ||||
| #needed for matplotlib | ||||
| test ! -L /usr/include/ft2build.h && sudo ln -s freetype2/ft2build.h /usr/include/ | ||||
| sudo easy_install -U distribute | ||||
|  | @ -69,6 +72,7 @@ echo export AIL_LEVELDB=$(pwd)/redis-leveldb/ >> ./AILENV/bin/activate | |||
| 
 | ||||
| mkdir -p $AIL_HOME/{PASTES,Blooms,dumps} | ||||
| mkdir -p $AIL_HOME/LEVEL_DB_DATA/2016 | ||||
| mkdir -p $AIL_HOME/LEVEL_DB_DATA/3016 | ||||
| 
 | ||||
| pip install -U pip | ||||
| pip install -r pip_packages_requirement.txt | ||||
|  | @ -83,5 +87,6 @@ pushd tlsh/py_ext | |||
| python setup.py build | ||||
| python setup.py install | ||||
| 
 | ||||
| # Download the necessary NLTK corpora | ||||
| # Download the necessary NLTK corpora and the VADER sentiment lexicon | ||||
| HOME=$(pwd) python -m textblob.download_corpora | ||||
| python -m nltk.downloader vader_lexicon | ||||
|  |  | |||
|  | @ -10,6 +10,7 @@ textblob | |||
| numpy | ||||
| matplotlib | ||||
| networkx | ||||
| terminaltables | ||||
| 
 | ||||
| #Tokeniser | ||||
| nltk | ||||
|  |  | |||
|  | @ -81,8 +81,22 @@ def event_stream(): | |||
| 
 | ||||
def get_queues(r):
    """Return the size and current processing time of every module queue.

    :param r: Redis client. The hash ``queues`` maps a queue name to its
        number of pending items, and the optional key ``MODULE_<queue>``
        holds ``"<unix timestamp>, <path>"`` for the item being processed.
    :return: list of ``(queue, card, seconds)`` tuples, where ``card`` is the
        queue size as an int and ``seconds`` is the whole number of seconds
        elapsed since the stored timestamp (0 when the queue has no
        ``MODULE_<queue>`` entry).
    :raises ValueError: if a queue size or a stored timestamp is not numeric.
    """
    # We may want to put the llen in a pipeline to do only one query.
    now = datetime.datetime.now()
    newData = []
    for queue, card in r.hgetall("queues").items():
        card = int(card)
        seconds = 0
        value = r.get("MODULE_" + queue)
        if value is not None:
            # Only the timestamp is needed; splitting once keeps a path that
            # itself contains ", " from breaking the parsing.
            timestamp = value.split(", ", 1)[0]
            started = datetime.datetime.fromtimestamp(int(timestamp))
            seconds = int((now - started).total_seconds())
        # Queues without timing information are still reported, with 0 seconds.
        newData.append((queue, card, seconds))

    return newData
| 
 | ||||
| 
 | ||||
| def list_len(s): | ||||
|  |  | |||
|  | @ -221,11 +221,17 @@ function create_queue_table() { | |||
| 
 | ||||
|     for(i = 0; i < (glob_tabvar.row1).length;i++){ | ||||
|         var tr = document.createElement('TR') | ||||
|         for(j = 0; j < (glob_tabvar.row1[i]).length; j++){ | ||||
|         for(j = 0; j < 2; j++){ | ||||
|             var td = document.createElement('TD') | ||||
|             td.appendChild(document.createTextNode(glob_tabvar.row1[i][j])); | ||||
|             tr.appendChild(td) | ||||
|         } | ||||
|         if (parseInt(glob_tabvar.row1[i][2]) > 60*2 && parseInt(glob_tabvar.row1[i][1]) > 2) | ||||
|             tr.className += " danger"; | ||||
|         else if (parseInt(glob_tabvar.row1[i][2]) > 60*1) | ||||
|             tr.className += " warning"; | ||||
|         else | ||||
|             tr.className += " success"; | ||||
|         tableBody.appendChild(tr); | ||||
|     } | ||||
|     Tablediv.appendChild(table); | ||||
|  |  | |||