Merge branch 'master' of github.com:CIRCL/AIL-framework

2016-08-25 12:33:31 +00:00 · 2016-08-25 12:33:31 +00:00 · 1ab08d4a05
parent 7cb9d937d6 034a558cba
commit 1ab08d4a05
30 changed files with 383 additions and 262 deletions
--- a/README.md
+++ b/README.md
@ -9,8 +9,34 @@ AIL framework - Framework for Analysis of Information Leaks

 AIL is a modular framework to analyse potential information leaks from unstructured data sources like pastes from Pastebin or similar services. AIL framework is flexible and can be extended to support other functionalities to mine sensitive information.

-![Dashboard](./doc/screenshots/DashboardAIL.png?raw=true "AIL framework dashboard")
-![Trending](./doc/screenshots/WordtrendingAIL.png?raw=true "AIL framework wordtrending")
+![Dashboard](./doc/screenshots/dashboard.png?raw=true "AIL framework dashboard")
+
+Trending charts
+---------------
+
+![Trending-Web](./doc/screenshots/trending-web.png?raw=true "AIL framework webtrending")
+![Trending-Modules](./doc/screenshots/trending-module.png?raw=true "AIL framework modulestrending")
+
+Browsing
+--------
+
+![Browse-Pastes](./doc/screenshots/browse-important.png?raw=true "AIL framework browseImportantPastes")
+
+Sentiment analysis
+------------------
+
+![Sentiment](./doc/screenshots/sentiment.png?raw=true "AIL framework sentimentanalysis")
+
+Terms manager and occurrence
+----------------------------
+
+![Term-Manager](./doc/screenshots/terms-manager.png?raw=true "AIL framework termManager")
+
+## Top terms
+
+![Term-Top](./doc/screenshots/terms-top.png?raw=true "AIL framework termTop")
+![Term-Plot](./doc/screenshots/terms-plot.png?raw=true "AIL framework termPlot")
+

 AIL framework screencast: https://www.youtube.com/watch?v=9idfHCIMzBY

@ -26,6 +52,9 @@ Features
 * Module for extracting Tor .onion addresses (to be further processed for analysis)
 * Extracting and validating potential hostnames (e.g. to feed Passive DNS systems)
 * A full-text indexer module to index unstructured information
+* Modules and web statistics
+* Global sentiment analysis for each provider, based on the NLTK VADER module
+* Terms tracking and occurrence
 * Many more modules for extracting phone numbers, credentials and others

 Installation
@ -48,6 +77,7 @@ linux based distributions, you can replace it with [installing_deps_archlinux.sh

 There is also a [Travis file](.travis.yml) used for automating the installation that can be used to build and install AIL on other systems.

+
 Starting AIL web interface
 --------------------------

@ -94,6 +124,7 @@ Redis and LevelDB overview
 *                          DB 0 - Cache hostname/dns
 * Redis on TCP port 6380 - Redis Pub-Sub only
 * Redis on TCP port 6381 - DB 0 - Queue and Paste content LRU cache
+* Redis on TCP port 6382 - DB 1-4 - Trending, terms and sentiments
 * LevelDB on TCP port <year> - Lines duplicate

 LICENSE
--- a/bin/Attributes.py
+++ b/bin/Attributes.py
--- a/bin/CreditCards.py
+++ b/bin/CreditCards.py
@ -66,7 +66,7 @@ if __name__ == "__main__":
                    publisher.warning('{}Checked {} valid number(s)'.format(
                        to_print, len(creditcard_set)))
                    #Send to duplicate
-                    p.populate_set_out(filepath, 'Duplicate')
+                    p.populate_set_out(filename, 'Duplicate')
                    #send to Browse_warning_paste
                    p.populate_set_out('creditcard;{}'.format(filename), 'BrowseWarningPaste')
                else:
--- a/bin/Curve_manage_top_sets.py
+++ b/bin/Curve_manage_top_sets.py
@ -22,8 +22,8 @@ from pubsublogger import publisher
 from packages import lib_words
 import datetime
 import calendar
-
-from Helper import Process
+import os
+import ConfigParser

 # Config Variables
 Refresh_rate = 60*5 #sec
@ -96,13 +96,19 @@ if __name__ == '__main__':
    # Script is the default channel used for the modules.
    publisher.channel = 'Script'

-    config_section = 'CurveManageTopSets'
-    p = Process(config_section)
+    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
+    if not os.path.exists(configfile):
+        raise Exception('Unable to find the configuration file. \
+                        Did you set environment variables? \
+                        Or activate the virtualenv.')
+    
+    cfg = ConfigParser.ConfigParser()
+    cfg.read(configfile)

    server_term = redis.StrictRedis(
-        host=p.config.get("Redis_Level_DB_TermFreq", "host"),
-        port=p.config.get("Redis_Level_DB_TermFreq", "port"),
-        db=p.config.get("Redis_Level_DB_TermFreq", "db"))
+        host=cfg.get("Redis_Level_DB_TermFreq", "host"),
+        port=cfg.getint("Redis_Level_DB_TermFreq", "port"),
+        db=cfg.getint("Redis_Level_DB_TermFreq", "db"))

    publisher.info("Script Curve_manage_top_set started")

@ -113,11 +119,6 @@ if __name__ == '__main__':

    while True:
        # Get one message from the input queue (module only work if linked with a queue)
-        message = p.get_from_set()
-        if message is None:
-            publisher.debug("{} queue is empty, waiting".format(config_section))
-            print 'sleeping'
-            time.sleep(Refresh_rate) # sleep a long time then manage the set
-            manage_top_set()
-            continue
+        time.sleep(Refresh_rate) # sleep a long time then manage the set
+        manage_top_set()

--- a/bin/Duplicate_ssdeep.py
+++ b/bin/Duplicate_ssdeep.py
@ -1,182 +0,0 @@
-#!/usr/bin/env python2
-# -*-coding:UTF-8 -*
-
-"""
-The Duplicate module
-====================
-
-This huge module is, in short term, checking duplicates.
-
-Requirements:
-------------
-
-
-"""
-import redis
-import os
-import time
-import datetime
-import json
-import ssdeep
-from packages import Paste
-from pubsublogger import publisher
-
-from Helper import Process
-
-if __name__ == "__main__":
-    publisher.port = 6380
-    publisher.channel = "Script"
-
-    config_section = 'Duplicates'
-    save_dico_and_reload = 1 #min
-    time_1 = time.time()
-    flag_reload_from_disk = True
-    flag_write_to_disk = False
-
-    p = Process(config_section)
-
-    # REDIS #
-    # DB OBJECT & HASHS ( DISK )
-    # FIXME increase flexibility
-    dico_redis = {}
-    for year in xrange(2013, datetime.date.today().year+1):
-        for month in xrange(0, 16):
-            dico_redis[str(year)+str(month).zfill(2)] = redis.StrictRedis(
-                host=p.config.get("Redis_Level_DB", "host"), port=year,
-                db=month)
-	    #print("dup: "+str(year)+str(month).zfill(2)+"\n")
-
-    # FUNCTIONS #
-    publisher.info("Script duplicate started")
-
-    dicopath = os.path.join(os.environ['AIL_HOME'],
-                             p.config.get("Directories", "dicofilters"))
-
-    dico_path_set = set()
-    while True:
-        try:
-            hash_dico = {}
-            dupl = []
-
-            x = time.time()
-
-            message = p.get_from_set()
-            if message is not None:
-                path = message
-                PST = Paste.Paste(path)
-            else:
-                publisher.debug("Script Attribute is idling 10s")
-                time.sleep(10)
-                continue
-
-            PST._set_p_hash_kind("ssdeep")
-
-            # Assignate the correct redis connexion
-            r_serv1 = dico_redis[PST.p_date.year + PST.p_date.month]
-
-            # Creating the dicor name: dicoyyyymm
-            filedicopath = os.path.join(dicopath, 'dico' + PST.p_date.year +
-                                         PST.p_date.month)
-            filedicopath_today = filedicopath
-
-            # Save I/O
-            if time.time() - time_1 > save_dico_and_reload*60:
-                flag_write_to_disk = True
-
-            if os.path.exists(filedicopath):
-                if flag_reload_from_disk == True:
-                    flag_reload_from_disk = False
-                    print 'Reloading'
-                    with open(filedicopath, 'r') as fp:
-                        today_dico = json.load(fp)
-            else:
-                today_dico = {}
-                with open(filedicopath, 'w') as fp:
-                    json.dump(today_dico, fp)
-
-            # For now, just use monthly dico
-            dico_path_set.add(filedicopath)
-
-            # UNIQUE INDEX HASHS TABLE
-            yearly_index = str(datetime.date.today().year)+'00'
-            r_serv0 = dico_redis[yearly_index]
-            r_serv0.incr("current_index")
-            index = r_serv0.get("current_index")+str(PST.p_date)
-            
-            # For each dico
-            opened_dico = []
-            for dico in dico_path_set:
-                # Opening dico
-                if dico == filedicopath_today:
-                    opened_dico.append([dico, today_dico])
-                else:
-                    with open(dico, 'r') as fp:
-                        opened_dico.append([dico, json.load(fp)])
-
-              
-            #retrieve hash from paste
-            paste_hash = PST._get_p_hash()
-            
-            # Go throught the Database of the dico (of the month)
-            threshold_dup = 99 
-            for dico_name, dico in opened_dico:
-                for dico_key, dico_hash in dico.items():
-                    percent = ssdeep.compare(dico_hash, paste_hash)
-                    if percent > threshold_dup:
-                        db = dico_name[-6:]
-                        # Go throught the Database of the dico filter (month)
-                        r_serv_dico = dico_redis[db]
-                        
-                        # index of paste
-                        index_current = r_serv_dico.get(dico_hash)
-                        paste_path = r_serv_dico.get(index_current)
-                        if paste_path != None:
-                            hash_dico[dico_hash] = (paste_path, percent)
-
-                        #print 'comparing: ' + str(dico_hash[:20]) + '  and  ' + str(paste_hash[:20]) + ' percentage: ' + str(percent)
-                        print '   '+ PST.p_path[44:]  +', '+ paste_path[44:] + ', ' + str(percent)
-
-            # Add paste in DB to prevent its analyse twice
-            # HASHTABLES PER MONTH (because of r_serv1 changing db)
-            r_serv1.set(index, PST.p_path)
-            r_serv1.sadd("INDEX", index)
-            # Adding the hash in Redis
-            r_serv1.set(paste_hash, index)
-            r_serv1.sadd("HASHS", paste_hash)
-    ##################### Similarity found  #######################
-
-            # if there is data in this dictionnary
-            if len(hash_dico) != 0:
-                for dico_hash, paste_tuple in hash_dico.items():
-                    paste_path, percent = paste_tuple
-                    dupl.append((paste_path, percent))
-
-                # Creating the object attribute and save it.
-                to_print = 'Duplicate;{};{};{};'.format(
-                    PST.p_source, PST.p_date, PST.p_name)
-                if dupl != []:
-                    PST.__setattr__("p_duplicate", dupl)
-                    PST.save_attribute_redis("p_duplicate", dupl)
-                    publisher.info('{}Detected {}'.format(to_print, len(dupl)))
-                    print '{}Detected {}'.format(to_print, len(dupl))
-
-                y = time.time()
-
-                publisher.debug('{}Processed in {} sec'.format(to_print, y-x))
-           
-
-            # Adding the hash in the dico of the month
-            today_dico[index] = paste_hash
-
-            if flag_write_to_disk:
-                time_1 = time.time()
-                flag_write_to_disk = False
-                flag_reload_from_disk = True
-                print 'writing'
-                with open(filedicopath, 'w') as fp:
-                    json.dump(today_dico, fp)
-        except IOError:
-            to_print = 'Duplicate;{};{};{};'.format(
-                PST.p_source, PST.p_date, PST.p_name)
-            print "CRC Checksum Failed on :", PST.p_path
-            publisher.error('{}CRC Checksum Failed'.format(to_print))
--- a/bin/Duplicate_ssdeep_v2.py
+++ b/bin/Duplicate_ssdeep_v2.py
--- a/bin/Duplicates_old.py
+++ b/bin/Duplicates_old.py
--- a/bin/Helper.py
+++ b/bin/Helper.py
@ -16,6 +16,7 @@ import ConfigParser
 import os
 import zmq
 import time
+import datetime
 import json


@ -132,7 +133,25 @@ class Process(object):
        in_set = self.subscriber_name + 'in'
        self.r_temp.hset('queues', self.subscriber_name,
                         int(self.r_temp.scard(in_set)))
-        return self.r_temp.spop(in_set)
+        message = self.r_temp.spop(in_set)
+        timestamp = int(time.mktime(datetime.datetime.now().timetuple()))
+        dir_name = os.environ['AIL_HOME']+self.config.get('Directories', 'pastes')
+
+        if message is None:
+            return None
+
+        else:
+            try:
+                path = message.split(".")[-2].split("/")[-1]
+                value = str(timestamp) + ", " + path
+                self.r_temp.set("MODULE_"+self.subscriber_name, value)
+                return message
+
+            except:
+                path = "?"
+                value = str(timestamp) + ", " + path
+                self.r_temp.set("MODULE_"+self.subscriber_name, value)
+                return message

    def populate_set_out(self, msg, channel=None):
        # multiproc
--- a/bin/LAUNCH.sh
+++ b/bin/LAUNCH.sh
@ -114,31 +114,31 @@ function launching_scripts {

    screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
    sleep 0.1
-    screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x'
+    screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x'
    sleep 0.1
-    screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
+    screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x'
    sleep 0.1
-    screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x'
+    screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x'
    sleep 0.1
-    screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x'
+    screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
    sleep 0.1
    screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
    sleep 0.1
    screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
    sleep 0.1
-    screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x'
+    screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x'
    sleep 0.1
    screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
    sleep 0.1
    screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
    sleep 0.1
-    screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x'
+    screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x'
    sleep 0.1
    screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
    sleep 0.1
    screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
    sleep 0.1
-    screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x'
+    screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x'
    sleep 0.1
    screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
    sleep 0.1
@ -158,7 +158,9 @@ function launching_scripts {
    sleep 0.1
    screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
    sleep 0.1
-    screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x'
+    screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py -k 0; read x'

 }

--- a/bin/Lines.py
+++ b/bin/Lines.py
--- a/bin/ModuleInformation.py
+++ b/bin/ModuleInformation.py
@ -0,0 +1,155 @@
+#!/usr/bin/env python2
+# -*-coding:UTF-8 -*
+
+import time
+import datetime
+import redis
+import os
+import signal
+import argparse
+from subprocess import PIPE, Popen
+import ConfigParser
+import json
+from terminaltables import AsciiTable
+import textwrap
+
+# CONFIG VARIABLES
+threshold_stucked_module = 60*60*1 #1 hour
+log_filename = "../logs/moduleInfo.log"
+command_search_pid = "ps a -o pid,cmd | grep {}"
+command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\""
+
+
+def getPid(module):
+    # Return the PID of the first python2 process running <module>.py, or None if none is listed.
+    p = Popen([command_search_pid.format(module+".py")], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
+    for line in p.stdout:
+        splittedLine = line.split()
+        if 'python2' in splittedLine:
+            return int(splittedLine[0])
+    return None
+
+
+def kill_module(module):
+    # Send SIGUSR1 to the module's process (two attempts) and, once it is
+    # gone, relaunch it in a new window of the "Script" screen session.
+    print ''
+    print '-> trying to kill module:', module
+
+    pid = getPid(module)
+    if pid is not None:
+        for attempt in (1, 2):
+            try:
+                os.kill(pid, signal.SIGUSR1)
+            except OSError:
+                pass  # process exited between the lookup and the signal
+            time.sleep(1)
+            if getPid(module) is None:
+                print module, 'has been killed'
+                print 'restarting', module, '...'
+                Popen([command_restart_module.format(module, module)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True)
+                break
+            if attempt == 1:
+                print 'killing failed, retrying...'
+                time.sleep(3)
+        else:
+            print 'killing failed!'
+    time.sleep(7)
+
+
+if __name__ == "__main__":
+    # Console monitor: every <refresh> seconds, list each module queue with the
+    # paste it last fetched, log the modules that look stuck and, when
+    # --autokill is 1, kill and restart them.
+
+    parser = argparse.ArgumentParser(description='Show info concerning running modules and log suspected stucked modules. May be use to automatically kill and restart stucked one.')
+    parser.add_argument('-r', '--refresh', type=int, required=False, default=1, help='Refresh rate')
+    parser.add_argument('-k', '--autokill', type=int, required=True, default=1, help='Enable auto kill option (1 for TRUE, anything else for FALSE)')
+
+    args = parser.parse_args()
+
+    configfile = os.path.join(os.environ['AIL_BIN'], 'packages/config.cfg')
+    if not os.path.exists(configfile):
+        raise Exception('Unable to find the configuration file. \
+                        Did you set environment variables? \
+                        Or activate the virtualenv.')
+
+    cfg = ConfigParser.ConfigParser()
+    cfg.read(configfile)
+
+    # REDIS #
+    server = redis.StrictRedis(
+        host=cfg.get("Redis_Queues", "host"),
+        port=cfg.getint("Redis_Queues", "port"),
+        db=cfg.getint("Redis_Queues", "db"))
+
+    while True:
+
+        num = 0
+        printarray1 = []  # queues that still hold messages
+        printarray2 = []  # empty (idle) queues
+        for queue, card in server.hgetall("queues").iteritems():
+            key = "MODULE_" + queue
+            value = server.get(key)
+            if value is not None:
+                # value is "<epoch seconds>, <paste name>"; split only once so
+                # a ", " inside the paste name cannot break the unpacking
+                timestamp, path = value.split(", ", 1)
+                num += 1
+                startTime_readable = datetime.datetime.fromtimestamp(int(timestamp))
+                processed_time_readable = str((datetime.datetime.now() - startTime_readable)).split('.')[0]
+
+                if int(card) > 0:
+                    if int((datetime.datetime.now() - startTime_readable).total_seconds()) > threshold_stucked_module:
+                        # 'with' flushes and closes the log immediately instead
+                        # of leaving one open handle per refresh
+                        with open(log_filename, 'a') as log:
+                            log.write(json.dumps([queue, card, str(startTime_readable), str(processed_time_readable), path]) + "\n")
+                        if args.autokill == 1:
+                            kill_module(queue)
+
+                    printarray1.append([str(num), str(queue), str(card), str(startTime_readable), str(processed_time_readable), str(path)])
+
+                else:
+                    printarray2.append([str(num), str(queue), str(card), str(startTime_readable), str(processed_time_readable), str(path)])
+
+        printarray1.sort(lambda x,y: cmp(x[4], y[4]), reverse=True)
+        printarray2.sort(lambda x,y: cmp(x[4], y[4]), reverse=True)
+        printarray1.insert(0,["#", "Queue", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"])
+        printarray2.insert(0,["#", "Queue", "Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"])
+
+        os.system('clear')
+        t1 = AsciiTable(printarray1, title="Working queues")
+        t2 = AsciiTable(printarray2, title="Idling queues")
+
+        # When a table is wider than the terminal, wrap the cells of its widest column.
+        for table in (t1, t2):
+            if table.ok:
+                continue
+            longest_col = table.column_widths.index(max(table.column_widths))
+            max_length_col = table.column_max_width(longest_col)
+            if max_length_col <= 0:
+                continue
+            for i, content in enumerate(table.table_data):
+                if len(content[longest_col]) > max_length_col:
+                    temp = ''
+                    for l in content[longest_col].splitlines():
+                        if len(l) > max_length_col:
+                            temp += '\n'.join(textwrap.wrap(l, max_length_col)) + '\n'
+                        else:
+                            temp += l + '\n'
+                    content[longest_col] = temp.strip()
+                table.table_data[i] = content
+
+        print t1.table
+        print '\n'
+        print t2.table
+
+        time.sleep(args.refresh)
--- a/bin/SentimentAnalysis.py
+++ b/bin/SentimentAnalysis.py
--- a/bin/Web.py
+++ b/bin/Web.py
--- a/bin/WebStats.py
+++ b/bin/WebStats.py
@ -77,12 +77,14 @@ def compute_progression(server, field_name, num_day, url_parsed):
                member_set = []
                for keyw in server.smembers(redis_progression_name_set):
                    member_set.append((keyw, int(server.hget(redis_progression_name, keyw))))
-                print member_set
                member_set.sort(key=lambda tup: tup[1])
                if member_set[0][1] < keyword_increase:
+                    print 'removing', member_set[0][0] + '('+str(member_set[0][1])+')', 'and adding', keyword, str(keyword_increase)
                    #remove min from set and add the new one
-                    server.srem(redis_progression_name_set, member_set[0])
+                    server.srem(redis_progression_name_set, member_set[0][0])
                    server.sadd(redis_progression_name_set, keyword)
+                    server.hdel(redis_progression_name, member_set[0][0])
+                    server.hset(redis_progression_name, keyword, keyword_increase)


 if __name__ == '__main__':
--- a/bin/launch_scripts.sh
+++ b/bin/launch_scripts.sh
@ -8,50 +8,52 @@ sleep 0.1

 echo -e $GREEN"\t* Launching ZMQ scripts"$DEFAULT

-screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Duplicate" bash -c './Duplicate_ssdeep_v2.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Attribute" bash -c './Attribute.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Line" bash -c './Line.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "DomainClassifier" bash -c './DomClassifier.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "CreditCard" bash -c './CreditCard.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Url" bash -c './Url.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Curve_topsets_manager" bash -c './Curve_manage_top_sets.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
-sleep 0.1
-screen -S "Script" -X screen -t "SentimentAnalyser" bash -c './SentimentAnalyser.py; read x'
+    screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x'
+    sleep 0.1
+    screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x'
+    sleep 0.1
+    # -k is mandatory for ModuleInformation.py; 0 keeps auto-kill disabled, as in LAUNCH.sh
+    screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py -k 0; read x'
--- a/bin/packages/modules.cfg
+++ b/bin/packages/modules.cfg
@ -34,7 +34,7 @@ subscribe = Redis_Global
 publish = Redis_CreditCards,Redis_Mail,Redis_Onion,Redis_Web,Redis_Credential,Redis_SourceCode,Redis_Cve

 [CreditCards]
-subscribe = Redis_CreditCard
+subscribe = Redis_CreditCards
 publish = Redis_Duplicate,Redis_ModuleStats,Redis_BrowseWarningPaste

 [Mail]
--- a/doc/generate_graph_data.py
+++ b/doc/generate_graph_data.py
@ -0,0 +1,62 @@
+#!/usr/bin/env python2
+# -*-coding:UTF-8 -*
+
+content = ""
+modules = {}
+all_modules = []
+curr_module = ""
+streamingPub = {}  # stream name -> last module seen publishing to it
+streamingSub = {}  # stream name -> last module seen subscribing to it
+
+# Parse modules.cfg: every [section] is a module; its "subscribe" and
+# "publish" entries list the streams it reads from and writes to.
+with open('../bin/packages/modules.cfg', 'r') as f:
+    for line in f:
+        if line[0] != '#':
+            if line[0] == '[':
+                curr_name = line.replace('[','').replace(']','').replace('\n', '').replace(' ', '')
+                all_modules.append(curr_name)
+                modules[curr_name] = {'sub': [], 'pub': []}
+                curr_module = curr_name
+            elif curr_module != "": # searching for sub or pub
+                if line.startswith("subscribe"):
+                    curr_subscribers = [w for w in line.replace('\n', '').replace(' ', '').split('=')[1].split(',')]
+                    modules[curr_module]['sub'] = curr_subscribers
+                    for sub in curr_subscribers:
+                        streamingSub[sub] = curr_module
+
+                elif line.startswith("publish"):
+                    curr_publishers = [w for w in line.replace('\n', '').replace(' ', '').split('=')[1].split(',')]
+                    modules[curr_module]['pub'] = curr_publishers
+                    for pub in curr_publishers:
+                        streamingPub[pub] = curr_module
+                else:
+                    continue
+
+    # A set removes the duplicate edges produced when the same link is found
+    # from both the subscriber side and the publisher side.
+    output_set_graph = set()
+
+    for module in modules:
+        for stream_in in modules[module]['sub']:
+            if stream_in not in streamingPub:
+                # stream fed from outside the modules: draw it as its own (orange) node
+                output_set_graph.add("\"" + stream_in + "\" [color=darkorange1] ;\n")
+                output_set_graph.add("\"" + stream_in + "\"" + "->" + module + ";\n")
+            else:
+                output_set_graph.add("\"" + streamingPub[stream_in] + "\"" + "->" + module + ";\n")
+
+        for stream_out in modules[module]['pub']:
+            if stream_out not in streamingSub:
+                # stream nobody subscribes to: data flows from the module INTO it
+                output_set_graph.add("\"" + stream_out + "\" [color=darkorange1] ;\n")
+                output_set_graph.add("\"" + module + "\"" + "->" + "\"" + stream_out + "\"" + ";\n")
+            else:
+                output_set_graph.add("\"" + module + "\"" + "->" + streamingSub[stream_out] + ";\n")
+
+
+    output_text_graph = ""
+    output_text_graph += "digraph unix {\n"\
+                              "graph [pad=\"0.5\"];\n"\
+                              "size=\"25,25\";\n"\
+                              "node [color=lightblue2, style=filled];\n"
+
+    for elem in output_set_graph:
+        output_text_graph += elem
+
+    output_text_graph += "}"
+    print output_text_graph
--- a/doc/generate_modules_data_flow_graph.sh
+++ b/doc/generate_modules_data_flow_graph.sh
@ -0,0 +1,3 @@
+#!/bin/bash
+
+python generate_graph_data.py | dot -T png -o module-data-flow.png
--- a/doc/module-data-flow.png
+++ b/doc/module-data-flow.png
--- a/doc/screenshots/browse-important.png
+++ b/doc/screenshots/browse-important.png
--- a/doc/screenshots/dashboard.png
+++ b/doc/screenshots/dashboard.png
--- a/doc/screenshots/terms-manager.png
+++ b/doc/screenshots/terms-manager.png
--- a/doc/screenshots/terms-plot.png
+++ b/doc/screenshots/terms-plot.png
--- a/doc/screenshots/terms-top.png
+++ b/doc/screenshots/terms-top.png
--- a/doc/screenshots/trending-module.png
+++ b/doc/screenshots/trending-module.png
--- a/doc/screenshots/trending-web.png
+++ b/doc/screenshots/trending-web.png
--- a/installing_deps.sh
+++ b/installing_deps.sh
@ -17,6 +17,9 @@ sudo apt-get install libadns1 libadns1-dev
 #Needed for redis-lvlDB
 sudo apt-get install libev-dev libgmp-dev

+#Needed to generate the modules data-flow graph
+sudo apt-get install graphviz
+
 #needed for mathplotlib
 test ! -L /usr/include/ft2build.h && sudo ln -s freetype2/ft2build.h /usr/include/
 sudo easy_install -U distribute
@ -69,6 +72,7 @@ echo export AIL_LEVELDB=$(pwd)/redis-leveldb/ >> ./AILENV/bin/activate

 mkdir -p $AIL_HOME/{PASTES,Blooms,dumps}
 mkdir -p $AIL_HOME/LEVEL_DB_DATA/2016
+mkdir -p $AIL_HOME/LEVEL_DB_DATA/3016

 pip install -U pip
 pip install -r pip_packages_requirement.txt
@ -83,5 +87,6 @@ pushd tlsh/py_ext
 python setup.py build
 python setup.py install

-# Download the necessary NLTK corpora
+# Download the necessary NLTK corpora and sentiment vader
 HOME=$(pwd) python -m textblob.download_corpora
+python -m nltk.downloader vader_lexicon
--- a/pip_packages_requirement.txt
+++ b/pip_packages_requirement.txt
@ -10,6 +10,7 @@ textblob
 numpy
 matplotlib
 networkx
+terminaltables

 #Tokeniser
 nltk
--- a/var/www/Flask_server.py
+++ b/var/www/Flask_server.py
@ -81,8 +81,22 @@ def event_stream():

 def get_queues(r):
    # We may want to put the llen in a pipeline to do only one query.
-    return [(queue, int(card)) for queue, card in
-            r.hgetall("queues").iteritems()]
+    data = [(queue, int(card)) for queue, card in r.hgetall("queues").iteritems()]
+    newData = []
+    for queue, card in data:
+        key = "MODULE_" + queue
+        value = r.get(key)
+        if value is not None:
+            # value is "<epoch seconds>, <paste name>"; only the start time is used here
+            timestamp, path = value.split(", ", 1)
+            startTime_readable = datetime.datetime.fromtimestamp(int(timestamp))
+            seconds = int((datetime.datetime.now() - startTime_readable).total_seconds())
+            newData.append( (queue, card, seconds) )
+        else:
+            # no MODULE_<queue> key stored: keep the queue listed, with 0 seconds
+            newData.append( (queue, card, 0) )
+
+    return newData


 def list_len(s):
--- a/var/www/static/js/indexjavascript.js
+++ b/var/www/static/js/indexjavascript.js
@ -221,11 +221,17 @@ function create_queue_table() {

    for(i = 0; i < (glob_tabvar.row1).length;i++){
        var tr = document.createElement('TR')
-        for(j = 0; j < (glob_tabvar.row1[i]).length; j++){
+        for(j = 0; j < 2; j++){
            var td = document.createElement('TD')
            td.appendChild(document.createTextNode(glob_tabvar.row1[i][j]));
            tr.appendChild(td)
        }
+        if (parseInt(glob_tabvar.row1[i][2]) > 60*2 && parseInt(glob_tabvar.row1[i][1]) > 2)
+            tr.className += " danger";
+        else if (parseInt(glob_tabvar.row1[i][2]) > 60*1)
+            tr.className += " warning";
+        else
+            tr.className += " success";
        tableBody.appendChild(tr);
    }
    Tablediv.appendChild(table);