diff --git a/.dockerignore b/.dockerignore new file mode 100644 index 00000000..175d1d68 --- /dev/null +++ b/.dockerignore @@ -0,0 +1,2 @@ +*.git +*.md diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 00000000..f742e9ea --- /dev/null +++ b/Dockerfile @@ -0,0 +1,9 @@ +FROM ubuntu:14.04 + +RUN mkdir /opt/AIL && apt-get update -y \ + && apt-get install git python-dev build-essential \ + libffi-dev libssl-dev libfuzzy-dev wget -y +ADD . /opt/AIL +WORKDIR /opt/AIL +RUN ./installing_deps.sh && cd var/www/ && ./update_thirdparty.sh +CMD bash docker_start.sh diff --git a/README.md b/README.md index 95398f3b..f5cdb7b4 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ AIL AIL framework - Framework for Analysis of Information Leaks -AIL is a modular framework to analyse potential information leaks from unstructured data sources like pastes from Pastebin or similar services. AIL framework is flexible and can be extended to support other functionalities to mine sensitive information. +AIL is a modular framework to analyse potential information leaks from unstructured data sources like pastes from Pastebin or similar services or unstructured data streams. AIL framework is flexible and can be extended to support other functionalities to mine sensitive information. ![Dashboard](./doc/screenshots/dashboard.png?raw=true "AIL framework dashboard") @@ -38,7 +38,7 @@ Terms manager and occurence ![Term-Plot](./doc/screenshots/terms-plot.png?raw=true "AIL framework termPlot") -AIL framework screencast: https://www.youtube.com/watch?v=9idfHCIMzBY +[AIL framework screencast](https://www.youtube.com/watch?v=1_ZrZkRKmNo) Features -------- @@ -54,7 +54,7 @@ Features * A full-text indexer module to index unstructured information * Modules and web statistics * Global sentiment analysis for each providers based on nltk vader module -* Terms tracking and occurence +* Terms tracking and occurrence * Many more modules for extracting phone numbers, credentials and others Installation diff --git a/bin/Browse_warning_paste.py b/bin/BrowseWarningPaste.py similarity index 97% rename from bin/Browse_warning_paste.py rename to bin/BrowseWarningPaste.py index 49444979..01704902 100755 --- a/bin/Browse_warning_paste.py +++ b/bin/BrowseWarningPaste.py @@ -24,7 +24,7 @@ if __name__ == "__main__": publisher.port = 6380 publisher.channel = "Script" - config_section = 'Browse_warning_paste' + config_section = 'BrowseWarningPaste' p = Process(config_section) diff --git a/bin/Helper.py b/bin/Helper.py index e7338ceb..05b73bf3 100755 --- a/bin/Helper.py +++ b/bin/Helper.py @@ -119,13 +119,7 @@ class Process(object): port=self.config.get('RedisPubSub', 'port'), db=self.config.get('RedisPubSub', 'db')) - self.moduleNum = 1 - for i in range(1, 50): - curr_num = self.r_temp.get("MODULE_"+self.subscriber_name + "_" + str(i)) - if curr_num is None: - self.moduleNum = i - break - + self.moduleNum = os.getpid() def populate_set_in(self): @@ -158,12 +152,14 @@ class Process(object): path = "?" value = str(timestamp) + ", " + path self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value) + self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum)) return message except: path = "?" value = str(timestamp) + ", " + path self.r_temp.set("MODULE_"+self.subscriber_name + "_" + str(self.moduleNum), value) + self.r_temp.sadd("MODULE_TYPE_"+self.subscriber_name, str(self.moduleNum)) return message def populate_set_out(self, msg, channel=None): diff --git a/bin/LAUNCH.sh b/bin/LAUNCH.sh index b50a75d6..d7c31472 100755 --- a/bin/LAUNCH.sh +++ b/bin/LAUNCH.sh @@ -158,7 +158,7 @@ function launching_scripts { sleep 0.1 screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' sleep 0.1 - screen -S "Script" -X screen -t "Browse_warning_paste" bash -c './Browse_warning_paste.py; read x' + screen -S "Script" -X screen -t "BrowseWarningPaste" bash -c './BrowseWarningPaste.py; read x' sleep 0.1 screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x' diff --git a/bin/ModuleInformation.py b/bin/ModuleInformation.py index d783418f..1538f57a 100755 --- a/bin/ModuleInformation.py +++ b/bin/ModuleInformation.py @@ -29,6 +29,7 @@ import textwrap threshold_stucked_module = 60*60*1 #1 hour log_filename = "../logs/moduleInfo.log" command_search_pid = "ps a -o pid,cmd | grep {}" +command_search_name = "ps a -o pid,cmd | grep {}" command_restart_module = "screen -S \"Script\" -X screen -t \"{}\" bash -c \"./{}.py; read x\"" @@ -45,6 +46,23 @@ def clearRedisModuleInfo(): for k in server.keys("MODULE_*"): server.delete(k) +def cleanRedis(): + for k in server.keys("MODULE_TYPE_*"): + moduleName = k[12:].split('_')[0] + for pid in server.smembers(k): + flag_pid_valid = False + proc = Popen([command_search_name.format(pid)], stdin=PIPE, stdout=PIPE, bufsize=1, shell=True) + for line in proc.stdout: + splittedLine = line.split() + if ('python2' in splittedLine or 'python' in splittedLine) and "./"+moduleName+".py" in splittedLine: + flag_pid_valid = True + + if not flag_pid_valid: + print flag_pid_valid, 'cleaning', pid, 'in', k + server.srem(k, pid) + time.sleep(5) + + def kill_module(module): print '' print '-> trying to kill module:', module @@ -76,8 +94,8 @@ if __name__ == "__main__": parser = argparse.ArgumentParser(description='Show info concerning running modules and log suspected stucked modules. May be use to automatically kill and restart stucked one.') parser.add_argument('-r', '--refresh', type=int, required=False, default=1, help='Refresh rate') - parser.add_argument('-k', '--autokill', type=int, required=True, default=1, help='Enable auto kill option (1 for TRUE, anything else for FALSE)') - parser.add_argument('-c', '--clear', type=int, required=False, default=1, help='Clear the current module information (Used to clear data from old launched modules)') + parser.add_argument('-k', '--autokill', type=int, required=False, default=0, help='Enable auto kill option (1 for TRUE, anything else for FALSE)') + parser.add_argument('-c', '--clear', type=int, required=False, default=0, help='Clear the current module information (Used to clear data from old launched modules)') args = parser.parse_args() @@ -99,6 +117,7 @@ if __name__ == "__main__": if args.clear == 1: clearRedisModuleInfo() + lastTime = datetime.datetime.now() module_file_array = set() with open('../doc/all_modules.txt', 'r') as module_file: @@ -108,20 +127,15 @@ if __name__ == "__main__": while True: all_queue = set() - curr_range = 50 printarray1 = [] printarray2 = [] printarray3 = [] for queue, card in server.hgetall("queues").iteritems(): all_queue.add(queue) key = "MODULE_" + queue + "_" - for i in range(1, 50): - curr_num = server.get("MODULE_"+ queue + "_" + str(i)) - if curr_num is None: - curr_range = i - break + keySet = "MODULE_TYPE_" + queue - for moduleNum in range(1, curr_range): + for moduleNum in server.smembers(keySet): value = server.get(key + str(moduleNum)) if value is not None: timestamp, path = value.split(", ") @@ -147,8 +161,8 @@ if __name__ == "__main__": printarray1.sort(lambda x,y: cmp(x[4], y[4]), reverse=True) printarray2.sort(lambda x,y: cmp(x[4], y[4]), reverse=True) - printarray1.insert(0,["Queue", "#", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"]) - printarray2.insert(0,["Queue", "#","Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"]) + printarray1.insert(0,["Queue", "PID", "Amount", "Paste start time", "Processing time for current paste (H:M:S)", "Paste hash"]) + printarray2.insert(0,["Queue", "PID","Amount", "Paste start time", "Time since idle (H:M:S)", "Last paste hash"]) printarray3.insert(0,["Queue", "State"]) os.system('clear') @@ -195,4 +209,7 @@ if __name__ == "__main__": print '\n' print t3.table + if (datetime.datetime.now() - lastTime).total_seconds() > args.refresh*5: + lastTime = datetime.datetime.now() + cleanRedis() time.sleep(args.refresh) diff --git a/bin/packages/Paste.py b/bin/packages/Paste.py index d91018bd..6d18e846 100755 --- a/bin/packages/Paste.py +++ b/bin/packages/Paste.py @@ -264,7 +264,7 @@ class Paste(object): def _get_p_duplicate(self): self.p_duplicate = self.store.hget(self.p_path, "p_duplicate") - return self.p_duplicate if self.p_duplicate is not None else [] + return self.p_duplicate if self.p_duplicate is not None else '[]' def save_all_attributes_redis(self, key=None): """ diff --git a/bin/packages/modules.cfg b/bin/packages/modules.cfg index 0243038a..53bbb2a6 100644 --- a/bin/packages/modules.cfg +++ b/bin/packages/modules.cfg @@ -63,7 +63,7 @@ publish = Redis_BrowseWarningPaste,Redis_Duplicate [ModuleStats] subscribe = Redis_ModuleStats -[Browse_warning_paste] +[BrowseWarningPaste] subscribe = Redis_BrowseWarningPaste #[send_to_queue] @@ -88,9 +88,6 @@ publish = Redis_BrowseWarningPaste,Redis_Duplicate subscribe = Redis_Global publish = Redis_Duplicate,Redis_BrowseWarningPaste -[SourceCode] -subscribe = Redis_SourceCode - [Keys] subscribe = Redis_Global publish = Redis_Duplicate,Redis_BrowseWarningPaste diff --git a/doc/screenshots/terms-plot.png b/doc/screenshots/terms-plot.png index 32647906..4d33466d 100644 Binary files a/doc/screenshots/terms-plot.png and b/doc/screenshots/terms-plot.png differ diff --git a/doc/screenshots/terms-top.png b/doc/screenshots/terms-top.png index e833ac35..fc75aed8 100644 Binary files a/doc/screenshots/terms-top.png and b/doc/screenshots/terms-top.png differ diff --git a/doc/screenshots/trending-module.png b/doc/screenshots/trending-module.png index 1330ead8..48304059 100644 Binary files a/doc/screenshots/trending-module.png and b/doc/screenshots/trending-module.png differ diff --git a/doc/screenshots/trending-web.png b/doc/screenshots/trending-web.png index cd7e510b..a98ff47f 100644 Binary files a/doc/screenshots/trending-web.png and b/doc/screenshots/trending-web.png differ diff --git a/docker_start.sh b/docker_start.sh new file mode 100755 index 00000000..f4bf25bb --- /dev/null +++ b/docker_start.sh @@ -0,0 +1,75 @@ +source ./AILENV/bin/activate +cd bin + +export PATH=$AIL_HOME:$PATH +export PATH=$AIL_REDIS:$PATH +export PATH=$AIL_LEVELDB:$PATH +export AILENV=/opt/AIL + +conf_dir="${AIL_HOME}/configs/" + +screen -dmS "Redis" +screen -S "Redis" -X screen -t "6379" bash -c 'redis-server '$conf_dir'6379.conf ; read x' +screen -S "Redis" -X screen -t "6380" bash -c 'redis-server '$conf_dir'6380.conf ; read x' +screen -S "Redis" -X screen -t "6381" bash -c 'redis-server '$conf_dir'6381.conf ; read x' + +# For Words and curves +sleep 0.1 +screen -S "Redis" -X screen -t "6382" bash -c 'redis-server '$conf_dir'6382.conf ; read x' + +#Want to launch more level_db? +lvdbhost='127.0.0.1' +lvdbdir="${AIL_HOME}/LEVEL_DB_DATA/" +db1_y='2013' +db2_y='2014' +db3_y='2016' +db4_y='3016' +nb_db=13 + +screen -dmS "LevelDB" +#Add lines here with appropriates options. +screen -S "LevelDB" -X screen -t "2013" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'2013/ -P '$db1_y' -M '$nb_db'; read x' +screen -S "LevelDB" -X screen -t "2014" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'2014/ -P '$db2_y' -M '$nb_db'; read x' +screen -S "LevelDB" -X screen -t "2016" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'2016/ -P '$db3_y' -M '$nb_db'; read x' + +# For Curve +screen -S "LevelDB" -X screen -t "3016" bash -c 'redis-leveldb -H '$lvdbhost' -D '$lvdbdir'3016/ -P '$db4_y' -M '$nb_db'; read x' + + +screen -dmS "Logging" +screen -S "Logging" -X screen -t "LogQueue" bash -c 'log_subscriber -p 6380 -c Queuing -l ../logs/; read x' +screen -S "Logging" -X screen -t "LogScript" bash -c 'log_subscriber -p 6380 -c Script -l ../logs/; read x' + +screen -dmS "Queue" +screen -S "Queue" -X screen -t "Queues" bash -c './launch_queues.py; read x' + +screen -dmS "Script" +screen -S "Script" -X screen -t "ModuleInformation" bash -c './ModuleInformation.py -k 0 -c 1; read x' +screen -S "Script" -X screen -t "Global" bash -c './Global.py; read x' +screen -S "Script" -X screen -t "Duplicates" bash -c './Duplicates.py; read x' +screen -S "Script" -X screen -t "Attributes" bash -c './Attributes.py; read x' +screen -S "Script" -X screen -t "Lines" bash -c './Lines.py; read x' +screen -S "Script" -X screen -t "DomClassifier" bash -c './DomClassifier.py; read x' +screen -S "Script" -X screen -t "Categ" bash -c './Categ.py; read x' +screen -S "Script" -X screen -t "Tokenize" bash -c './Tokenize.py; read x' +screen -S "Script" -X screen -t "CreditCards" bash -c './CreditCards.py; read x' +screen -S "Script" -X screen -t "Onion" bash -c './Onion.py; read x' +screen -S "Script" -X screen -t "Mail" bash -c './Mail.py; read x' +screen -S "Script" -X screen -t "Web" bash -c './Web.py; read x' +screen -S "Script" -X screen -t "Credential" bash -c './Credential.py; read x' +screen -S "Script" -X screen -t "Curve" bash -c './Curve.py; read x' +screen -S "Script" -X screen -t "CurveManageTopSets" bash -c './CurveManageTopSets.py; read x' +screen -S "Script" -X screen -t "Indexer" bash -c './Indexer.py; read x' +screen -S "Script" -X screen -t "Keys" bash -c './Keys.py; read x' +screen -S "Script" -X screen -t "Phone" bash -c './Phone.py; read x' +screen -S "Script" -X screen -t "Release" bash -c './Release.py; read x' +screen -S "Script" -X screen -t "Cve" bash -c './Cve.py; read x' +screen -S "Script" -X screen -t "WebStats" bash -c './WebStats.py; read x' +screen -S "Script" -X screen -t "ModuleStats" bash -c './ModuleStats.py; read x' +screen -S "Script" -X screen -t "SQLInjectionDetection" bash -c './SQLInjectionDetection.py; read x' +screen -S "Script" -X screen -t "BrowseWarningPaste" bash -c './BrowseWarningPaste.py; read x' +screen -S "Script" -X screen -t "SentimentAnalysis" bash -c './SentimentAnalysis.py; read x' + +cd $AILENV +cd var/www/ +python Flask_server.py diff --git a/installing_deps.sh b/installing_deps.sh index 225e3858..56e2f3c4 100755 --- a/installing_deps.sh +++ b/installing_deps.sh @@ -6,19 +6,19 @@ set -x sudo apt-get update sudo apt-get install python-pip python-virtualenv python-dev libfreetype6-dev \ - screen g++ python-tk unzip libsnappy-dev cmake + screen g++ python-tk unzip libsnappy-dev cmake -y #Needed for bloom filters -sudo apt-get install libssl-dev libfreetype6-dev python-numpy +sudo apt-get install libssl-dev libfreetype6-dev python-numpy -y # DNS deps -sudo apt-get install libadns1 libadns1-dev +sudo apt-get install libadns1 libadns1-dev -y #Needed for redis-lvlDB -sudo apt-get install libev-dev libgmp-dev +sudo apt-get install libev-dev libgmp-dev -y #Need for generate-data-flow graph -sudo apt-get install graphviz +sudo apt-get install graphviz -y #needed for mathplotlib sudo easy_install -U distribute diff --git a/var/www/Flask_server.py b/var/www/Flask_server.py index 3c4346f1..f3f9d71a 100755 --- a/var/www/Flask_server.py +++ b/var/www/Flask_server.py @@ -81,19 +81,13 @@ def event_stream(): def get_queues(r): # We may want to put the llen in a pipeline to do only one query. - data = [(queue, int(card)) for queue, card in r.hgetall("queues").iteritems()] newData = [] - - curr_range = 50 - for queue, card in data: + for queue, card in r.hgetall("queues").iteritems(): key = "MODULE_" + queue + "_" - for i in range(1, 50): - curr_num = r.get("MODULE_"+ queue + "_" + str(i)) - if curr_num is None: - curr_range = i - break + keySet = "MODULE_TYPE_" + queue - for moduleNum in range(1, curr_range): + for moduleNum in r.smembers(keySet): + value = r.get(key + str(moduleNum)) if value is not None: timestamp, path = value.split(", ") @@ -683,7 +677,6 @@ def terms_plot_tool_data(): else: value_range = [] for timestamp in range(range_start, range_end+oneDay, oneDay): - print timestamp, term value = r_serv_term.hget(timestamp, term) curr_value_range = int(value) if value is not None else 0 value_range.append([timestamp, curr_value_range]) diff --git a/var/www/static/js/indexjavascript.js b/var/www/static/js/indexjavascript.js index 1bdb29eb..e527aafa 100644 --- a/var/www/static/js/indexjavascript.js +++ b/var/www/static/js/indexjavascript.js @@ -207,7 +207,7 @@ function create_queue_table() { table.appendChild(tableHead); table.appendChild(tableBody); var heading = new Array(); - heading[0] = "Queue Name" + heading[0] = "Queue Name.PID" heading[1] = "Amount" var tr = document.createElement('TR'); tableHead.appendChild(tr); @@ -244,12 +244,8 @@ function create_queue_table() { Tablediv.appendChild(table); } -$(document).ready(function () { - if (typeof glob_tabvar == "undefined") - location.reload(); - if (typeof glob_tabvar.row1 == "undefined") - location.reload(); +function load_queues() { var data = []; var data2 = []; var tmp_tab = []; @@ -259,13 +255,17 @@ $(document).ready(function () { var x = new Date(); for (i = 0; i < glob_tabvar.row1.length; i++){ - if (glob_tabvar.row1[i][0] == 'Categ' || glob_tabvar.row1[i][0] == 'Curve'){ - tmp_tab2.push(0); - curves_labels2.push(glob_tabvar.row1[i][0]); + if (glob_tabvar.row1[i][0].split(".")[0] == 'Categ' || glob_tabvar.row1[i][0].split(".")[0] == 'Curve'){ + if (curves_labels2.indexOf(glob_tabvar.row1[i][0].split(".")[0]) == -1) { + tmp_tab2.push(0); + curves_labels2.push(glob_tabvar.row1[i][0].split(".")[0]); + } } else { - tmp_tab.push(0); - curves_labels.push(glob_tabvar.row1[i][0]); + if (curves_labels.indexOf(glob_tabvar.row1[i][0].split(".")[0]) == -1) { + tmp_tab.push(0); + curves_labels.push(glob_tabvar.row1[i][0].split(".")[0]); + } } } tmp_tab.unshift(x); @@ -324,19 +324,29 @@ $(document).ready(function () { update_values(); if($('#button-toggle-queues').prop('checked')){ + $("#queue-color-legend").show(); create_queue_table(); } else{ $("#queueing").html(''); + $("#queue-color-legend").hide(); } + queues_pushed = [] for (i = 0; i < (glob_tabvar.row1).length; i++){ - if (glob_tabvar.row1[i][0] == 'Categ' || glob_tabvar.row1[i][0] == 'Curve'){ - tmp_values2.push(glob_tabvar.row1[i][1]); + if (glob_tabvar.row1[i][0].split(".")[0] == 'Categ' || glob_tabvar.row1[i][0].split(".")[0] == 'Curve'){ + if (queues_pushed.indexOf(glob_tabvar.row1[i][0].split(".")[0]) == -1) { + queues_pushed.push(glob_tabvar.row1[i][0].split(".")[0]); + tmp_values2.push(parseInt(glob_tabvar.row1[i][1])); + } } else { - tmp_values.push(glob_tabvar.row1[i][1]); + if (queues_pushed.indexOf(glob_tabvar.row1[i][0].split(".")[0]) == -1) { + queues_pushed.push(glob_tabvar.row1[i][0].split(".")[0]); + tmp_values.push(parseInt(glob_tabvar.row1[i][1])); + } + } } tmp_values.unshift(x); @@ -375,7 +385,19 @@ $(document).ready(function () { // something went wrong, hide the canvas container document.getElementById('myCanvasContainer').style.display = 'none'; } +} +function manage_undefined() { + if (typeof glob_tabvar == "undefined") + setTimeout(function() { if (typeof glob_tabvar == "undefined") { manage_undefined(); } else { load_queues(); } }, 1000); + else if (typeof glob_tabvar.row1 == "undefined") + setTimeout(function() { if (typeof glob_tabvar.row1 == "undefined") { manage_undefined(); } else { load_queues(); } }, 1000); + else + load_queues(); +} + +$(document).ready(function () { + manage_undefined(); }); diff --git a/var/www/templates/index.html b/var/www/templates/index.html index 28c3aff9..8bbb59e1 100644 --- a/var/www/templates/index.html +++ b/var/www/templates/index.html @@ -66,7 +66,18 @@ Display queues
-
+
+ + + + + + + + +
Working queues
Idling queues
Stucked queues
+
+
diff --git a/var/www/templates/terms_management.html b/var/www/templates/terms_management.html index 32a51c32..e4e6e5f1 100644 --- a/var/www/templates/terms_management.html +++ b/var/www/templates/terms_management.html @@ -12,7 +12,6 @@ - diff --git a/var/www/templates/terms_plot_top.html b/var/www/templates/terms_plot_top.html index fd7a2be8..cf11bc88 100644 --- a/var/www/templates/terms_plot_top.html +++ b/var/www/templates/terms_plot_top.html @@ -12,7 +12,6 @@ - diff --git a/var/www/update_thirdparty.sh b/var/www/update_thirdparty.sh index feae55df..471e8896 100755 --- a/var/www/update_thirdparty.sh +++ b/var/www/update_thirdparty.sh @@ -39,12 +39,17 @@ wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.pie.js -O ./ wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.time.js -O ./static/js/jquery.flot.time.js wget https://raw.githubusercontent.com/flot/flot/master/jquery.flot.stack.js -O ./static/js/jquery.flot.stack.js -#Ressources for sparkline and canvasJS +#Ressources for sparkline and canvasJS and slider wget http://omnipotent.net/jquery.sparkline/2.1.2/jquery.sparkline.min.js -O ./static/js/jquery.sparkline.min.js mkdir temp wget http://canvasjs.com/fdm/chart/ -O temp/canvasjs.zip unzip temp/canvasjs.zip -d temp/ mv temp/jquery.canvasjs.min.js ./static/js/jquery.canvasjs.min.js + +wget https://jqueryui.com/resources/download/jquery-ui-1.12.0.zip -O temp/jquery-ui.zip +unzip temp/jquery-ui.zip -d temp/ +mv temp/jquery-ui-1.12.0/jquery-ui.min.js ./static/js/jquery-ui.min.js +mv temp/jquery-ui-1.12.0/jquery-ui.min.css ./static/css/jquery-ui.min.css rm -rf temp mkdir -p ./static/image